diff mbox

[v4,12/29] numa: add -numa node,memdev= option

Message ID a1a492e67357c8ba34555ba4e34fcaf70d0dd521.1402299637.git.hutao@cn.fujitsu.com
State New
Headers show

Commit Message

Hu Tao June 9, 2014, 10:25 a.m. UTC
From: Paolo Bonzini <pbonzini@redhat.com>

This option provides the infrastructure for binding guest NUMA nodes
to host NUMA nodes.  For example:

 -object memory-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 \
 -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
 -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
 -numa node,nodeid=1,cpus=1,memdev=ram-node1

The option replaces "-numa node,mem=".

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
---
 include/sysemu/sysemu.h |  1 +
 numa.c                  | 62 +++++++++++++++++++++++++++++++++++++++++++++++--
 qapi-schema.json        | 11 ++++++---
 qemu-options.hx         | 12 ++++++----
 4 files changed, 77 insertions(+), 9 deletions(-)

Comments

Eric Blake June 9, 2014, 5:22 p.m. UTC | #1
On 06/09/2014 04:25 AM, Hu Tao wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> This option provides the infrastructure for binding guest NUMA nodes
> to host NUMA nodes.  For example:
> 
>  -object memory-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 \
>  -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
>  -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
>  -numa node,nodeid=1,cpus=1,memdev=ram-node1
> 
> The option replaces "-numa node,mem=".
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> ---
>  include/sysemu/sysemu.h |  1 +

> +# @mem: #optional memory size of this node; mutually exclusive with @memdev.
> +#       Equally divide total memory among nodes if both @mem and @memdev are
> +#       omitted.
> +#
> +# @memdev: #optional memory backend object.  If specified for one node,
> +#          it must be specified for all nodes.
>  #
>  # Since: 2.1
>  ##
> @@ -4753,4 +4757,5 @@
>    'data': {
>     '*nodeid': 'uint16',
>     '*cpus':   ['uint16'],
> -   '*mem':    'size' }}
> +   '*mem':    'size',
> +   '*memdev': 'str' }}

This looks okay.

> diff --git a/qemu-options.hx b/qemu-options.hx
> index d3cd2ce..e448d33 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -95,16 +95,20 @@ specifies the maximum number of hotpluggable CPUs.
>  ETEXI
>  
>  DEF("numa", HAS_ARG, QEMU_OPTION_numa,
> -    "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
> +    "-numa node[,mem=size][,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)

But this implies both parameters can be used at once.  Is it worth
rewriting in two lines:

"-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n"
"-numa node[,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n"

to make the exclusion clearer?


> -to allocate RAM and vCPUs respectively.
> +to allocate RAM and vCPU srespectively, and possibly @option{-object}

s/vCPU srespectively/vCPUs respectively/
Hu Tao June 10, 2014, 2:23 a.m. UTC | #2
On Mon, Jun 09, 2014 at 11:22:05AM -0600, Eric Blake wrote:
> On 06/09/2014 04:25 AM, Hu Tao wrote:
> > From: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > This option provides the infrastructure for binding guest NUMA nodes
> > to host NUMA nodes.  For example:
> > 
> >  -object memory-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 \
> >  -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
> >  -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
> >  -numa node,nodeid=1,cpus=1,memdev=ram-node1
> > 
> > The option replaces "-numa node,mem=".
> > 
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
> > ---
> >  include/sysemu/sysemu.h |  1 +
> 
> > +# @mem: #optional memory size of this node; mutually exclusive with @memdev.
> > +#       Equally divide total memory among nodes if both @mem and @memdev are
> > +#       omitted.
> > +#
> > +# @memdev: #optional memory backend object.  If specified for one node,
> > +#          it must be specified for all nodes.
> >  #
> >  # Since: 2.1
> >  ##
> > @@ -4753,4 +4757,5 @@
> >    'data': {
> >     '*nodeid': 'uint16',
> >     '*cpus':   ['uint16'],
> > -   '*mem':    'size' }}
> > +   '*mem':    'size',
> > +   '*memdev': 'str' }}
> 
> This looks okay.
> 
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index d3cd2ce..e448d33 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -95,16 +95,20 @@ specifies the maximum number of hotpluggable CPUs.
> >  ETEXI
> >  
> >  DEF("numa", HAS_ARG, QEMU_OPTION_numa,
> > -    "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
> > +    "-numa node[,mem=size][,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
> 
> But this implies both parameters can be used at once.  Is it worth
> rewriting in two lines:
> 
> "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n"
> "-numa node[,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n"
> 
> to make the exclusion clearer?

OK.

> 
> 
> > -to allocate RAM and vCPUs respectively.
> > +to allocate RAM and vCPU srespectively, and possibly @option{-object}
> 
> s/vCPU srespectively/vCPUs respectively/

:-P

> 
> -- 
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
>
diff mbox

Patch

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index caf88dd..1e141e3 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -147,6 +147,7 @@  extern int nb_numa_nodes;
 typedef struct node_info {
     uint64_t node_mem;
     DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+    struct HostMemoryBackend *node_memdev;
 } NodeInfo;
 extern NodeInfo numa_info[MAX_NODES];
 void set_numa_nodes(void);
diff --git a/numa.c b/numa.c
index efdebf4..ce43e69 100644
--- a/numa.c
+++ b/numa.c
@@ -33,6 +33,7 @@ 
 #include "qapi/dealloc-visitor.h"
 #include "qapi/qmp/qerror.h"
 #include "hw/boards.h"
+#include "sysemu/hostmem.h"
 
 QemuOptsList qemu_numa_opts = {
     .name = "numa",
@@ -41,6 +42,8 @@  QemuOptsList qemu_numa_opts = {
     .desc = { { 0 } } /* validated with OptsVisitor */
 };
 
+static int have_memdevs = -1;
+
 static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
 {
     uint16_t nodenr;
@@ -67,6 +70,20 @@  static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
         bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
     }
 
+    if (node->has_mem && node->has_memdev) {
+        error_setg(errp, "qemu: cannot specify both mem= and memdev=\n");
+        return;
+    }
+
+    if (have_memdevs == -1) {
+        have_memdevs = node->has_memdev;
+    }
+    if (node->has_memdev != have_memdevs) {
+        error_setg(errp, "qemu: memdev option must be specified for either "
+                   "all or no nodes\n");
+        return;
+    }
+
     if (node->has_mem) {
         uint64_t mem_size = node->mem;
         const char *mem_str = qemu_opt_get(opts, "mem");
@@ -76,6 +93,18 @@  static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
         }
         numa_info[nodenr].node_mem = mem_size;
     }
+    if (node->has_memdev) {
+        Object *o;
+        o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL);
+        if (!o) {
+            error_setg(errp, "memdev=%s is ambiguous", node->memdev);
+            return;
+        }
+
+        object_ref(o);
+        numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
+        numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
+    }
 }
 
 int numa_init_func(QemuOpts *opts, void *opaque)
@@ -195,10 +224,39 @@  void set_numa_modes(void)
     }
 }
 
+static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
+                                           const char *name,
+                                           uint64_t ram_size)
+{
+    memory_region_init_ram(mr, owner, name, ram_size);
+    vmstate_register_ram_global(mr);
+}
+
 void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
                                           const char *name,
                                           uint64_t ram_size)
 {
-    memory_region_init_ram(mr, owner, name, ram_size);
-    vmstate_register_ram_global(mr);
+    uint64_t addr = 0;
+    int i;
+
+    if (nb_numa_nodes == 0 || !have_memdevs) {
+        allocate_system_memory_nonnuma(mr, owner, name, ram_size);
+        return;
+    }
+
+    memory_region_init(mr, owner, name, ram_size);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        Error *local_err = NULL;
+        uint64_t size = numa_info[i].node_mem;
+        HostMemoryBackend *backend = numa_info[i].node_memdev;
+        MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err);
+        if (local_err) {
+            qerror_report_err(local_err);
+            exit(1);
+        }
+
+        memory_region_add_subregion(mr, addr, seg);
+        vmstate_register_ram_global(seg);
+        addr += size;
+    }
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index 8ce01cb..d5ab066 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4744,8 +4744,12 @@ 
 # @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin
 #         if omitted)
 #
-# @mem: #optional memory size of this node (equally divide total memory among
-#        nodes if omitted)
+# @mem: #optional memory size of this node; mutually exclusive with @memdev.
+#       Equally divide total memory among nodes if both @mem and @memdev are
+#       omitted.
+#
+# @memdev: #optional memory backend object.  If specified for one node,
+#          it must be specified for all nodes.
 #
 # Since: 2.1
 ##
@@ -4753,4 +4757,5 @@ 
   'data': {
    '*nodeid': 'uint16',
    '*cpus':   ['uint16'],
-   '*mem':    'size' }}
+   '*mem':    'size',
+   '*memdev': 'str' }}
diff --git a/qemu-options.hx b/qemu-options.hx
index d3cd2ce..e448d33 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -95,16 +95,20 @@  specifies the maximum number of hotpluggable CPUs.
 ETEXI
 
 DEF("numa", HAS_ARG, QEMU_OPTION_numa,
-    "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
+    "-numa node[,mem=size][,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
 STEXI
-@item -numa node[,mem=@var{size}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}]
+@item -numa node[,mem=@var{size}][,memdev=@var{id}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}]
 @findex -numa
-Simulate a multi node NUMA system. If @samp{mem}
+Simulate a multi node NUMA system. If @samp{mem}, @samp{memdev}
 and @samp{cpus} are omitted, resources are split equally. Also, note
 that the -@option{numa} option doesn't allocate any of the specified
 resources. That is, it just assigns existing resources to NUMA nodes. This
 means that one still has to use the @option{-m}, @option{-smp} options
-to allocate RAM and vCPUs respectively.
+to allocate RAM and vCPUs respectively, and possibly @option{-object}
+to specify the memory backend for the @samp{memdev} suboption.
+
+@samp{mem} and @samp{memdev} are mutually exclusive.  Furthermore, if one
+node uses @samp{memdev}, all of them have to use it.
 ETEXI
 
 DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,