Message ID | b87b2fb5255071e1f47c02f25949f2c73c856652.1392794450.git.hutao@cn.fujitsu.com |
---|---|
State | New |
Headers | show |
On Wed, 19 Feb 2014 15:54:01 +0800 Hu Tao <hutao@cn.fujitsu.com> wrote: > From: Paolo Bonzini <pbonzini@redhat.com> > > This option provides the infrastructure for binding guest NUMA nodes > to host NUMA nodes. For example: > > -object memory-ram,size=1024M,policy=membind,host-nodes=0,id=ram-node0 \ > -numa node,nodeid=0,cpus=0,memdev=ram-node0 \ > -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \ > -numa node,nodeid=1,cpus=1,memdev=ram-node1 > > The option replaces "-numa mem". > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > Conflicts: > include/sysemu/sysemu.h > numa.c > > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com> > --- > include/sysemu/sysemu.h | 2 ++ > numa.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++-- > qapi-schema.json | 6 ++++- > 3 files changed, 69 insertions(+), 3 deletions(-) > > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h > index e9da760..acfc0c7 100644 > --- a/include/sysemu/sysemu.h > +++ b/include/sysemu/sysemu.h > @@ -12,6 +12,7 @@ > #include "qemu/bitmap.h" > #include "qom/object.h" > #include "hw/boards.h" > +#include "sysemu/hostmem.h" > > /* vl.c */ > > @@ -140,6 +141,7 @@ extern int nb_numa_nodes; > typedef struct node_info { > uint64_t node_mem; > DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); > + HostMemoryBackend *node_memdev; > } NodeInfo; > extern NodeInfo numa_info[MAX_NODES]; > void set_numa_nodes(void); > diff --git a/numa.c b/numa.c > index 403b08b..ca55ad7 100644 > --- a/numa.c > +++ b/numa.c > @@ -27,6 +27,8 @@ > #include "qapi-visit.h" > #include "qapi/opts-visitor.h" > #include "qapi/dealloc-visitor.h" > +#include "qapi/qmp/qerror.h" > + > QemuOptsList qemu_numa_opts = { > .name = "numa", > .implied_opt_name = "type", > @@ -34,10 +36,13 @@ QemuOptsList qemu_numa_opts = { > .desc = { { 0 } } /* validated with OptsVisitor */ > }; > > +static int have_memdevs = -1; > + > static int numa_node_parse(NumaNodeOptions *opts) > { > uint16_t nodenr; > uint16List *cpus = 
NULL; > + Error *local_err = NULL; > > if (opts->has_nodeid) { > nodenr = opts->nodeid; > @@ -60,6 +65,19 @@ static int numa_node_parse(NumaNodeOptions *opts) > bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); > } > > + if (opts->has_mem && opts->has_memdev) { > + fprintf(stderr, "qemu: cannot specify both mem= and memdev=\n"); > + return -1; > + } > + > + if (have_memdevs == -1) { > + have_memdevs = opts->has_memdev; > + } > + if (opts->has_memdev != have_memdevs) { > + fprintf(stderr, "qemu: memdev option must be specified for either " > + "all or no nodes\n"); > + } > + > if (opts->has_mem) { > int64_t mem_size; > char *endptr; > @@ -70,7 +88,19 @@ static int numa_node_parse(NumaNodeOptions *opts) > } > numa_info[nodenr].node_mem = mem_size; > } > + if (opts->has_memdev) { > + Object *o; > + o = object_resolve_path_type(opts->memdev, TYPE_MEMORY_BACKEND, NULL); > + if (!o) { > + error_setg(&local_err, "memdev=%s is ambiguous", opts->memdev); > + qerror_report_err(local_err); > + return -1; > + } > > + object_ref(o); > + numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL); > + numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); If you make numa_info a QOM object with node_memdev as a link<> property, then the above hunk could be replaced with just setting the link. And node_mem could be replaced with a read-only property that reads the size directly from memdev, avoiding data duplication. As a side effect, numa_info will also become accessible for introspection using the QOM interface. 
Something like: qom-list /machine/memory-node[X] qom-get /machine/memory-node[X]/memory_size > + } > return 0; > } > > @@ -189,12 +219,42 @@ void set_numa_modes(void) > } > } > > +static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, > + const char *name, > + QEMUMachineInitArgs *args) > +{ > + uint64_t ram_size = args->ram_size; > + > + memory_region_init_ram(mr, owner, name, ram_size); > + vmstate_register_ram_global(mr); > +} > + > void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, > const char *name, > QEMUMachineInitArgs *args) > { > uint64_t ram_size = args->ram_size; > + uint64_t addr = 0; > + int i; > > - memory_region_init_ram(mr, owner, name, ram_size); > - vmstate_register_ram_global(mr); > + if (nb_numa_nodes == 0 || !have_memdevs) { > + allocate_system_memory_nonnuma(mr, owner, name, args); > + return; > + } > + > + memory_region_init(mr, owner, name, ram_size); > + for (i = 0; i < nb_numa_nodes; i++) { > + Error *local_err = NULL; > + uint64_t size = numa_info[i].node_mem; > + HostMemoryBackend *backend = numa_info[i].node_memdev; > + MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err); > + if (local_err) { > + qerror_report_err(local_err); > + exit(1); > + } > + > + memory_region_add_subregion(mr, addr, seg); > + vmstate_register_ram_global(seg); > + addr += size; > + } > } > diff --git a/qapi-schema.json b/qapi-schema.json > index a2839b8..498ea9b 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -4441,7 +4441,10 @@ > # > # @cpus: #optional VCPUs belong to this node > # > -# @mem: #optional memory size of this node > +# @memdev: #optional memory backend object. If specified for one node, > +# it must be specified for all nodes. > +# > +# @mem: #optional memory size of this node; mutually exclusive with @memdev. > # > # Since: 2.0 > ## > @@ -4449,4 +4452,5 @@ > 'data': { > '*nodeid': 'uint16', > '*cpus': ['uint16'], > + '*memdev': 'str', > '*mem': 'str' }}
Il 19/02/2014 10:50, Igor Mammedov ha scritto: >> > + numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL); >> > + numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); > If you make numa_info a QOM object with node_memdev as a link<> property, > then the above hunk could be replaced with just setting the link. > And node_mem could be replaced with a read-only property that reads the size > directly from memdev, avoiding data duplication. > > As a side effect, numa_info will also become accessible for introspection > using the QOM interface. Something like: > qom-list /machine/memory-node[X] > qom-get /machine/memory-node[X]/memory_size I agree, but I think we can do it on top. Paolo
On 02/19/2014 12:54 AM, Hu Tao wrote: > From: Paolo Bonzini <pbonzini@redhat.com> > > This option provides the infrastructure for binding guest NUMA nodes > to host NUMA nodes. For example: > > -object memory-ram,size=1024M,policy=membind,host-nodes=0,id=ram-node0 \ > -numa node,nodeid=0,cpus=0,memdev=ram-node0 \ > -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \ > -numa node,nodeid=1,cpus=1,memdev=ram-node1 > > The option replaces "-numa mem". > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > Conflicts: > include/sysemu/sysemu.h > numa.c Until this patch is taken upstream, a 'Conflicts:' section in your commit message doesn't make sense. That is useful for downstream or stable branch backports, but doesn't belong on the mainline branch. > > Signed-off-by: Hu Tao <hutao@cn.fujitsu.com> > --- > include/sysemu/sysemu.h | 2 ++ > numa.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++-- > qapi-schema.json | 6 ++++- > 3 files changed, 69 insertions(+), 3 deletions(-) >
On Mon, Mar 03, 2014 at 05:10:20PM -0700, Eric Blake wrote: > On 02/19/2014 12:54 AM, Hu Tao wrote: > > From: Paolo Bonzini <pbonzini@redhat.com> > > > > This option provides the infrastructure for binding guest NUMA nodes > > to host NUMA nodes. For example: > > > > -object memory-ram,size=1024M,policy=membind,host-nodes=0,id=ram-node0 \ > > -numa node,nodeid=0,cpus=0,memdev=ram-node0 \ > > -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \ > > -numa node,nodeid=1,cpus=1,memdev=ram-node1 > > > > The option replaces "-numa mem". > > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > > > Conflicts: > > include/sysemu/sysemu.h > > numa.c > > Until this patch is taken upstream, a 'Conflicts:' section in your > commit message doesn't make sense. That is useful for downstream or > stable branch backports, but doesn't belong on the mainline branch. I think this was introduced during rebase. Will fix. Thanks.
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index e9da760..acfc0c7 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -12,6 +12,7 @@ #include "qemu/bitmap.h" #include "qom/object.h" #include "hw/boards.h" +#include "sysemu/hostmem.h" /* vl.c */ @@ -140,6 +141,7 @@ extern int nb_numa_nodes; typedef struct node_info { uint64_t node_mem; DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); + HostMemoryBackend *node_memdev; } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; void set_numa_nodes(void); diff --git a/numa.c b/numa.c index 403b08b..ca55ad7 100644 --- a/numa.c +++ b/numa.c @@ -27,6 +27,8 @@ #include "qapi-visit.h" #include "qapi/opts-visitor.h" #include "qapi/dealloc-visitor.h" +#include "qapi/qmp/qerror.h" + QemuOptsList qemu_numa_opts = { .name = "numa", .implied_opt_name = "type", @@ -34,10 +36,13 @@ QemuOptsList qemu_numa_opts = { .desc = { { 0 } } /* validated with OptsVisitor */ }; +static int have_memdevs = -1; + static int numa_node_parse(NumaNodeOptions *opts) { uint16_t nodenr; uint16List *cpus = NULL; + Error *local_err = NULL; if (opts->has_nodeid) { nodenr = opts->nodeid; @@ -60,6 +65,19 @@ static int numa_node_parse(NumaNodeOptions *opts) bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); } + if (opts->has_mem && opts->has_memdev) { + fprintf(stderr, "qemu: cannot specify both mem= and memdev=\n"); + return -1; + } + + if (have_memdevs == -1) { + have_memdevs = opts->has_memdev; + } + if (opts->has_memdev != have_memdevs) { + fprintf(stderr, "qemu: memdev option must be specified for either " + "all or no nodes\n"); + } + if (opts->has_mem) { int64_t mem_size; char *endptr; @@ -70,7 +88,19 @@ static int numa_node_parse(NumaNodeOptions *opts) } numa_info[nodenr].node_mem = mem_size; } + if (opts->has_memdev) { + Object *o; + o = object_resolve_path_type(opts->memdev, TYPE_MEMORY_BACKEND, NULL); + if (!o) { + error_setg(&local_err, "memdev=%s is ambiguous", opts->memdev); + qerror_report_err(local_err); + return 
-1; + } + object_ref(o); + numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL); + numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); + } return 0; } @@ -189,12 +219,42 @@ void set_numa_modes(void) } } +static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, + const char *name, + QEMUMachineInitArgs *args) +{ + uint64_t ram_size = args->ram_size; + + memory_region_init_ram(mr, owner, name, ram_size); + vmstate_register_ram_global(mr); +} + void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, const char *name, QEMUMachineInitArgs *args) { uint64_t ram_size = args->ram_size; + uint64_t addr = 0; + int i; - memory_region_init_ram(mr, owner, name, ram_size); - vmstate_register_ram_global(mr); + if (nb_numa_nodes == 0 || !have_memdevs) { + allocate_system_memory_nonnuma(mr, owner, name, args); + return; + } + + memory_region_init(mr, owner, name, ram_size); + for (i = 0; i < nb_numa_nodes; i++) { + Error *local_err = NULL; + uint64_t size = numa_info[i].node_mem; + HostMemoryBackend *backend = numa_info[i].node_memdev; + MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err); + if (local_err) { + qerror_report_err(local_err); + exit(1); + } + + memory_region_add_subregion(mr, addr, seg); + vmstate_register_ram_global(seg); + addr += size; + } } diff --git a/qapi-schema.json b/qapi-schema.json index a2839b8..498ea9b 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4441,7 +4441,10 @@ # # @cpus: #optional VCPUs belong to this node # -# @mem: #optional memory size of this node +# @memdev: #optional memory backend object. If specified for one node, +# it must be specified for all nodes. +# +# @mem: #optional memory size of this node; mutually exclusive with @memdev. # # Since: 2.0 ## @@ -4449,4 +4452,5 @@ 'data': { '*nodeid': 'uint16', '*cpus': ['uint16'], + '*memdev': 'str', '*mem': 'str' }}