Message ID: 417b83d0e074e2004aa35bef337d32ac2c89f559.1467904342.git.pkrempa@redhat.com
State: New
On Thu, Jul 07, 2016 at 05:17:14PM +0200, Peter Krempa wrote:
> Add a helper that looks up the NUMA node for a given CPU and use it to
> fill the node_id in the PPC and X86 impls of query-hotpluggable-cpus.
>
> Signed-off-by: Peter Krempa <pkrempa@redhat.com>
> ---
>  hw/i386/pc.c          |  7 +++++++
>  hw/ppc/spapr.c        |  8 ++++++--
>  include/sysemu/numa.h |  1 +
>  numa.c                | 13 +++++++++++++
>  4 files changed, 27 insertions(+), 2 deletions(-)

The helper function should be a separate patch, and Igor beat you to it, see
https://www.mail-archive.com/qemu-devel@nongnu.org/msg383461.html

drew

[...]
On Thu, 7 Jul 2016 17:17:14 +0200
Peter Krempa <pkrempa@redhat.com> wrote:

> Add a helper that looks up the NUMA node for a given CPU and use it to
> fill the node_id in the PPC and X86 impls of query-hotpluggable-cpus.

IIUC how the query thing works, this means that the node id issued by
query-hotpluggable-cpus will be echoed back to device_add by libvirt.

I'm not sure we actually process that information in the core at
present, so I don't know that that's right.

We need to be clear on which direction information is flowing here.
Does query-hotpluggable-cpus *define* the NUMA node allocation, which is
then passed to the core device which implements it?  Or is the NUMA
allocation defined elsewhere, and query-hotpluggable-cpus just reports
it?

[...]

> @@ -2381,8 +2382,11 @@ static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine)
>          cpu_item->vcpu_id = i;
>          cpu_props->has_core_id = true;
>          cpu_props->core_id = i * smt;
> -        /* TODO: add 'has_node/node' here to describe
> -           to which node core belongs */
> +
> +        if ((node_id = numa_node_get_by_cpu_index(i)) >= 0) {

As with the previous patch, this is incorrect, because
numa_node_get_by_cpu_index() works from a vcpu (i.e. thread) index, but
you're passing a core index.

> +            cpu_props->has_node_id = true;
> +            cpu_props->node_id = node_id;
> +        }

[...]
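To make the index mismatch concrete, here is a toy, self-contained sketch
(not QEMU code; the layout, names and thread count are assumptions made for
illustration): with two threads per core, vcpus 0-3 on node 0 and vcpus 4-7
on node 1, looking a core up by its core index diverges from looking it up
by the cpu_index of its first thread as soon as the two index spaces drift
apart.

    /* Toy illustration of the core-index vs. thread-index mismatch
     * described above.  Not QEMU code; all values are invented. */
    #include <stdio.h>

    #define NB_NODES    2
    #define SMP_THREADS 2
    #define MAX_CPUS    8   /* 4 cores x 2 threads */

    /* node_cpu[node][cpu_index]: nonzero if the vcpu (thread) is on the
     * node; node 0 holds cpus 0-3, node 1 holds cpus 4-7. */
    static const int node_cpu[NB_NODES][MAX_CPUS] = {
        { 1, 1, 1, 1, 0, 0, 0, 0 },
        { 0, 0, 0, 0, 1, 1, 1, 1 },
    };

    static int numa_node_get_by_cpu_index(int cpu_index)
    {
        for (int node = 0; node < NB_NODES; node++) {
            if (node_cpu[node][cpu_index]) {
                return node;
            }
        }
        return -1;
    }

    int main(void)
    {
        for (int core = 0; core < MAX_CPUS / SMP_THREADS; core++) {
            int by_core_index   = numa_node_get_by_cpu_index(core);
            int by_thread_index = numa_node_get_by_cpu_index(core * SMP_THREADS);
            printf("core %d: by core index -> node %d, "
                   "by first-thread index -> node %d\n",
                   core, by_core_index, by_thread_index);
        }
        return 0;
    }

For cores 2 and 3 the core-index lookup reports node 0 even though their
threads live on node 1, which is exactly the mismatch the review comment
describes.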
On Fri, Jul 08, 2016 at 12:23:08 +1000, David Gibson wrote:
> On Thu, 7 Jul 2016 17:17:14 +0200
> Peter Krempa <pkrempa@redhat.com> wrote:
>
> > Add a helper that looks up the NUMA node for a given CPU and use it to
> > fill the node_id in the PPC and X86 impls of query-hotpluggable-cpus.
>
> IIUC how the query thing works, this means that the node id issued by
> query-hotpluggable-cpus will be echoed back to device_add by libvirt.

It will be echoed back, but the problem is that it's configured
separately ...

> I'm not sure we actually process that information in the core at
> present, so I don't know that that's right.
>
> We need to be clear on which direction information is flowing here.
> Does query-hotpluggable-cpus *define* the NUMA node allocation, which is
> then passed to the core device which implements it?  Or is the NUMA
> allocation defined elsewhere, and query-hotpluggable-cpus just reports
> it?

Currently (in the pre-hotplug era) the NUMA topology is defined by
specifying CPU numbers (see the previous patch) on the command line:

  -numa node=1,cpus=1-5,cpus=8,cpus=11...

This is then reported to the guest.

For a machine started with:

  -smp 5,maxcpus=8,sockets=2,cores=2,threads=2
  -numa node,nodeid=0,cpus=0,cpus=2,cpus=4,cpus=6,mem=500
  -numa node,nodeid=1,cpus=1,cpus=3,cpus=5,cpus=7,mem=500

you get the following topology, which is not really possible with
hardware:

  # lscpu
  Architecture:          x86_64
  CPU op-mode(s):        32-bit, 64-bit
  Byte Order:            Little Endian
  CPU(s):                5
  On-line CPU(s) list:   0-4
  Thread(s) per core:    1
  Core(s) per socket:    2
  Socket(s):             2
  NUMA node(s):          2
  Vendor ID:             GenuineIntel
  CPU family:            6
  Model:                 6
  Model name:            QEMU Virtual CPU version 2.5+
  Stepping:              3
  CPU MHz:               3504.318
  BogoMIPS:              7008.63
  Hypervisor vendor:     KVM
  Virtualization type:   full
  L1d cache:             32K
  L1i cache:             32K
  L2 cache:              4096K
  NUMA node0 CPU(s):     0,2,4
  NUMA node1 CPU(s):     1,3

Note that the count of cpus per NUMA node does not need to be identical.

Given the above, 'query-hotpluggable-cpus' will need to report the data
that was configured above even if it doesn't make much sense in the real
world.

I did not try the above on a PPC host and thus I'm not sure whether the
config above is allowed or not.

While hotplugged CPUs could be plugged into the correct node according to
the queried data, with the current approach it's impossible to set up the
initial vcpus differently.

Peter

Note: For libvirt it's a no-go to start a throwaway qemu process just to
query the information, and thus it's desired to have a way to configure
all this without the need to query with a specific machine
type/topology.
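The oddity is easier to see when each cpu_index is decomposed into its
socket/core/thread position and paired with the -numa assignment above.
The following toy sketch is not QEMU code; the decomposition formulas are
an assumption that mirrors what the x86 topology helpers compute for a
plain sockets/cores/threads layout.

    /* Toy decomposition of cpu_index for the example above.  Not QEMU
     * code; formulas and the node assignment are restated assumptions. */
    #include <stdio.h>

    #define SOCKETS 2
    #define CORES   2
    #define THREADS 2
    #define MAXCPUS (SOCKETS * CORES * THREADS)

    int main(void)
    {
        for (int cpu = 0; cpu < MAXCPUS; cpu++) {
            int smt_id  = cpu % THREADS;               /* thread within core  */
            int core_id = (cpu / THREADS) % CORES;     /* core within socket  */
            int pkg_id  = cpu / (THREADS * CORES);     /* socket (package)    */
            int node    = cpu % 2;  /* -numa node0: 0,2,4,6  node1: 1,3,5,7 */

            printf("cpu_index %d -> socket %d core %d thread %d, NUMA node %d\n",
                   cpu, pkg_id, core_id, smt_id, node);
        }
        return 0;
    }

Running it shows that cpu_index 0 and 1 are the two threads of socket 0,
core 0, yet the -numa lines above put them on different NUMA nodes, a
split that no physical package would exhibit.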
On Thu, 7 Jul 2016 17:17:14 +0200
Peter Krempa <pkrempa@redhat.com> wrote:

> Add a helper that looks up the NUMA node for a given CPU and use it to
> fill the node_id in the PPC and X86 impls of query-hotpluggable-cpus.

[...]

> @@ -2138,6 +2139,12 @@ static HotpluggableCPUList *pc_query_hotpluggable_cpus(MachineState *machine)
>          cpu_props->core_id = topo.core_id;
>          cpu_props->has_thread_id = true;
>          cpu_props->thread_id = topo.smt_id;
> +
> +        if ((node_id = numa_node_get_by_cpu_index(i)) >= 0) {
> +            cpu_props->has_node_id = true;
> +            cpu_props->node_id = node_id;
> +        }

I've not included node_id for a reason: "-numa cpus=1,2,3..." looks
hopelessly broken to me now, but I've not come up with an idea for how to
redo it in a nice and clean way yet.

An alternative could be CLI-less NUMA configuration, where QEMU is started
without "-numa cpus" but with "-S"; then mgmt could call
query_hotpluggable_cpus() to get the possible CPUs and then map them to
NUMA nodes with a new QMP command, using the attributes it got from
query_hotpluggable_cpus().

It's along the same lines as starting QEMU with -smp 1,maxcpus=X and then
adding the remaining CPUs with device_add after getting their properties
from query_hotpluggable_cpus().

Then at machine_done time we can adjust the DT/ACPI data to reflect the
configured mapping.

[...]
On Fri, Jul 08, 2016 at 13:54:58 +0200, Igor Mammedov wrote:
> On Thu, 7 Jul 2016 17:17:14 +0200
> Peter Krempa <pkrempa@redhat.com> wrote:
>
> > Add a helper that looks up the NUMA node for a given CPU and use it to
> > fill the node_id in the PPC and X86 impls of query-hotpluggable-cpus.

[...]

> I've not included node_id for a reason: "-numa cpus=1,2,3..." looks
> hopelessly broken to me now, but I've not come up with an idea for how to
> redo it in a nice and clean way yet.
>
> An alternative could be CLI-less NUMA configuration, where QEMU is started
> without "-numa cpus" but with "-S"; then mgmt could call
> query_hotpluggable_cpus() to get the possible CPUs and then map them to
> NUMA nodes with a new QMP command, using the attributes it got from
> query_hotpluggable_cpus().

I think this could work for libvirt. The CPU index we currently expose
in the XML would become just a libvirt-internal detail and the new QMP
command would be used to do the setup. Adding some QMP calls during VM
startup is okay.

> It's along the same lines as starting QEMU with -smp 1,maxcpus=X and then
> adding the remaining CPUs with device_add after getting their properties
> from query_hotpluggable_cpus().

I'm going to use a similar approach even for the hotpluggable cpus so I
can query the data for a new VM. On the other hand, I can't make libvirt
use the -smp 1,... approach all the time, since we guarantee that an XML
that worked on an older version will be migratable back to the older
version.

> Then at machine_done time we can adjust the DT/ACPI data to reflect the
> configured mapping.

In that case this series can be dropped, since it provides what I need
differently.

Thanks,

Peter
On Fri, 8 Jul 2016 09:46:00 +0200
Peter Krempa <pkrempa@redhat.com> wrote:

[...]
> Note: For libvirt it's a no-go to start a throwaway qemu process just to
> query the information, and thus it's desired to have a way to configure
> all this without the need to query with a specific machine
> type/topology.

Is it a no-go to start a throwaway qemu at new domain creation time?
I.e. the user creates a new domain with a specific -machine/-smp layout,
libvirt stores the results of query-hotpluggable-cpus, and then lets the
user in (e.g.) virt-manager pick which CPUs are enabled and, optionally,
to which NUMA nodes they are mapped.
On Fri, 8 Jul 2016 14:04:23 +0200
Peter Krempa <pkrempa@redhat.com> wrote:

> On Fri, Jul 08, 2016 at 13:54:58 +0200, Igor Mammedov wrote:

[...]

> > It's along the same lines as starting QEMU with -smp 1,maxcpus=X and then
> > adding the remaining CPUs with device_add after getting their properties
> > from query_hotpluggable_cpus().
>
> I'm going to use a similar approach even for the hotpluggable cpus so I
> can query the data for a new VM. On the other hand, I can't make libvirt
> use the -smp 1,... approach all the time, since we guarantee that an XML
> that worked on an older version will be migratable back to the older
> version.

Could you explain a little bit more about the issue?

> > Then at machine_done time we can adjust the DT/ACPI data to reflect the
> > configured mapping.
>
> In that case this series can be dropped, since it provides what I need
> differently.
>
> Thanks,
>
> Peter
On Fri, Jul 08, 2016 at 14:06:31 +0200, Igor Mammedov wrote:
> On Fri, 8 Jul 2016 09:46:00 +0200
> Peter Krempa <pkrempa@redhat.com> wrote:
>
> [...]
> > Note: For libvirt it's a no-go to start a throwaway qemu process just to
> > query the information, and thus it's desired to have a way to configure
> > all this without the need to query with a specific machine
> > type/topology.
>
> Is it a no-go to start a throwaway qemu at new domain creation time?

Yes. The policy is that once we are starting the VM, the qemu process we
create is the one that runs the VM later. We can tweak stuff using QMP
and/or kill it if the configuration requested by the user can't be
achieved, but starting a different qemu process would not be accepted
upstream.

Capability querying is done prior to that with a qemu process started
with -M none, and the results are cached for the given combination of
qemu binary and libvirtd binary (even across restarts of the libvirtd
binary).

> I.e. the user creates a new domain with a specific -machine/-smp layout,
> libvirt stores the results of query-hotpluggable-cpus, and then lets the
> user in (e.g.) virt-manager pick which CPUs are enabled and, optionally,
> to which NUMA nodes they are mapped.

We can update certain stuff that was not explicitly configured by the
user to the state that will then be used by qemu.
On Fri, Jul 08, 2016 at 14:10:59 +0200, Igor Mammedov wrote:
> On Fri, 8 Jul 2016 14:04:23 +0200
> Peter Krempa <pkrempa@redhat.com> wrote:

[...]

> > > It's along the same lines as starting QEMU with -smp 1,maxcpus=X and then
> > > adding the remaining CPUs with device_add after getting their properties
> > > from query_hotpluggable_cpus().
> >
> > I'm going to use a similar approach even for the hotpluggable cpus so I
> > can query the data for a new VM. On the other hand, I can't make libvirt
> > use the -smp 1,... approach all the time, since we guarantee that an XML
> > that worked on an older version will be migratable back to the older
> > version.
>
> Could you explain a little bit more about the issue?

Libvirt attempts to maintain compatibility of the XML definition file
even for migrations to older versions. If you are able to start a VM with
an XML on libvirt version 'A', then when you use the same XML to start it
on a newer version 'B' we still need to be able to migrate the VM back to
'A'.

This means that vCPUs added via -device/device_add can be used only for
configurations that explicitly enable them. This generally shouldn't be a
problem, but it means we still need either the 'old' way to keep working
properly or an approach that is compatible with migration.

For the specific case of vCPU hotplug this means that if you don't
hotplug any CPU, it needs to work with older libvirt, including the NUMA
topology and all other possible stuff.
On Fri, 8 Jul 2016 09:46:00 +0200
Peter Krempa <pkrempa@redhat.com> wrote:

> On Fri, Jul 08, 2016 at 12:23:08 +1000, David Gibson wrote:

[...]

> For a machine started with:
>
>   -smp 5,maxcpus=8,sockets=2,cores=2,threads=2
>   -numa node,nodeid=0,cpus=0,cpus=2,cpus=4,cpus=6,mem=500
>   -numa node,nodeid=1,cpus=1,cpus=3,cpus=5,cpus=7,mem=500
>
> you get the following topology, which is not really possible with
> hardware:

[...]

> Note that the count of cpus per NUMA node does not need to be identical.
>
> Given the above, 'query-hotpluggable-cpus' will need to report the data
> that was configured above even if it doesn't make much sense in the real
> world.
>
> I did not try the above on a PPC host and thus I'm not sure whether the
> config above is allowed or not.

It's not - although I'm not sure that we actually have something
enforcing this.  However, single cores *must* be in the same NUMA node -
there's no way of reporting to the guest anything finer grained.

[...]
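As a toy, self-contained illustration of that constraint (not QEMU code;
the thread count and the thread-to-node map are invented for the example),
a validity check over a thread-to-node assignment would reject any layout
that splits a core across nodes:

    /* Toy check of the "whole core on one node" rule for spapr.
     * Not QEMU code; layout and names are assumptions. */
    #include <stdbool.h>
    #include <stdio.h>

    #define SMP_THREADS 4
    #define MAX_CPUS    8

    /* thread index -> NUMA node; this example splits core 1 across nodes */
    static const int cpu_to_node[MAX_CPUS] = { 0, 0, 0, 0, 0, 1, 1, 1 };

    static bool core_nodes_consistent(void)
    {
        for (int core = 0; core < MAX_CPUS / SMP_THREADS; core++) {
            int first = cpu_to_node[core * SMP_THREADS];
            for (int t = 1; t < SMP_THREADS; t++) {
                int node = cpu_to_node[core * SMP_THREADS + t];
                if (node != first) {
                    printf("core %d spans nodes %d and %d: invalid on spapr\n",
                           core, first, node);
                    return false;
                }
            }
        }
        return true;
    }

    int main(void)
    {
        printf("layout %s\n", core_nodes_consistent() ? "ok" : "rejected");
        return 0;
    }

The x86 example earlier in the thread would fail such a check too, which
is why it cannot be represented on a PPC guest.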
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 4ba02c4..a0b9507 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2115,6 +2115,7 @@ static HotpluggableCPUList *pc_query_hotpluggable_cpus(MachineState *machine)
     HotpluggableCPUList *head = NULL;
     PCMachineState *pcms = PC_MACHINE(machine);
     const char *cpu_type;
+    int node_id;
 
     cpu = pcms->possible_cpus->cpus[0].cpu;
     assert(cpu); /* BSP is always present */
@@ -2138,6 +2139,12 @@ static HotpluggableCPUList *pc_query_hotpluggable_cpus(MachineState *machine)
         cpu_props->core_id = topo.core_id;
         cpu_props->has_thread_id = true;
         cpu_props->thread_id = topo.smt_id;
+
+        if ((node_id = numa_node_get_by_cpu_index(i)) >= 0) {
+            cpu_props->has_node_id = true;
+            cpu_props->node_id = node_id;
+        }
+
         cpu_item->props = cpu_props;
 
         cpu = pcms->possible_cpus->cpus[i].cpu;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d1f5195..06ba7fc 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2370,6 +2370,7 @@ static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine)
     sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
     int spapr_max_cores = max_cpus / smp_threads;
     int smt = kvmppc_smt_threads();
+    int node_id;
 
     for (i = 0; i < spapr_max_cores; i++) {
         HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
@@ -2381,8 +2382,11 @@ static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine)
         cpu_item->vcpu_id = i;
         cpu_props->has_core_id = true;
         cpu_props->core_id = i * smt;
-        /* TODO: add 'has_node/node' here to describe
-           to which node core belongs */
+
+        if ((node_id = numa_node_get_by_cpu_index(i)) >= 0) {
+            cpu_props->has_node_id = true;
+            cpu_props->node_id = node_id;
+        }
 
         cpu_item->props = cpu_props;
         if (spapr->cores[i]) {
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index bb184c9..04d7097 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -31,5 +31,6 @@ extern QemuOptsList qemu_numa_opts;
 void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
 void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
 uint32_t numa_get_node(ram_addr_t addr, Error **errp);
+int numa_node_get_by_cpu_index(int cpu_index);
 
 #endif
diff --git a/numa.c b/numa.c
index cbae430..365738a 100644
--- a/numa.c
+++ b/numa.c
@@ -506,6 +506,19 @@ void query_numa_node_mem(uint64_t node_mem[])
     }
 }
 
+int numa_node_get_by_cpu_index(int cpu_index)
+{
+    int i;
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        if (test_bit(cpu_index, numa_info[i].node_cpu)) {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
 static int query_memdev(Object *obj, void *opaque)
 {
     MemdevList **list = opaque;
Add a helper that looks up the NUMA node for a given CPU and use it to
fill the node_id in the PPC and X86 impls of query-hotpluggable-cpus.

Signed-off-by: Peter Krempa <pkrempa@redhat.com>
---
 hw/i386/pc.c          |  7 +++++++
 hw/ppc/spapr.c        |  8 ++++++--
 include/sysemu/numa.h |  1 +
 numa.c                | 13 +++++++++++++
 4 files changed, 27 insertions(+), 2 deletions(-)