Message ID | 1340145927-22544-1-git-send-email-chegu_vinod@hp.com |
---|---|
State | New |
Headers | show |
(removed root@hydra11.kio from CC) On Tue, Jun 19, 2012 at 03:45:27PM -0700, Chegu Vinod wrote: > From: root <root@hydra11.kio> > > Changes since v1: > ---------------- > > - Use bitmap functions that are already in qemu (instead > of cpu_set_t macro's) > - Added a check for endvalue >= max_cpus. > - Fix to address the round-robbing assignment (for the case > when cpu's are not explicitly specified) Tested-by: Eduardo Habkost <ehabkost@redhat.com> It works as expected, now. > > Note: Continuing to use a new constant for > allocation of the cpumask (max_cpus was > not getting set early enough). You could simply change the code so that the bitmaps are created only after max_cpus is set. Except for that, the rest of the patch looks OK, now. > [...] > diff --git a/cpus.c b/cpus.c > index b182b3d..89ce04d 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -36,6 +36,7 @@ > #include "cpus.h" > #include "qtest.h" > #include "main-loop.h" > +#include "bitmap.h" > > #ifndef _WIN32 > #include "compatfd.h" > @@ -1145,7 +1146,7 @@ void set_numa_modes(void) > > for (env = first_cpu; env != NULL; env = env->next_cpu) { > for (i = 0; i < nb_numa_nodes; i++) { > - if (node_cpumask[i] & (1 << env->cpu_index)) { > + if (test_bit(env->cpu_index, node_cpumask[i])) { > env->numa_node = i; > } > } > diff --git a/hw/pc.c b/hw/pc.c > index c7e9ab3..8800fc0 100644 > --- a/hw/pc.c > +++ b/hw/pc.c > @@ -48,6 +48,7 @@ > #include "memory.h" > #include "exec-memory.h" > #include "arch_init.h" > +#include "bitmap.h" > > /* output Bochs bios info messages */ > //#define DEBUG_BIOS > @@ -637,9 +638,10 @@ static void *bochs_bios_init(void) > */ > numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8); > numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); > + > for (i = 0; i < max_cpus; i++) { > for (j = 0; j < nb_numa_nodes; j++) { > - if (node_cpumask[j] & (1 << i)) { > + if (test_bit(i, node_cpumask[j])) { > numa_fw_cfg[i + 1] = cpu_to_le64(j); > break; > } > diff --git a/sysemu.h b/sysemu.h > index bc2c788..6813115 100644 > --- a/sysemu.h > +++ b/sysemu.h > @@ -133,9 +133,10 @@ extern uint8_t qemu_extra_params_fw[2]; > extern QEMUClock *rtc_clock; > > #define MAX_NODES 64 > +#define MAX_CPUMASK_BITS 254 Why 254, specifically? > extern int nb_numa_nodes; > extern uint64_t node_mem[MAX_NODES]; > -extern uint64_t node_cpumask[MAX_NODES]; > +extern uint64_t *node_cpumask[MAX_NODES]; > > #define MAX_OPTION_ROMS 16 > typedef struct QEMUOptionRom { > diff --git a/vl.c b/vl.c > index 1329c30..dd4ba75 100644 > --- a/vl.c > +++ b/vl.c > @@ -28,6 +28,7 @@ > #include <errno.h> > #include <sys/time.h> > #include <zlib.h> > +#include "bitmap.h" > > /* Needed early for CONFIG_BSD etc. */ > #include "config-host.h" > @@ -240,7 +241,7 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order > > int nb_numa_nodes; > uint64_t node_mem[MAX_NODES]; > -uint64_t node_cpumask[MAX_NODES]; > +uint64_t *node_cpumask[MAX_NODES]; > > uint8_t qemu_uuid[16]; > > @@ -950,6 +951,9 @@ static void numa_add(const char *optarg) > char *endptr; > unsigned long long value, endvalue; > int nodenr; > + int i; > + > + value = endvalue = 0; > > optarg = get_opt_name(option, 128, optarg, ',') + 1; > if (!strcmp(option, "node")) { > @@ -970,27 +974,24 @@ static void numa_add(const char *optarg) > } > node_mem[nodenr] = sval; > } > - if (get_param_value(option, 128, "cpus", optarg) == 0) { > - node_cpumask[nodenr] = 0; > - } else { > + if (get_param_value(option, 128, "cpus", optarg) != 0) { > value = strtoull(option, &endptr, 10); > - if (value >= 64) { > - value = 63; > - fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n"); > + if (*endptr == '-') { > + endvalue = strtoull(endptr+1, &endptr, 10); > } else { > - if (*endptr == '-') { > - endvalue = strtoull(endptr+1, &endptr, 10); > - if (endvalue >= 63) { > - endvalue = 62; > - fprintf(stderr, > - "only 63 CPUs in NUMA mode supported.\n"); > - } > - value = (2ULL << endvalue) - (1ULL << value); > - } else { > - value = 1ULL << value; > - } > + endvalue = value; > + } > + > + if (endvalue >= max_cpus) { > + endvalue = max_cpus - 1; > + fprintf(stderr, > + "A max of %d CPUs are supported in a guest on this host\n", > + max_cpus); > + } > + > + for (i = value; i <= endvalue; ++i) { > + set_bit(i, node_cpumask[nodenr]); > } > - node_cpumask[nodenr] = value; You changed behavior, here. Now the following command-line: "qemu -numa node,nodeid=0,cpus=1 -numa node,nodeid=0,cpus=2" will get both cpus 1 and 2 on node 0. Before your patch, node 0 would get only CPU 2. I think it is a desirable change, and the above command-line was already ambiguous, but this could be documented in the patch description. > } > nb_numa_nodes++; > } > @@ -2331,7 +2332,8 @@ int main(int argc, char **argv, char **envp) > > for (i = 0; i < MAX_NODES; i++) { > node_mem[i] = 0; > - node_cpumask[i] = 0; > + node_cpumask[i] = (unsigned long *) g_malloc0(MAX_CPUMASK_BITS); You are allocating 254 bytes here, not 254 bits. > + bitmap_zero(node_cpumask[i], MAX_CPUMASK_BITS); > } If MAX_CPUMASK_BITS is really going to be a constant (instead of using max_cpus), you don't even need to allocate the bitmaps dynamically. They could be static arrays. > > nb_numa_nodes = 0; > @@ -3469,16 +3471,18 @@ int main(int argc, char **argv, char **envp) > } > > for (i = 0; i < nb_numa_nodes; i++) { > - if (node_cpumask[i] != 0) > + if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { > break; > + } > } > + > /* assigning the VCPUs round-robin is easier to implement, guest OSes > * must cope with this anyway, because there are BIOSes out there in > * real machines which also use this scheme. > */ > if (i == nb_numa_nodes) { > for (i = 0; i < max_cpus; i++) { > - node_cpumask[i % nb_numa_nodes] |= 1 << i; > + set_bit(i, node_cpumask[i % nb_numa_nodes]); > } > } > } > -- > 1.7.1 >
Just found another issue: On Wed, Jun 20, 2012 at 05:33:29PM -0300, Eduardo Habkost wrote: [...] > > @@ -970,27 +974,24 @@ static void numa_add(const char *optarg) > > } > > node_mem[nodenr] = sval; > > } > > - if (get_param_value(option, 128, "cpus", optarg) == 0) { > > - node_cpumask[nodenr] = 0; > > - } else { > > + if (get_param_value(option, 128, "cpus", optarg) != 0) { > > value = strtoull(option, &endptr, 10); > > - if (value >= 64) { > > - value = 63; > > - fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n"); > > + if (*endptr == '-') { > > + endvalue = strtoull(endptr+1, &endptr, 10); > > } else { > > - if (*endptr == '-') { > > - endvalue = strtoull(endptr+1, &endptr, 10); > > - if (endvalue >= 63) { > > - endvalue = 62; > > - fprintf(stderr, > > - "only 63 CPUs in NUMA mode supported.\n"); > > - } > > - value = (2ULL << endvalue) - (1ULL << value); > > - } else { > > - value = 1ULL << value; > > - } > > + endvalue = value; > > + } > > + > > + if (endvalue >= max_cpus) { > > + endvalue = max_cpus - 1; > > + fprintf(stderr, > > + "A max of %d CPUs are supported in a guest on this host\n", > > + max_cpus); > > + } This makes the following command segfault: $ qemu-system-x86_64 -numa 'node,cpus=1-3' -smp 100 max_cpus may be not initialized yet at the call to numa_add(), as you are still parsing the command-line options.
-----Original Message----- From: Eduardo Habkost [mailto:ehabkost@redhat.com] Sent: Monday, June 25, 2012 1:01 PM To: Vinod, Chegu Cc: Hada, Craig M; Hull, Jim; qemu-devel@nongnu.org; kvm@vger.kernel.org Subject: Re: [Qemu-devel] [PATCH v2] Fixes related to processing of qemu's -numa option Just found another issue: On Wed, Jun 20, 2012 at 05:33:29PM -0300, Eduardo Habkost wrote: [...] > > @@ -970,27 +974,24 @@ static void numa_add(const char *optarg) > > } > > node_mem[nodenr] = sval; > > } > > - if (get_param_value(option, 128, "cpus", optarg) == 0) { > > - node_cpumask[nodenr] = 0; > > - } else { > > + if (get_param_value(option, 128, "cpus", optarg) != 0) { > > value = strtoull(option, &endptr, 10); > > - if (value >= 64) { > > - value = 63; > > - fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n"); > > + if (*endptr == '-') { > > + endvalue = strtoull(endptr+1, &endptr, 10); > > } else { > > - if (*endptr == '-') { > > - endvalue = strtoull(endptr+1, &endptr, 10); > > - if (endvalue >= 63) { > > - endvalue = 62; > > - fprintf(stderr, > > - "only 63 CPUs in NUMA mode supported.\n"); > > - } > > - value = (2ULL << endvalue) - (1ULL << value); > > - } else { > > - value = 1ULL << value; > > - } > > + endvalue = value; > > + } > > + > > + if (endvalue >= max_cpus) { > > + endvalue = max_cpus - 1; > > + fprintf(stderr, > > + "A max of %d CPUs are supported in a guest on this host\n", > > + max_cpus); > > + } >This makes the following command segfault: > >$ qemu-system-x86_64 -numa 'node,cpus=1-3' -smp 100 > >max_cpus may be not initialized yet at the call to numa_add(), as you are still parsing the command-line options. Yes. I am aware of this issue too... and shall post another version of the patch next week (along with another fix) . Thanks Vinod -- Eduardo
diff --git a/cpus.c b/cpus.c index b182b3d..89ce04d 100644 --- a/cpus.c +++ b/cpus.c @@ -36,6 +36,7 @@ #include "cpus.h" #include "qtest.h" #include "main-loop.h" +#include "bitmap.h" #ifndef _WIN32 #include "compatfd.h" @@ -1145,7 +1146,7 @@ void set_numa_modes(void) for (env = first_cpu; env != NULL; env = env->next_cpu) { for (i = 0; i < nb_numa_nodes; i++) { - if (node_cpumask[i] & (1 << env->cpu_index)) { + if (test_bit(env->cpu_index, node_cpumask[i])) { env->numa_node = i; } } diff --git a/hw/pc.c b/hw/pc.c index c7e9ab3..8800fc0 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -48,6 +48,7 @@ #include "memory.h" #include "exec-memory.h" #include "arch_init.h" +#include "bitmap.h" /* output Bochs bios info messages */ //#define DEBUG_BIOS @@ -637,9 +638,10 @@ static void *bochs_bios_init(void) */ numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); + for (i = 0; i < max_cpus; i++) { for (j = 0; j < nb_numa_nodes; j++) { - if (node_cpumask[j] & (1 << i)) { + if (test_bit(i, node_cpumask[j])) { numa_fw_cfg[i + 1] = cpu_to_le64(j); break; } diff --git a/sysemu.h b/sysemu.h index bc2c788..6813115 100644 --- a/sysemu.h +++ b/sysemu.h @@ -133,9 +133,10 @@ extern uint8_t qemu_extra_params_fw[2]; extern QEMUClock *rtc_clock; #define MAX_NODES 64 +#define MAX_CPUMASK_BITS 254 extern int nb_numa_nodes; extern uint64_t node_mem[MAX_NODES]; -extern uint64_t node_cpumask[MAX_NODES]; +extern uint64_t *node_cpumask[MAX_NODES]; #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { diff --git a/vl.c b/vl.c index 1329c30..dd4ba75 100644 --- a/vl.c +++ b/vl.c @@ -28,6 +28,7 @@ #include <errno.h> #include <sys/time.h> #include <zlib.h> +#include "bitmap.h" /* Needed early for CONFIG_BSD etc. */ #include "config-host.h" @@ -240,7 +241,7 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order int nb_numa_nodes; uint64_t node_mem[MAX_NODES]; -uint64_t node_cpumask[MAX_NODES]; +uint64_t *node_cpumask[MAX_NODES]; uint8_t qemu_uuid[16]; @@ -950,6 +951,9 @@ static void numa_add(const char *optarg) char *endptr; unsigned long long value, endvalue; int nodenr; + int i; + + value = endvalue = 0; optarg = get_opt_name(option, 128, optarg, ',') + 1; if (!strcmp(option, "node")) { @@ -970,27 +974,24 @@ static void numa_add(const char *optarg) } node_mem[nodenr] = sval; } - if (get_param_value(option, 128, "cpus", optarg) == 0) { - node_cpumask[nodenr] = 0; - } else { + if (get_param_value(option, 128, "cpus", optarg) != 0) { value = strtoull(option, &endptr, 10); - if (value >= 64) { - value = 63; - fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n"); + if (*endptr == '-') { + endvalue = strtoull(endptr+1, &endptr, 10); } else { - if (*endptr == '-') { - endvalue = strtoull(endptr+1, &endptr, 10); - if (endvalue >= 63) { - endvalue = 62; - fprintf(stderr, - "only 63 CPUs in NUMA mode supported.\n"); - } - value = (2ULL << endvalue) - (1ULL << value); - } else { - value = 1ULL << value; - } + endvalue = value; + } + + if (endvalue >= max_cpus) { + endvalue = max_cpus - 1; + fprintf(stderr, + "A max of %d CPUs are supported in a guest on this host\n", + max_cpus); + } + + for (i = value; i <= endvalue; ++i) { + set_bit(i, node_cpumask[nodenr]); } - node_cpumask[nodenr] = value; } nb_numa_nodes++; } @@ -2331,7 +2332,8 @@ int main(int argc, char **argv, char **envp) for (i = 0; i < MAX_NODES; i++) { node_mem[i] = 0; - node_cpumask[i] = 0; + node_cpumask[i] = (unsigned long *) g_malloc0(MAX_CPUMASK_BITS); + bitmap_zero(node_cpumask[i], MAX_CPUMASK_BITS); } nb_numa_nodes = 0; @@ -3469,16 +3471,18 @@ int main(int argc, char **argv, char **envp) } for (i = 0; i < nb_numa_nodes; i++) { - if (node_cpumask[i] != 0) + if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { break; + } } + /* assigning the VCPUs round-robin is easier to implement, guest OSes * must cope with this anyway, because there are BIOSes out there in * real machines which also use this scheme. */ if (i == nb_numa_nodes) { for (i = 0; i < max_cpus; i++) { - node_cpumask[i % nb_numa_nodes] |= 1 << i; + set_bit(i, node_cpumask[i % nb_numa_nodes]); } } }