Message ID | 1371795960-10478-5-git-send-email-gaowanlong@cn.fujitsu.com |
---|---|
State | New |
Headers | show |
Wanlong Gao <gaowanlong@cn.fujitsu.com> writes: > The memory policy setting format is like: > mem-policy={membind|interleave|preferred},mem-hostnode=[+|!]{all|N-N} > And we are adding this setting as a suboption of "-numa", > the memory policy then can be set like following: > -numa node,nodeid=0,mem=1024,cpus=0,mem-policy=membind,mem-hostnode=0-1 > -numa node,nodeid=1,mem=1024,cpus=1,mem-policy=interleave,mem-hostnode=!1 > > Signed-off-by: Andre Przywara <andre.przywara@amd.com> > Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com> > --- > include/sysemu/sysemu.h | 8 ++++ > vl.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 118 insertions(+) > > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h > index 70fd2ed..993b8e0 100644 > --- a/include/sysemu/sysemu.h > +++ b/include/sysemu/sysemu.h > @@ -130,10 +130,18 @@ extern QEMUClock *rtc_clock; > > #define MAX_NODES 64 > #define MAX_CPUMASK_BITS 255 > +#define NODE_HOST_NONE 0x00 > +#define NODE_HOST_BIND 0x01 > +#define NODE_HOST_INTERLEAVE 0x02 > +#define NODE_HOST_PREFERRED 0x03 > +#define NODE_HOST_POLICY_MASK 0x03 > +#define NODE_HOST_RELATIVE 0x04 > extern int nb_numa_nodes; > struct node_info { > uint64_t node_mem; > DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); > + DECLARE_BITMAP(host_mem, MAX_CPUMASK_BITS); > + unsigned int flags; > }; > extern struct node_info numa_info[MAX_NODES]; > > diff --git a/vl.c b/vl.c > index 357137b..4dbf5cc 100644 > --- a/vl.c > +++ b/vl.c > @@ -536,6 +536,14 @@ static QemuOptsList qemu_numa_opts = { > .name = "cpus", > .type = QEMU_OPT_STRING, > .help = "cpu number or range" > + },{ > + .name = "mem-policy", > + .type = QEMU_OPT_STRING, > + .help = "memory policy" > + },{ > + .name = "mem-hostnode", > + .type = QEMU_OPT_STRING, > + .help = "host node number or range for memory policy" > }, > { /* end of list */ } > }, > @@ -1374,6 +1382,79 @@ error: > exit(1); > } > > +static void numa_node_parse_mpol(int nodenr, const char *mpol) > +{ > 
+ if (!mpol) { > + return; > + } > + > + if (!strcmp(mpol, "interleave")) { > + numa_info[nodenr].flags |= NODE_HOST_INTERLEAVE; > + } else if (!strcmp(mpol, "preferred")) { > + numa_info[nodenr].flags |= NODE_HOST_PREFERRED; > + } else if (!strcmp(mpol, "membind")) { > + numa_info[nodenr].flags |= NODE_HOST_BIND; > + } else { > + fprintf(stderr, "qemu: Invalid memory policy: %s\n", mpol); > + } > +} > + > +static void numa_node_parse_hostnode(int nodenr, const char *hostnode) > +{ > + unsigned long long value, endvalue; > + char *endptr; > + bool clear = false; > + unsigned long *bm = numa_info[nodenr].host_mem; > + > + if (hostnode[0] == '!') { > + clear = true; > + bitmap_fill(bm, MAX_CPUMASK_BITS); > + hostnode++; > + } > + if (hostnode[0] == '+') { > + numa_info[nodenr].flags |= NODE_HOST_RELATIVE; > + hostnode++; > + } > + > + if (!strcmp(hostnode, "all")) { > + bitmap_fill(bm, MAX_CPUMASK_BITS); > + return; > + } > + > + if (parse_uint(hostnode, &value, &endptr, 10) < 0) > + goto error; > + if (*endptr == '-') { > + if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { > + goto error; > + } > + } else if (*endptr == '\0') { > + endvalue = value; > + } else { > + goto error; > + } > + > + if (endvalue >= MAX_CPUMASK_BITS) { > + endvalue = MAX_CPUMASK_BITS - 1; > + fprintf(stderr, > + "qemu: NUMA: A max of %d host nodes are supported\n", > + MAX_CPUMASK_BITS); > + } > + > + if (endvalue < value) { > + goto error; > + } > + > + if (clear) > + bitmap_clear(bm, value, endvalue - value + 1); > + else > + bitmap_set(bm, value, endvalue - value + 1); > + > + return; > + > +error: > + fprintf(stderr, "qemu: Invalid host NUMA nodes range: %s\n", hostnode); > + return; > +} Some of the parsing being done here is also being done in numa_node_parse_cpus(). To avoid code duplication, maybe we should just have a common function that can be called from both places. 
Bandan > static int numa_add_cpus(const char *name, const char *value, void *opaque) > { > @@ -1385,6 +1466,25 @@ static int numa_add_cpus(const char *name, const char *value, void *opaque) > return 0; > } > > +static int numa_add_mpol(const char *name, const char *value, void *opaque) > +{ > + int *nodenr = opaque; > + > + if (!strcmp(name, "mem-policy")) { > + numa_node_parse_mpol(*nodenr, value); > + } > + return 0; > +} > + > +static int numa_add_hostnode(const char *name, const char *value, void *opaque) > +{ > + int *nodenr = opaque; > + if (!strcmp(name, "mem-hostnode")) { > + numa_node_parse_hostnode(*nodenr, value); > + } > + return 0; > +} > + > static int numa_init_func(QemuOpts *opts, void *opaque) > { > uint64_t nodenr, mem_size; > @@ -1404,6 +1504,14 @@ static int numa_init_func(QemuOpts *opts, void *opaque) > return -1; > } > > + if (qemu_opt_foreach(opts, numa_add_mpol, &nodenr, 1) < 0) { > + return -1; > + } > + > + if (qemu_opt_foreach(opts, numa_add_hostnode, &nodenr, 1) < 0) { > + return -1; > + } > + > return 0; > } > > @@ -2930,6 +3038,8 @@ int main(int argc, char **argv, char **envp) > for (i = 0; i < MAX_NODES; i++) { > numa_info[i].node_mem = 0; > bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS); > + bitmap_zero(numa_info[i].host_mem, MAX_CPUMASK_BITS); > + numa_info[i].flags = NODE_HOST_NONE; > } > > nb_numa_nodes = 0;
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 70fd2ed..993b8e0 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -130,10 +130,18 @@ extern QEMUClock *rtc_clock; #define MAX_NODES 64 #define MAX_CPUMASK_BITS 255 +#define NODE_HOST_NONE 0x00 +#define NODE_HOST_BIND 0x01 +#define NODE_HOST_INTERLEAVE 0x02 +#define NODE_HOST_PREFERRED 0x03 +#define NODE_HOST_POLICY_MASK 0x03 +#define NODE_HOST_RELATIVE 0x04 extern int nb_numa_nodes; struct node_info { uint64_t node_mem; DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); + DECLARE_BITMAP(host_mem, MAX_CPUMASK_BITS); + unsigned int flags; }; extern struct node_info numa_info[MAX_NODES]; diff --git a/vl.c b/vl.c index 357137b..4dbf5cc 100644 --- a/vl.c +++ b/vl.c @@ -536,6 +536,14 @@ static QemuOptsList qemu_numa_opts = { .name = "cpus", .type = QEMU_OPT_STRING, .help = "cpu number or range" + },{ + .name = "mem-policy", + .type = QEMU_OPT_STRING, + .help = "memory policy" + },{ + .name = "mem-hostnode", + .type = QEMU_OPT_STRING, + .help = "host node number or range for memory policy" }, { /* end of list */ } }, @@ -1374,6 +1382,79 @@ error: exit(1); } +static void numa_node_parse_mpol(int nodenr, const char *mpol) +{ + if (!mpol) { + return; + } + + if (!strcmp(mpol, "interleave")) { + numa_info[nodenr].flags |= NODE_HOST_INTERLEAVE; + } else if (!strcmp(mpol, "preferred")) { + numa_info[nodenr].flags |= NODE_HOST_PREFERRED; + } else if (!strcmp(mpol, "membind")) { + numa_info[nodenr].flags |= NODE_HOST_BIND; + } else { + fprintf(stderr, "qemu: Invalid memory policy: %s\n", mpol); + } +} + +static void numa_node_parse_hostnode(int nodenr, const char *hostnode) +{ + unsigned long long value, endvalue; + char *endptr; + bool clear = false; + unsigned long *bm = numa_info[nodenr].host_mem; + + if (hostnode[0] == '!') { + clear = true; + bitmap_fill(bm, MAX_CPUMASK_BITS); + hostnode++; + } + if (hostnode[0] == '+') { + numa_info[nodenr].flags |= NODE_HOST_RELATIVE; + hostnode++; + } + + 
if (!strcmp(hostnode, "all")) { + bitmap_fill(bm, MAX_CPUMASK_BITS); + return; + } + + if (parse_uint(hostnode, &value, &endptr, 10) < 0) + goto error; + if (*endptr == '-') { + if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { + goto error; + } + } else if (*endptr == '\0') { + endvalue = value; + } else { + goto error; + } + + if (endvalue >= MAX_CPUMASK_BITS) { + endvalue = MAX_CPUMASK_BITS - 1; + fprintf(stderr, + "qemu: NUMA: A max of %d host nodes are supported\n", + MAX_CPUMASK_BITS); + } + + if (endvalue < value) { + goto error; + } + + if (clear) + bitmap_clear(bm, value, endvalue - value + 1); + else + bitmap_set(bm, value, endvalue - value + 1); + + return; + +error: + fprintf(stderr, "qemu: Invalid host NUMA nodes range: %s\n", hostnode); + return; +} static int numa_add_cpus(const char *name, const char *value, void *opaque) { @@ -1385,6 +1466,25 @@ static int numa_add_cpus(const char *name, const char *value, void *opaque) return 0; } +static int numa_add_mpol(const char *name, const char *value, void *opaque) +{ + int *nodenr = opaque; + + if (!strcmp(name, "mem-policy")) { + numa_node_parse_mpol(*nodenr, value); + } + return 0; +} + +static int numa_add_hostnode(const char *name, const char *value, void *opaque) +{ + int *nodenr = opaque; + if (!strcmp(name, "mem-hostnode")) { + numa_node_parse_hostnode(*nodenr, value); + } + return 0; +} + static int numa_init_func(QemuOpts *opts, void *opaque) { uint64_t nodenr, mem_size; @@ -1404,6 +1504,14 @@ static int numa_init_func(QemuOpts *opts, void *opaque) return -1; } + if (qemu_opt_foreach(opts, numa_add_mpol, &nodenr, 1) < 0) { + return -1; + } + + if (qemu_opt_foreach(opts, numa_add_hostnode, &nodenr, 1) < 0) { + return -1; + } + return 0; } @@ -2930,6 +3038,8 @@ int main(int argc, char **argv, char **envp) for (i = 0; i < MAX_NODES; i++) { numa_info[i].node_mem = 0; bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS); + bitmap_zero(numa_info[i].host_mem, MAX_CPUMASK_BITS); + numa_info[i].flags 
= NODE_HOST_NONE; } nb_numa_nodes = 0;