Message ID | 20240329180742.384709-1-gregory.price@memverge.com |
---|---|
State | Changes Requested |
Headers | show |
Series | mempolicy/mbind: update syscall tests for weighted interleave | expand |
Hi Gregory, Thank you for starting this, comments are inline below. On Tue, Apr 2, 2024 at 5:22 PM Gregory Price <gourry.memverge@gmail.com> wrote: > Add the MPOL_WEIGHTED_INTERLEAVE policy to the syscall test suite for > get_mempolicy, set_mempolicy, and mbind. > > The functional memory distribution test only tests that the default > behavior of WEIGHTED_INTERLEAVE is the same as INTERLEAVE, since the > values for each node default to '1', and because behavior actually > depends on what node the test runs on. This test at least demonstrates > the interleave mechanism is in use and works by default as intended. > > MPOL_WEIGHTED_INTERLEAVE is ifdef defined because it is not upstream > in libnuma yet, so this ensures compilation. > > Signed-off-by: Gregory Price <gregory.price@memverge.com> > --- > .../syscalls/get_mempolicy/get_mempolicy01.c | 15 ++ > testcases/kernel/syscalls/mbind/mbind01.c | 17 ++ > testcases/kernel/syscalls/mbind/mbind02.c | 5 + > testcases/kernel/syscalls/mbind/mbind03.c | 5 + > testcases/kernel/syscalls/mbind/mbind04.c | 5 + > .../kernel/syscalls/set_mempolicy/.gitignore | 2 + > .../kernel/syscalls/set_mempolicy/Makefile | 2 +- > .../syscalls/set_mempolicy/set_mempolicy06.c | 122 +++++++++++++++ > .../syscalls/set_mempolicy/set_mempolicy07.c | 148 ++++++++++++++++++ > 9 files changed, 320 insertions(+), 1 deletion(-) > create mode 100644 > testcases/kernel/syscalls/set_mempolicy/set_mempolicy06.c > create mode 100644 > testcases/kernel/syscalls/set_mempolicy/set_mempolicy07.c > > diff --git a/testcases/kernel/syscalls/get_mempolicy/get_mempolicy01.c > b/testcases/kernel/syscalls/get_mempolicy/get_mempolicy01.c > index 23f62091a..8b0aa60bf 100644 > --- a/testcases/kernel/syscalls/get_mempolicy/get_mempolicy01.c > +++ b/testcases/kernel/syscalls/get_mempolicy/get_mempolicy01.c > @@ -26,6 +26,10 @@ > #include <errno.h> > #include "tst_numa.h" > > +#ifndef > #ifndef MPOL_WEIGHTED_INTERLEAVE ? > +#define MPOL_WEIGHTED_INTERLEAVE 6 > +#endif > And can we move this common part into include/lapi/numaif.h, to avoid defining this again and again in test cases? + > #define MEM_LENGTH (4 * 1024 * 1024) > #define PAGES_ALLOCATED 16u > > @@ -68,6 +72,11 @@ static struct test_case tcase[] = { > .alloc = test_set_mempolicy_default, > .exp_nodemask = &nodemask, > }, > + { > + POLICY_DESC(MPOL_WEIGHTED_INTERLEAVE), > + .alloc = test_set_mempolicy_default, > + .exp_nodemask = &nodemask, > + }, > { > POLICY_DESC_NO_TARGET(MPOL_PREFERRED), > .alloc = test_set_mempolicy_none, > @@ -95,6 +104,12 @@ static struct test_case tcase[] = { > .alloc = test_set_mempolicy_default, > .exp_nodemask = &nodemask, > }, > + { > + POLICY_DESC_FLAGS(MPOL_WEIGHTED_INTERLEAVE, MPOL_F_ADDR), > + .pre_test = test_mbind_default, > + .alloc = test_set_mempolicy_default, > + .exp_nodemask = &nodemask, > + }, > { > POLICY_DESC_FLAGS_NO_TARGET(MPOL_PREFERRED, MPOL_F_ADDR), > .pre_test = test_mbind_none, > diff --git a/testcases/kernel/syscalls/mbind/mbind01.c > b/testcases/kernel/syscalls/mbind/mbind01.c > index 4b8d168cd..d3d3b0049 100644 > --- a/testcases/kernel/syscalls/mbind/mbind01.c > +++ b/testcases/kernel/syscalls/mbind/mbind01.c > @@ -20,6 +20,10 @@ > #include "tst_numa.h" > #include "lapi/numaif.h" > > +#ifndef > +#define MPOL_WEIGHTED_INTERLEAVE 6 > +#endif > + > #ifdef HAVE_NUMA_V2 > > #define MEM_LENGTH (4 * 1024 * 1024) > @@ -80,6 +84,12 @@ static struct test_case tcase[] = { > .err = EINVAL, > .test = test_none, > }, > + { > + POLICY_DESC_TEXT(MPOL_WEIGHTED_INTERLEAVE, "no target"), > + .ret = -1, > + .err = EINVAL, > + .test = test_none, > + }, > { > POLICY_DESC(MPOL_INTERLEAVE), > .ret = 0, > @@ -87,6 +97,13 @@ static struct test_case tcase[] = { > .test = test_default, > .exp_nodemask = &nodemask, > }, > + { > + POLICY_DESC(MPOL_WEIGHTED_INTERLEAVE), > + .ret = 0, > + .err = 0, > + .test = test_default, > + .exp_nodemask = &nodemask, > + }, > { > POLICY_DESC_TEXT(MPOL_PREFERRED, "no target"), > .ret = 0, > diff --git a/testcases/kernel/syscalls/mbind/mbind02.c > b/testcases/kernel/syscalls/mbind/mbind02.c > index cd6a03226..66d9cd826 100644 > --- a/testcases/kernel/syscalls/mbind/mbind02.c > +++ b/testcases/kernel/syscalls/mbind/mbind02.c > @@ -23,6 +23,10 @@ > #include "tst_test.h" > #include "tst_numa.h" > > +#ifndef MPOL_WEIGHTED_INTERLEAVE > +#define MPOL_WEIGHTED_INTERLEAVE 6 > +#endif > ^ + > #ifdef HAVE_NUMA_V2 > > static size_t page_size; > @@ -92,6 +96,7 @@ static const int modes[] = { > MPOL_PREFERRED, > MPOL_BIND, > MPOL_INTERLEAVE, > + MPOL_WEIGHTED_INTERLEAVE, > }; > > static void verify_mbind(unsigned int n) > diff --git a/testcases/kernel/syscalls/mbind/mbind03.c > b/testcases/kernel/syscalls/mbind/mbind03.c > index 3d477c9cb..7da3c217d 100644 > --- a/testcases/kernel/syscalls/mbind/mbind03.c > +++ b/testcases/kernel/syscalls/mbind/mbind03.c > @@ -20,6 +20,10 @@ > #include "tst_test.h" > #include "tst_numa.h" > > +#ifndef MPOL_WEIGHTED_INTERLEAVE > +#define MPOL_WEIGHTED_INTERLEAVE 6 > +#endif > ^ + > #ifdef HAVE_NUMA_V2 > > static size_t page_size; > @@ -103,6 +107,7 @@ static const int modes[] = { > MPOL_PREFERRED, > MPOL_BIND, > MPOL_INTERLEAVE, > + MPOL_WEIGHTED_INTERLEAVE, > }; > > static void verify_mbind(unsigned int n) > diff --git a/testcases/kernel/syscalls/mbind/mbind04.c > b/testcases/kernel/syscalls/mbind/mbind04.c > index 50028835f..db41f9e29 100644 > --- a/testcases/kernel/syscalls/mbind/mbind04.c > +++ b/testcases/kernel/syscalls/mbind/mbind04.c > @@ -20,6 +20,10 @@ > #include "tst_test.h" > #include "tst_numa.h" > > +#ifndef MPOL_WEIGHTED_INTERLEAVE > +#define MPOL_WEIGHTED_INTERLEAVE 6 > +#endif > ^ + > #ifdef HAVE_NUMA_V2 > > static size_t page_size; > @@ -114,6 +118,7 @@ static const int modes[] = { > MPOL_PREFERRED, > MPOL_BIND, > MPOL_INTERLEAVE, > + MPOL_WEIGHTED_INTERLEAVE, > }; > > static void verify_mbind(unsigned int n) > diff --git a/testcases/kernel/syscalls/set_mempolicy/.gitignore > b/testcases/kernel/syscalls/set_mempolicy/.gitignore > index 4c121d2e0..3af10e9f9 100644 > --- a/testcases/kernel/syscalls/set_mempolicy/.gitignore > +++ b/testcases/kernel/syscalls/set_mempolicy/.gitignore > @@ -3,3 +3,5 @@ > /set_mempolicy03 > /set_mempolicy04 > /set_mempolicy05 > > +/set_mempolicy06 > > +/set_mempolicy07 > First, we do not suggest adding any new tests by applying one "big" patch especially since this contains too many other irrelevant modifications. We'd better separate them in single to guarantee everything goes well for traceability of the commit. Second, I don't see any new code in set_mempolicy06/07, since you only copied them from set_mempolicy02/04, even without any change of the comments, this is bad for test maintenance work and involves redundant stuff. The recommended way is to add MPOL_WEIGHTED_INTERLEAVE in the original case if you just want to validate the behavior similarly with MPOL_INTERLEAVE. But if you want to test something special/new of MPOL_WEIGHTED_INTERLEAVE, I think that's necessary to create set_mempolicy06/07, and do something validate that interleaving behavior is executed based on weights set in ' /sys/kernel/mm/mempolicy/weighted_interleave/'. > diff --git a/testcases/kernel/syscalls/set_mempolicy/Makefile > b/testcases/kernel/syscalls/set_mempolicy/Makefile > index 100780dc3..04208837d 100644 > --- a/testcases/kernel/syscalls/set_mempolicy/Makefile > +++ b/testcases/kernel/syscalls/set_mempolicy/Makefile > @@ -5,7 +5,7 @@ LTPLIBS = ltpnuma > > include $(top_srcdir)/include/mk/testcases.mk > > -NEEDS_LIBS = set_mempolicy01 set_mempolicy02 set_mempolicy03 > set_mempolicy04 > +NEEDS_LIBS = set_mempolicy01 set_mempolicy02 set_mempolicy03 > set_mempolicy04 set_mempolicy05 set_mempolicy06 set_mempolicy07 > > $(NEEDS_LIBS): LDLIBS += $(NUMA_LIBS) > $(NEEDS_LIBS): LTPLDLIBS = -lltpnuma > diff --git a/testcases/kernel/syscalls/set_mempolicy/set_mempolicy06.c > b/testcases/kernel/syscalls/set_mempolicy/set_mempolicy06.c > new file mode 100644 > index 000000000..287fd316a > --- /dev/null > +++ b/testcases/kernel/syscalls/set_mempolicy/set_mempolicy06.c > @@ -0,0 +1,122 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright (c) 2018 Cyril Hrubis <chrubis@suse.cz> > + */ > + > +/* > + * We are testing set_mempolicy() with MPOL_WEIGHTED_INTERLEAVE. > + * > + * The test tries different subsets of memory nodes, sets the mask with > + * memopolicy, and checks that the memory was interleaved between the > nodes > + * accordingly. > + */ > + > +#include <errno.h> > +#include "config.h" > +#ifdef HAVE_NUMA_V2 > +# include <numa.h> > +# include <numaif.h> > +#endif > +#include "tst_test.h" > +#include "tst_numa.h" > + > +#ifdef HAVE_NUMA_V2 > + > +#include "set_mempolicy.h" > + > +#ifndef MPOL_WEIGHTED_INTERLEAVE > +#define MPOL_WEIGHTED_INTERLEAVE 6 > +#endif > + > +#define ALLOC_ON_NODE 8 > + > +static size_t page_size; > +static struct tst_nodemap *nodes; > + > +static void setup(void) > +{ > + page_size = getpagesize(); > + > + nodes = tst_get_nodemap(TST_NUMA_MEM, 2 * ALLOC_ON_NODE * > page_size / 1024); > + if (nodes->cnt <= 1) > + tst_brk(TCONF, "Test requires at least two NUMA memory > nodes"); > +} > + > +static void cleanup(void) > +{ > + tst_nodemap_free(nodes); > +} > + > +static void alloc_and_check(size_t size, unsigned int *exp_alloc) > +{ > + unsigned int i; > + const char *prefix = "child: "; > + > + if (SAFE_FORK()) { > + prefix = "parent: "; > + tst_reap_children(); > + } > + > + tst_nodemap_reset_counters(nodes); > + alloc_fault_count(nodes, NULL, size * page_size); > + > + for (i = 0; i < nodes->cnt; i++) { > + if (nodes->counters[i] == exp_alloc[i]) { > + tst_res(TPASS, "%sNode %u allocated %u", > + prefix, nodes->map[i], > exp_alloc[i]); > + } else { > + tst_res(TFAIL, "%sNode %u allocated %u, expected > %u", > + prefix, nodes->map[i], > nodes->counters[i], > + exp_alloc[i]); > + } > + } > +} > + > +static void verify_set_mempolicy(unsigned int n) > +{ > + struct bitmask *bm = numa_allocate_nodemask(); > + unsigned int exp_alloc[nodes->cnt]; > + unsigned int alloc_per_node = n ? ALLOC_ON_NODE : 2; > + unsigned int alloc_on_nodes = n ? 2 : nodes->cnt; > + unsigned int alloc_total = alloc_per_node * alloc_on_nodes; > + unsigned int i; > + > + memset(exp_alloc, 0, sizeof(exp_alloc)); > + > + for (i = 0; i < alloc_on_nodes; i++) { > + exp_alloc[i] = alloc_per_node; > + numa_bitmask_setbit(bm, nodes->map[i]); > + } > + > + TEST(set_mempolicy(MPOL_WEIGHTED_INTERLEAVE, bm->maskp, > bm->size+1)); > + > + tst_res(TINFO, "Allocating on nodes 1-%u - %u pages", > + alloc_on_nodes, alloc_total); > + > + if (TST_RET) { > + tst_res(TFAIL | TTERRNO, > + "set_mempolicy(MPOL_WEIGHTED_INTERLEAVE)"); > + return; > + } > + > + tst_res(TPASS, "set_mempolicy(MPOL_WEIGHTED_INTERLEAVE)"); > + > + numa_free_nodemask(bm); > + > + alloc_and_check(alloc_total, exp_alloc); > +} > + > +static struct tst_test test = { > + .setup = setup, > + .cleanup = cleanup, > + .test = verify_set_mempolicy, > + .tcnt = 2, > + .forks_child = 1, > + .needs_checkpoints = 1, > +}; > + > +#else > + > +TST_TEST_TCONF(NUMA_ERROR_MSG); > + > +#endif /* HAVE_NUMA_V2 */ > diff --git a/testcases/kernel/syscalls/set_mempolicy/set_mempolicy07.c > b/testcases/kernel/syscalls/set_mempolicy/set_mempolicy07.c > new file mode 100644 > index 000000000..d88998ed5 > --- /dev/null > +++ b/testcases/kernel/syscalls/set_mempolicy/set_mempolicy07.c > @@ -0,0 +1,148 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright (c) 2018 Cyril Hrubis <chrubis@suse.cz> > + */ > + > +/* > + * We are testing set_mempolicy() with MPOL_WEIGHTED_INTERLEAVE on mmaped > buffers > + * backed by files. > + * > + * Apparently it takes a larger sample for the allocations to be correctly > + * interleaved. The reason for this is that buffers for file metadata are > + * allocated in batches in order not to loose performance. Also the pages > + * cannot be interleaved completely evenly unless the number of pages is > + * divideable by the number of nodes, which will not happen even if we > tried > + * hard since we do not have control over metadata blocks for instance. > Hence > + * we cannot really expect to allocate a single file and have the memory > + * interleaved precisely but it works well if we allocate statistic for a > more > + * than a few files. > + */ > + > +#include <stdio.h> > +#include <errno.h> > +#include "config.h" > +#ifdef HAVE_NUMA_V2 > +# include <numa.h> > +# include <numaif.h> > +#endif > +#include "tst_test.h" > +#include "tst_numa.h" > + > +#define MNTPOINT "mntpoint" > +#define FILES 10 > + > +#ifdef HAVE_NUMA_V2 > + > +#ifndef MPOL_WEIGHTED_INTERLEAVE > +#define MPOL_WEIGHTED_INTERLEAVE 6 > +#endif > + > +#include "set_mempolicy.h" > + > +static size_t page_size; > +static struct tst_nodemap *nodes; > + > +static void setup(void) > +{ > + int node_min_pages = FILES * (FILES + 1) / 2 * 10 + FILES * 10; > + > + page_size = getpagesize(); > + > + nodes = tst_get_nodemap(TST_NUMA_MEM, node_min_pages * page_size / > 1024); > + if (nodes->cnt <= 1) > + tst_brk(TCONF, "Test requires at least two NUMA memory > nodes"); > +} > + > +static void cleanup(void) > +{ > + tst_nodemap_free(nodes); > +} > + > +static void alloc_and_check(void) > +{ > + unsigned int i, j; > + char path[1024]; > + unsigned int total_pages = 0; > + unsigned int sum_pages = 0; > + > + tst_nodemap_reset_counters(nodes); > + > + /* > + * The inner loop loops node->cnt times to ensure the sum could > + * be evenly distributed among the nodes. > + */ > + for (i = 1; i <= FILES; i++) { > + for (j = 1; j <= nodes->cnt; j++) { > + size_t size = 10 * i + j % 10; > + > + snprintf(path, sizeof(path), MNTPOINT > "/numa-test-file-%i-%i", i, j); > + alloc_fault_count(nodes, path, size * page_size); > + total_pages += size; > + } > + } > + > + for (i = 0; i < nodes->cnt; i++) { > + float threshold = 1.00 * total_pages / 60; /* five > percents */ > + float min_pages = 1.00 * total_pages / nodes->cnt - > threshold; > + float max_pages = 1.00 * total_pages / nodes->cnt + > threshold; > + > + if (nodes->counters[i] > min_pages && nodes->counters[i] < > max_pages) { > + tst_res(TPASS, "Node %u allocated %u <%.2f,%.2f>", > + nodes->map[i], nodes->counters[i], > min_pages, max_pages); > + } else { > + tst_res(TFAIL, "Node %u allocated %u, expected > <%.2f,%.2f>", > + nodes->map[i], nodes->counters[i], > min_pages, max_pages); > + } > + > + sum_pages += nodes->counters[i]; > + } > + > + if (sum_pages != total_pages) { > + tst_res(TFAIL, "Sum of nodes %u != allocated pages %u", > + sum_pages, total_pages); > + return; > + } > + > + tst_res(TPASS, "Sum of nodes equals to allocated pages (%u)", > total_pages); > +} > + > +static void verify_set_mempolicy(void) > +{ > + struct bitmask *bm = numa_allocate_nodemask(); > + unsigned int alloc_on_nodes = nodes->cnt; > + unsigned int i; > + > + for (i = 0; i < alloc_on_nodes; i++) > + numa_bitmask_setbit(bm, nodes->map[i]); > + > + TEST(set_mempolicy(MPOL_WEIGHTED_INTERLEAVE, bm->maskp, > bm->size+1)); > + > + if (TST_RET) { > + tst_res(TFAIL | TTERRNO, > + "set_mempolicy(MPOL_WEIGHTED_INTERLEAVE)"); > + return; > + } > + > + tst_res(TPASS, "set_mempolicy(MPOL_WEIGHTED_INTERLEAVE)"); > + > + alloc_and_check(); > + > + numa_free_nodemask(bm); > +} > + > +static struct tst_test test = { > + .setup = setup, > + .cleanup = cleanup, > + .test_all = verify_set_mempolicy, > + .forks_child = 1, > + .needs_root = 1, > + .all_filesystems = 1, > + .mntpoint = MNTPOINT, > + .needs_checkpoints = 1, > +}; > + > +#else > + > +TST_TEST_TCONF(NUMA_ERROR_MSG); > + > +#endif /* HAVE_NUMA_V2 */ > -- > 2.39.1 > > > -- > Mailing list info: https://lists.linux.it/listinfo/ltp > >
On Mon, Apr 08, 2024 at 03:40:38PM +0800, Li Wang wrote: > Hi Gregory, > > Thank you for starting this, comments are inline below. > > > +#define MPOL_WEIGHTED_INTERLEAVE 6 > > +#endif > > > > And can we move this common part into include/lapi/numaif.h, > to avoid defining this again and again in test cases? > I have a pending patch to do just that, but it is not upstream yet. This was a comment in the changelog: > > MPOL_WEIGHTED_INTERLEAVE is ifdef defined because it is not upstream > > in libnuma yet, so this ensures compilation. Thought it was useful to shoot out a version of this before it all lands for the sake of getting ahead of the curve a bit. > > First, we do not suggest adding any new tests by applying one "big" > patch especially since this contains too many other irrelevant > modifications. > We'd better separate them in single to guarantee everything goes > well for traceability of the commit. > Will do. > Second, I don't see any new code in set_mempolicy06/07, since you > only copied them from set_mempolicy02/04, even without any change of the > comments, this is bad for test maintenance work and involves redundant > stuff. > the only major differences between the tests, presently, are that the policy applied is weighted interleave TEST(set_mempolicy(MPOL_WEIGHTED_INTERLEAVE, bm->maskp, bm->size+1)); ^^^^^^^^^^^^^^^^^^^^^^^^ In truth, this test isn't really completely, as it should also: 1) Set the sysfs values located at /sys/kernel/mm/mempolicy/weighted_interleave/ 2) Validate allocations actually match those settings However, this test is quite complicated to write and make fully reliable, as you also need to know 1) The environment (available nodes, cpu nodes, memory-only nodes) 2) The node the test will be run on (which can be forced) 3) Where allocations will start from (node X or node Y) as this can ultimately affect the final distribution. In my tests separately, the test itself can also cause allocation (stack, other regions) which may result in an unexpected distribution of memory on the target region. This is because those allocations are credited as part of the interleaving, but the existing code of the test cannot adjust for that. This may cause failures for no obvious reason. This is ultimately why I left the tests mostly unchanged, because I found it only reasonable to test the default behavior. > The recommended way is to add MPOL_WEIGHTED_INTERLEAVE in > the original case if you just want to validate the behavior similarly with > MPOL_INTERLEAVE. > > But if you want to test something special/new of MPOL_WEIGHTED_INTERLEAVE, > I think that's necessary to create set_mempolicy06/07, and do something > validate that interleaving behavior is executed based on weights set in ' > /sys/kernel/mm/mempolicy/weighted_interleave/'. > Was hoping to get input on this. I think probably trying to write a test to track page-distribution of real weighted interleave will be difficult to make reliable, so maybe I should just fold these tests back into the original and note that this simply tests the default behavior (which is equivalent to MPOL_INTERLEAVE). That may require changing the original tests to ensure the sysfs files are set to 1 to explicitly avoid failures. But I wasn't sure if that was ok since it would be making silent system-wide changes, and would require root. Thank you for the feedback ~Gregory
Hi Gregory, On Wed, Apr 10, 2024 at 7:40 AM Gregory Price <gregory.price@memverge.com> wrote: > On Mon, Apr 08, 2024 at 03:40:38PM +0800, Li Wang wrote: > > Hi Gregory, > > > > Thank you for starting this, comments are inline below. > > > > > +#define MPOL_WEIGHTED_INTERLEAVE 6 > > > +#endif > > > > > > > And can we move this common part into include/lapi/numaif.h, > > to avoid defining this again and again in test cases? > > > > I have a pending patch to do just that, but it is not upstream yet. > > This was a comment in the changelog: > > > > MPOL_WEIGHTED_INTERLEAVE is ifdef defined because it is not upstream > > > in libnuma yet, so this ensures compilation. > > Thought it was useful to shoot out a version of this before it all lands > for the sake of getting ahead of the curve a bit. > > > > > First, we do not suggest adding any new tests by applying one "big" > > patch especially since this contains too many other irrelevant > > modifications. > > We'd better separate them in single to guarantee everything goes > > well for traceability of the commit. > > > > Will do. > > > Second, I don't see any new code in set_mempolicy06/07, since you > > only copied them from set_mempolicy02/04, even without any change of the > > comments, this is bad for test maintenance work and involves redundant > > stuff. > > > > the only major differences between the tests, presently, are that the > policy applied is weighted interleave > > TEST(set_mempolicy(MPOL_WEIGHTED_INTERLEAVE, bm->maskp, bm->size+1)); > ^^^^^^^^^^^^^^^^^^^^^^^^ > > In truth, this test isn't really completely, as it should also: > > 1) Set the sysfs values located at > /sys/kernel/mm/mempolicy/weighted_interleave/ > > 2) Validate allocations actually match those settings > > However, this test is quite complicated to write and make fully > reliable, as you also need to know > > 1) The environment (available nodes, cpu nodes, memory-only nodes) > 2) The node the test will be run on (which can be forced) > 3) Where allocations will start from (node X or node Y) as this can > ultimately affect the final distribution. > > In my tests separately, the test itself can also cause allocation > (stack, other regions) which may result in an unexpected distribution of > memory on the target region. This is because those allocations are > credited as part of the interleaving, but the existing code of the test > cannot adjust for that. This may cause failures for no obvious reason. > > This is ultimately why I left the tests mostly unchanged, because I > found it only reasonable to test the default behavior. > > > The recommended way is to add MPOL_WEIGHTED_INTERLEAVE in > > the original case if you just want to validate the behavior similarly > with > > MPOL_INTERLEAVE. > > > > But if you want to test something special/new of > MPOL_WEIGHTED_INTERLEAVE, > > I think that's necessary to create set_mempolicy06/07, and do something > > validate that interleaving behavior is executed based on weights set in ' > > /sys/kernel/mm/mempolicy/weighted_interleave/'. > > > > Was hoping to get input on this. I think probably trying to write a > test to track page-distribution of real weighted interleave will be > difficult to make reliable, so maybe I should just fold these tests back > into the original and note that this simply tests the default behavior > (which is equivalent to MPOL_INTERLEAVE). > Ok, sure. If so merge the MPOL_WEIGHTED_INTERLEAVE into MPOL_INTERLEAVE test should be enough. > > That may require changing the original tests to ensure the sysfs files > are set to 1 to explicitly avoid failures. But I wasn't sure if that > was ok since it would be making silent system-wide changes, and would > require root. > LTP lib provides a way to save/restore the '/proc | /sys' value before and after doing the test, so it would not be difficult to satisfy the requirement if it is just set 1 to a relative file. see: https://github.com/linux-test-project/ltp/blob/master/doc/old/C-Test-API.asciidoc#127-saving--restoring-procsys-values Or, use tst_sys_conf_save() separately to traverse and save the value of "mempolicy/weighted_interleave/node*" file if we are unsure how many nodes are on the system, then set the value via: SAFE_FILE_PRINTF("../weighted_interleave/node*", "%d", 1); Ultimately, LTP lib will restore all of the values to the original. And set ".needs_root = 1," in the struct tst_test will be the requested root permission.
diff --git a/testcases/kernel/syscalls/get_mempolicy/get_mempolicy01.c b/testcases/kernel/syscalls/get_mempolicy/get_mempolicy01.c index 23f62091a..8b0aa60bf 100644 --- a/testcases/kernel/syscalls/get_mempolicy/get_mempolicy01.c +++ b/testcases/kernel/syscalls/get_mempolicy/get_mempolicy01.c @@ -26,6 +26,10 @@ #include <errno.h> #include "tst_numa.h" +#ifndef +#define MPOL_WEIGHTED_INTERLEAVE 6 +#endif + #define MEM_LENGTH (4 * 1024 * 1024) #define PAGES_ALLOCATED 16u @@ -68,6 +72,11 @@ static struct test_case tcase[] = { .alloc = test_set_mempolicy_default, .exp_nodemask = &nodemask, }, + { + POLICY_DESC(MPOL_WEIGHTED_INTERLEAVE), + .alloc = test_set_mempolicy_default, + .exp_nodemask = &nodemask, + }, { POLICY_DESC_NO_TARGET(MPOL_PREFERRED), .alloc = test_set_mempolicy_none, @@ -95,6 +104,12 @@ static struct test_case tcase[] = { .alloc = test_set_mempolicy_default, .exp_nodemask = &nodemask, }, + { + POLICY_DESC_FLAGS(MPOL_WEIGHTED_INTERLEAVE, MPOL_F_ADDR), + .pre_test = test_mbind_default, + .alloc = test_set_mempolicy_default, + .exp_nodemask = &nodemask, + }, { POLICY_DESC_FLAGS_NO_TARGET(MPOL_PREFERRED, MPOL_F_ADDR), .pre_test = test_mbind_none, diff --git a/testcases/kernel/syscalls/mbind/mbind01.c b/testcases/kernel/syscalls/mbind/mbind01.c index 4b8d168cd..d3d3b0049 100644 --- a/testcases/kernel/syscalls/mbind/mbind01.c +++ b/testcases/kernel/syscalls/mbind/mbind01.c @@ -20,6 +20,10 @@ #include "tst_numa.h" #include "lapi/numaif.h" +#ifndef +#define MPOL_WEIGHTED_INTERLEAVE 6 +#endif + #ifdef HAVE_NUMA_V2 #define MEM_LENGTH (4 * 1024 * 1024) @@ -80,6 +84,12 @@ static struct test_case tcase[] = { .err = EINVAL, .test = test_none, }, + { + POLICY_DESC_TEXT(MPOL_WEIGHTED_INTERLEAVE, "no target"), + .ret = -1, + .err = EINVAL, + .test = test_none, + }, { POLICY_DESC(MPOL_INTERLEAVE), .ret = 0, @@ -87,6 +97,13 @@ static struct test_case tcase[] = { .test = test_default, .exp_nodemask = &nodemask, }, + { + POLICY_DESC(MPOL_WEIGHTED_INTERLEAVE), + .ret = 0, + .err = 0, + .test = test_default, + .exp_nodemask = &nodemask, + }, { POLICY_DESC_TEXT(MPOL_PREFERRED, "no target"), .ret = 0, diff --git a/testcases/kernel/syscalls/mbind/mbind02.c b/testcases/kernel/syscalls/mbind/mbind02.c index cd6a03226..66d9cd826 100644 --- a/testcases/kernel/syscalls/mbind/mbind02.c +++ b/testcases/kernel/syscalls/mbind/mbind02.c @@ -23,6 +23,10 @@ #include "tst_test.h" #include "tst_numa.h" +#ifndef MPOL_WEIGHTED_INTERLEAVE +#define MPOL_WEIGHTED_INTERLEAVE 6 +#endif + #ifdef HAVE_NUMA_V2 static size_t page_size; @@ -92,6 +96,7 @@ static const int modes[] = { MPOL_PREFERRED, MPOL_BIND, MPOL_INTERLEAVE, + MPOL_WEIGHTED_INTERLEAVE, }; static void verify_mbind(unsigned int n) diff --git a/testcases/kernel/syscalls/mbind/mbind03.c b/testcases/kernel/syscalls/mbind/mbind03.c index 3d477c9cb..7da3c217d 100644 --- a/testcases/kernel/syscalls/mbind/mbind03.c +++ b/testcases/kernel/syscalls/mbind/mbind03.c @@ -20,6 +20,10 @@ #include "tst_test.h" #include "tst_numa.h" +#ifndef MPOL_WEIGHTED_INTERLEAVE +#define MPOL_WEIGHTED_INTERLEAVE 6 +#endif + #ifdef HAVE_NUMA_V2 static size_t page_size; @@ -103,6 +107,7 @@ static const int modes[] = { MPOL_PREFERRED, MPOL_BIND, MPOL_INTERLEAVE, + MPOL_WEIGHTED_INTERLEAVE, }; static void verify_mbind(unsigned int n) diff --git a/testcases/kernel/syscalls/mbind/mbind04.c b/testcases/kernel/syscalls/mbind/mbind04.c index 50028835f..db41f9e29 100644 --- a/testcases/kernel/syscalls/mbind/mbind04.c +++ b/testcases/kernel/syscalls/mbind/mbind04.c @@ -20,6 +20,10 @@ #include "tst_test.h" #include "tst_numa.h" +#ifndef MPOL_WEIGHTED_INTERLEAVE +#define MPOL_WEIGHTED_INTERLEAVE 6 +#endif + #ifdef HAVE_NUMA_V2 static size_t page_size; @@ -114,6 +118,7 @@ static const int modes[] = { MPOL_PREFERRED, MPOL_BIND, MPOL_INTERLEAVE, + MPOL_WEIGHTED_INTERLEAVE, }; static void verify_mbind(unsigned int n) diff --git a/testcases/kernel/syscalls/set_mempolicy/.gitignore b/testcases/kernel/syscalls/set_mempolicy/.gitignore index 4c121d2e0..3af10e9f9 100644 --- a/testcases/kernel/syscalls/set_mempolicy/.gitignore +++ b/testcases/kernel/syscalls/set_mempolicy/.gitignore @@ -3,3 +3,5 @@ /set_mempolicy03 /set_mempolicy04 /set_mempolicy05 +/set_mempolicy06 +/set_mempolicy07 diff --git a/testcases/kernel/syscalls/set_mempolicy/Makefile b/testcases/kernel/syscalls/set_mempolicy/Makefile index 100780dc3..04208837d 100644 --- a/testcases/kernel/syscalls/set_mempolicy/Makefile +++ b/testcases/kernel/syscalls/set_mempolicy/Makefile @@ -5,7 +5,7 @@ LTPLIBS = ltpnuma include $(top_srcdir)/include/mk/testcases.mk -NEEDS_LIBS = set_mempolicy01 set_mempolicy02 set_mempolicy03 set_mempolicy04 +NEEDS_LIBS = set_mempolicy01 set_mempolicy02 set_mempolicy03 set_mempolicy04 set_mempolicy05 set_mempolicy06 set_mempolicy07 $(NEEDS_LIBS): LDLIBS += $(NUMA_LIBS) $(NEEDS_LIBS): LTPLDLIBS = -lltpnuma diff --git a/testcases/kernel/syscalls/set_mempolicy/set_mempolicy06.c b/testcases/kernel/syscalls/set_mempolicy/set_mempolicy06.c new file mode 100644 index 000000000..287fd316a --- /dev/null +++ b/testcases/kernel/syscalls/set_mempolicy/set_mempolicy06.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018 Cyril Hrubis <chrubis@suse.cz> + */ + +/* + * We are testing set_mempolicy() with MPOL_WEIGHTED_INTERLEAVE. + * + * The test tries different subsets of memory nodes, sets the mask with + * memopolicy, and checks that the memory was interleaved between the nodes + * accordingly. + */ + +#include <errno.h> +#include "config.h" +#ifdef HAVE_NUMA_V2 +# include <numa.h> +# include <numaif.h> +#endif +#include "tst_test.h" +#include "tst_numa.h" + +#ifdef HAVE_NUMA_V2 + +#include "set_mempolicy.h" + +#ifndef MPOL_WEIGHTED_INTERLEAVE +#define MPOL_WEIGHTED_INTERLEAVE 6 +#endif + +#define ALLOC_ON_NODE 8 + +static size_t page_size; +static struct tst_nodemap *nodes; + +static void setup(void) +{ + page_size = getpagesize(); + + nodes = tst_get_nodemap(TST_NUMA_MEM, 2 * ALLOC_ON_NODE * page_size / 1024); + if (nodes->cnt <= 1) + tst_brk(TCONF, "Test requires at least two NUMA memory nodes"); +} + +static void cleanup(void) +{ + tst_nodemap_free(nodes); +} + +static void alloc_and_check(size_t size, unsigned int *exp_alloc) +{ + unsigned int i; + const char *prefix = "child: "; + + if (SAFE_FORK()) { + prefix = "parent: "; + tst_reap_children(); + } + + tst_nodemap_reset_counters(nodes); + alloc_fault_count(nodes, NULL, size * page_size); + + for (i = 0; i < nodes->cnt; i++) { + if (nodes->counters[i] == exp_alloc[i]) { + tst_res(TPASS, "%sNode %u allocated %u", + prefix, nodes->map[i], exp_alloc[i]); + } else { + tst_res(TFAIL, "%sNode %u allocated %u, expected %u", + prefix, nodes->map[i], nodes->counters[i], + exp_alloc[i]); + } + } +} + +static void verify_set_mempolicy(unsigned int n) +{ + struct bitmask *bm = numa_allocate_nodemask(); + unsigned int exp_alloc[nodes->cnt]; + unsigned int alloc_per_node = n ? ALLOC_ON_NODE : 2; + unsigned int alloc_on_nodes = n ? 2 : nodes->cnt; + unsigned int alloc_total = alloc_per_node * alloc_on_nodes; + unsigned int i; + + memset(exp_alloc, 0, sizeof(exp_alloc)); + + for (i = 0; i < alloc_on_nodes; i++) { + exp_alloc[i] = alloc_per_node; + numa_bitmask_setbit(bm, nodes->map[i]); + } + + TEST(set_mempolicy(MPOL_WEIGHTED_INTERLEAVE, bm->maskp, bm->size+1)); + + tst_res(TINFO, "Allocating on nodes 1-%u - %u pages", + alloc_on_nodes, alloc_total); + + if (TST_RET) { + tst_res(TFAIL | TTERRNO, + "set_mempolicy(MPOL_WEIGHTED_INTERLEAVE)"); + return; + } + + tst_res(TPASS, "set_mempolicy(MPOL_WEIGHTED_INTERLEAVE)"); + + numa_free_nodemask(bm); + + alloc_and_check(alloc_total, exp_alloc); +} + +static struct tst_test test = { + .setup = setup, + .cleanup = cleanup, + .test = verify_set_mempolicy, + .tcnt = 2, + .forks_child = 1, + .needs_checkpoints = 1, +}; + +#else + +TST_TEST_TCONF(NUMA_ERROR_MSG); + +#endif /* HAVE_NUMA_V2 */ diff --git a/testcases/kernel/syscalls/set_mempolicy/set_mempolicy07.c b/testcases/kernel/syscalls/set_mempolicy/set_mempolicy07.c new file mode 100644 index 000000000..d88998ed5 --- /dev/null +++ b/testcases/kernel/syscalls/set_mempolicy/set_mempolicy07.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018 Cyril Hrubis <chrubis@suse.cz> + */ + +/* + * We are testing set_mempolicy() with MPOL_WEIGHTED_INTERLEAVE on mmaped buffers + * backed by files. + * + * Apparently it takes a larger sample for the allocations to be correctly + * interleaved. The reason for this is that buffers for file metadata are + * allocated in batches in order not to loose performance. Also the pages + * cannot be interleaved completely evenly unless the number of pages is + * divideable by the number of nodes, which will not happen even if we tried + * hard since we do not have control over metadata blocks for instance. Hence + * we cannot really expect to allocate a single file and have the memory + * interleaved precisely but it works well if we allocate statistic for a more + * than a few files. + */ + +#include <stdio.h> +#include <errno.h> +#include "config.h" +#ifdef HAVE_NUMA_V2 +# include <numa.h> +# include <numaif.h> +#endif +#include "tst_test.h" +#include "tst_numa.h" + +#define MNTPOINT "mntpoint" +#define FILES 10 + +#ifdef HAVE_NUMA_V2 + +#ifndef MPOL_WEIGHTED_INTERLEAVE +#define MPOL_WEIGHTED_INTERLEAVE 6 +#endif + +#include "set_mempolicy.h" + +static size_t page_size; +static struct tst_nodemap *nodes; + +static void setup(void) +{ + int node_min_pages = FILES * (FILES + 1) / 2 * 10 + FILES * 10; + + page_size = getpagesize(); + + nodes = tst_get_nodemap(TST_NUMA_MEM, node_min_pages * page_size / 1024); + if (nodes->cnt <= 1) + tst_brk(TCONF, "Test requires at least two NUMA memory nodes"); +} + +static void cleanup(void) +{ + tst_nodemap_free(nodes); +} + +static void alloc_and_check(void) +{ + unsigned int i, j; + char path[1024]; + unsigned int total_pages = 0; + unsigned int sum_pages = 0; + + tst_nodemap_reset_counters(nodes); + + /* + * The inner loop loops node->cnt times to ensure the sum could + * be evenly distributed among the nodes. + */ + for (i = 1; i <= FILES; i++) { + for (j = 1; j <= nodes->cnt; j++) { + size_t size = 10 * i + j % 10; + + snprintf(path, sizeof(path), MNTPOINT "/numa-test-file-%i-%i", i, j); + alloc_fault_count(nodes, path, size * page_size); + total_pages += size; + } + } + + for (i = 0; i < nodes->cnt; i++) { + float threshold = 1.00 * total_pages / 60; /* five percents */ + float min_pages = 1.00 * total_pages / nodes->cnt - threshold; + float max_pages = 1.00 * total_pages / nodes->cnt + threshold; + + if (nodes->counters[i] > min_pages && nodes->counters[i] < max_pages) { + tst_res(TPASS, "Node %u allocated %u <%.2f,%.2f>", + nodes->map[i], nodes->counters[i], min_pages, max_pages); + } else { + tst_res(TFAIL, "Node %u allocated %u, expected <%.2f,%.2f>", + nodes->map[i], nodes->counters[i], min_pages, max_pages); + } + + sum_pages += nodes->counters[i]; + } + + if (sum_pages != total_pages) { + tst_res(TFAIL, "Sum of nodes %u != allocated pages %u", + sum_pages, total_pages); + return; + } + + tst_res(TPASS, "Sum of nodes equals to allocated pages (%u)", total_pages); +} + +static void verify_set_mempolicy(void) +{ + struct bitmask *bm = numa_allocate_nodemask(); + unsigned int alloc_on_nodes = nodes->cnt; + unsigned int i; + + for (i = 0; i < alloc_on_nodes; i++) + numa_bitmask_setbit(bm, nodes->map[i]); + + TEST(set_mempolicy(MPOL_WEIGHTED_INTERLEAVE, bm->maskp, bm->size+1)); + + if (TST_RET) { + tst_res(TFAIL | TTERRNO, + "set_mempolicy(MPOL_WEIGHTED_INTERLEAVE)"); + return; + } + + tst_res(TPASS, "set_mempolicy(MPOL_WEIGHTED_INTERLEAVE)"); + + alloc_and_check(); + + numa_free_nodemask(bm); +} + +static struct tst_test test = { + .setup = setup, + .cleanup = cleanup, + .test_all = verify_set_mempolicy, + .forks_child = 1, + .needs_root = 1, + .all_filesystems = 1, + .mntpoint = MNTPOINT, + .needs_checkpoints = 1, +}; + +#else + +TST_TEST_TCONF(NUMA_ERROR_MSG); + +#endif /* HAVE_NUMA_V2 */
Add the MPOL_WEIGHTED_INTERLEAVE policy to the syscall test suite for get_mempolicy, set_mempolicy, and mbind. The functional memory distribution test only tests that the default behavior of WEIGHTED_INTERLEAVE is the same as INTERLEAVE, since the values for each node default to '1', and because behavior actually depends on what node the test runs on. This test at least demonstrates the interleave mechanism is in use and works by default as intended. MPOL_WEIGHTED_INTERLEAVE is ifdef defined because it is not upstream in libnuma yet, so this ensures compilation. Signed-off-by: Gregory Price <gregory.price@memverge.com> --- .../syscalls/get_mempolicy/get_mempolicy01.c | 15 ++ testcases/kernel/syscalls/mbind/mbind01.c | 17 ++ testcases/kernel/syscalls/mbind/mbind02.c | 5 + testcases/kernel/syscalls/mbind/mbind03.c | 5 + testcases/kernel/syscalls/mbind/mbind04.c | 5 + .../kernel/syscalls/set_mempolicy/.gitignore | 2 + .../kernel/syscalls/set_mempolicy/Makefile | 2 +- .../syscalls/set_mempolicy/set_mempolicy06.c | 122 +++++++++++++++ .../syscalls/set_mempolicy/set_mempolicy07.c | 148 ++++++++++++++++++ 9 files changed, 320 insertions(+), 1 deletion(-) create mode 100644 testcases/kernel/syscalls/set_mempolicy/set_mempolicy06.c create mode 100644 testcases/kernel/syscalls/set_mempolicy/set_mempolicy07.c