Message ID | 20211106183322.3129442-2-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v4,1/5] string: Make tests bidirectional test-memcpy.c | expand |
On Sat, Nov 6, 2021 at 11:33 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > This commit adds more benchmarks for the common memcpy/memmove > benchmarks. The most signifcant cases are the half page offsets. The > current versions leaves dst and src near page aligned which leads to > false 4k aliasing on x86_64. This can add noise due to false > dependencies from one run to the next. As well, this seems like more > of an edge case that common case so it shouldn't be the only thing > Reviewed-by: H.J. Lu <hjl.tools@gmail.com> > --- > benchtests/bench-memcpy.c | 49 +++++++++++++++++++++++++++++++++----- > benchtests/bench-memmove.c | 26 +++++++++++++++++--- > 2 files changed, 66 insertions(+), 9 deletions(-) > > diff --git a/benchtests/bench-memcpy.c b/benchtests/bench-memcpy.c > index d9236a2282..744bea26d3 100644 > --- a/benchtests/bench-memcpy.c > +++ b/benchtests/bench-memcpy.c > @@ -40,7 +40,10 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src, > { > size_t i, iters = INNER_LOOP_ITERS; > timing_t start, stop, cur; > - > + for (i = 0; i < iters / 64; ++i) > + { > + CALL (impl, dst, src, len); > + } > TIMING_NOW (start); > for (i = 0; i < iters; ++i) > { > @@ -60,11 +63,11 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len, > size_t i, j; > char *s1, *s2; > size_t repeats; > - align1 &= 63; > + align1 &= (getpagesize () - 1); > if (align1 + len >= page_size) > return; > > - align2 &= 63; > + align2 &= (getpagesize () - 1); > if (align2 + len >= page_size) > return; > > @@ -99,7 +102,7 @@ test_main (void) > { > json_ctx_t json_ctx; > size_t i; > - > + size_t half_page = getpagesize () / 2; > test_init (); > > json_init (&json_ctx, 0, stdout); > @@ -121,8 +124,15 @@ test_main (void) > { > do_test (&json_ctx, 0, 0, 1 << i, 1); > do_test (&json_ctx, i, 0, 1 << i, 1); > + do_test (&json_ctx, i + 32, 0, 1 << i, 1); > do_test (&json_ctx, 0, i, 1 << i, 1); > + do_test (&json_ctx, 0, i + 32, 1 << i, 1); > do_test 
(&json_ctx, i, i, 1 << i, 1); > + do_test (&json_ctx, i + 32, i + 32, 1 << i, 1); > + do_test (&json_ctx, half_page, 0, 1 << i, 1); > + do_test (&json_ctx, half_page + i, 0, 1 << i, 1); > + do_test (&json_ctx, half_page, i, 1 << i, 1); > + do_test (&json_ctx, half_page + i, i, 1 << i, 1); > } > > for (i = 0; i < 32; ++i) > @@ -131,16 +141,26 @@ test_main (void) > do_test (&json_ctx, i, 0, i, 0); > do_test (&json_ctx, 0, i, i, 0); > do_test (&json_ctx, i, i, i, 0); > + do_test (&json_ctx, half_page, 0, i, 0); > + do_test (&json_ctx, half_page + i, 0, i, 0); > + do_test (&json_ctx, half_page, i, i, 0); > + do_test (&json_ctx, half_page + i, i, i, 0); > + do_test (&json_ctx, getpagesize () - 1, 0, i, 0); > + do_test (&json_ctx, 0, getpagesize () - 1, i, 0); > } > > for (i = 3; i < 32; ++i) > { > if ((i & (i - 1)) == 0) > - continue; > + continue; > do_test (&json_ctx, 0, 0, 16 * i, 1); > do_test (&json_ctx, i, 0, 16 * i, 1); > do_test (&json_ctx, 0, i, 16 * i, 1); > do_test (&json_ctx, i, i, 16 * i, 1); > + do_test (&json_ctx, half_page, 0, 16 * i, 1); > + do_test (&json_ctx, half_page + i, 0, 16 * i, 1); > + do_test (&json_ctx, half_page, i, 16 * i, 1); > + do_test (&json_ctx, half_page + i, i, 16 * i, 1); > } > > for (i = 32; i < 64; ++i) > @@ -149,16 +169,33 @@ test_main (void) > do_test (&json_ctx, i, 0, 32 * i, 1); > do_test (&json_ctx, 0, i, 32 * i, 1); > do_test (&json_ctx, i, i, 32 * i, 1); > + do_test (&json_ctx, half_page, 0, 32 * i, 1); > + do_test (&json_ctx, half_page + i, 0, 32 * i, 1); > + do_test (&json_ctx, half_page, i, 32 * i, 1); > + do_test (&json_ctx, half_page + i, i, 32 * i, 1); > } > > do_test (&json_ctx, 0, 0, getpagesize (), 1); > > - for (i = 0; i <= 32; ++i) > + for (i = 0; i <= 48; ++i) > { > do_test (&json_ctx, 0, 0, 2048 + 64 * i, 1); > do_test (&json_ctx, i, 0, 2048 + 64 * i, 1); > + do_test (&json_ctx, i + 32, 0, 2048 + 64 * i, 1); > do_test (&json_ctx, 0, i, 2048 + 64 * i, 1); > + do_test (&json_ctx, 0, i + 32, 2048 + 64 * i, 1); > 
do_test (&json_ctx, i, i, 2048 + 64 * i, 1); > + do_test (&json_ctx, i + 32, i + 32, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page, 0, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page + i, 0, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page, i, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page + i, i, 2048 + 64 * i, 1); > + do_test (&json_ctx, i, 1, 2048 + 64 * i, 1); > + do_test (&json_ctx, 1, i, 2048 + 64 * i, 1); > + do_test (&json_ctx, i + 32, 1, 2048 + 64 * i, 1); > + do_test (&json_ctx, 1, i + 32, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page + i, 1, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page + 1, i, 2048 + 64 * i, 1); > } > > json_array_end (&json_ctx); > diff --git a/benchtests/bench-memmove.c b/benchtests/bench-memmove.c > index 6becbf4782..855f4d0649 100644 > --- a/benchtests/bench-memmove.c > +++ b/benchtests/bench-memmove.c > @@ -34,7 +34,10 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src, > { > size_t i, iters = INNER_LOOP_ITERS; > timing_t start, stop, cur; > - > + for (i = 0; i < iters / 64; ++i) > + { > + CALL (impl, dst, src, len); > + } > TIMING_NOW (start); > for (i = 0; i < iters; ++i) > { > @@ -53,11 +56,11 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len) > size_t i, j; > char *s1, *s2; > > - align1 &= 63; > + align1 &= (getpagesize () - 1); > if (align1 + len >= page_size) > return; > > - align2 &= 63; > + align2 &= (getpagesize () - 1); > if (align2 + len >= page_size) > return; > > @@ -85,6 +88,7 @@ test_main (void) > { > json_ctx_t json_ctx; > size_t i; > + size_t half_page = getpagesize () / 2; > > test_init (); > > @@ -138,6 +142,22 @@ test_main (void) > do_test (&json_ctx, i, i, 32 * i); > } > > + for (i = 0; i <= 48; ++i) > + { > + do_test (&json_ctx, 0, 0, 2048 + 64 * i); > + do_test (&json_ctx, i, 0, 2048 + 64 * i); > + do_test (&json_ctx, 0, i, 2048 + 64 * i); > + do_test (&json_ctx, i, i, 2048 + 64 * i); > + do_test (&json_ctx, half_page, 0, 2048 
+ 64 * i); > + do_test (&json_ctx, 0, half_page, 2048 + 64 * i); > + do_test (&json_ctx, half_page + i, 0, 2048 + 64 * i); > + do_test (&json_ctx, i, half_page, 2048 + 64 * i); > + do_test (&json_ctx, half_page, i, 2048 + 64 * i); > + do_test (&json_ctx, 0, half_page + i, 2048 + 64 * i); > + do_test (&json_ctx, half_page + i, i, 2048 + 64 * i); > + do_test (&json_ctx, i, half_page + i, 2048 + 64 * i); > + } > + > json_array_end (&json_ctx); > json_attr_object_end (&json_ctx); > json_attr_object_end (&json_ctx); > -- > 2.25.1 > LGTM. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> Thanks.
diff --git a/benchtests/bench-memcpy.c b/benchtests/bench-memcpy.c index d9236a2282..744bea26d3 100644 --- a/benchtests/bench-memcpy.c +++ b/benchtests/bench-memcpy.c @@ -40,7 +40,10 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src, { size_t i, iters = INNER_LOOP_ITERS; timing_t start, stop, cur; - + for (i = 0; i < iters / 64; ++i) + { + CALL (impl, dst, src, len); + } TIMING_NOW (start); for (i = 0; i < iters; ++i) { @@ -60,11 +63,11 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len, size_t i, j; char *s1, *s2; size_t repeats; - align1 &= 63; + align1 &= (getpagesize () - 1); if (align1 + len >= page_size) return; - align2 &= 63; + align2 &= (getpagesize () - 1); if (align2 + len >= page_size) return; @@ -99,7 +102,7 @@ test_main (void) { json_ctx_t json_ctx; size_t i; - + size_t half_page = getpagesize () / 2; test_init (); json_init (&json_ctx, 0, stdout); @@ -121,8 +124,15 @@ test_main (void) { do_test (&json_ctx, 0, 0, 1 << i, 1); do_test (&json_ctx, i, 0, 1 << i, 1); + do_test (&json_ctx, i + 32, 0, 1 << i, 1); do_test (&json_ctx, 0, i, 1 << i, 1); + do_test (&json_ctx, 0, i + 32, 1 << i, 1); do_test (&json_ctx, i, i, 1 << i, 1); + do_test (&json_ctx, i + 32, i + 32, 1 << i, 1); + do_test (&json_ctx, half_page, 0, 1 << i, 1); + do_test (&json_ctx, half_page + i, 0, 1 << i, 1); + do_test (&json_ctx, half_page, i, 1 << i, 1); + do_test (&json_ctx, half_page + i, i, 1 << i, 1); } for (i = 0; i < 32; ++i) @@ -131,16 +141,26 @@ test_main (void) do_test (&json_ctx, i, 0, i, 0); do_test (&json_ctx, 0, i, i, 0); do_test (&json_ctx, i, i, i, 0); + do_test (&json_ctx, half_page, 0, i, 0); + do_test (&json_ctx, half_page + i, 0, i, 0); + do_test (&json_ctx, half_page, i, i, 0); + do_test (&json_ctx, half_page + i, i, i, 0); + do_test (&json_ctx, getpagesize () - 1, 0, i, 0); + do_test (&json_ctx, 0, getpagesize () - 1, i, 0); } for (i = 3; i < 32; ++i) { if ((i & (i - 1)) == 0) - continue; + continue; do_test 
(&json_ctx, 0, 0, 16 * i, 1); do_test (&json_ctx, i, 0, 16 * i, 1); do_test (&json_ctx, 0, i, 16 * i, 1); do_test (&json_ctx, i, i, 16 * i, 1); + do_test (&json_ctx, half_page, 0, 16 * i, 1); + do_test (&json_ctx, half_page + i, 0, 16 * i, 1); + do_test (&json_ctx, half_page, i, 16 * i, 1); + do_test (&json_ctx, half_page + i, i, 16 * i, 1); } for (i = 32; i < 64; ++i) @@ -149,16 +169,33 @@ test_main (void) do_test (&json_ctx, i, 0, 32 * i, 1); do_test (&json_ctx, 0, i, 32 * i, 1); do_test (&json_ctx, i, i, 32 * i, 1); + do_test (&json_ctx, half_page, 0, 32 * i, 1); + do_test (&json_ctx, half_page + i, 0, 32 * i, 1); + do_test (&json_ctx, half_page, i, 32 * i, 1); + do_test (&json_ctx, half_page + i, i, 32 * i, 1); } do_test (&json_ctx, 0, 0, getpagesize (), 1); - for (i = 0; i <= 32; ++i) + for (i = 0; i <= 48; ++i) { do_test (&json_ctx, 0, 0, 2048 + 64 * i, 1); do_test (&json_ctx, i, 0, 2048 + 64 * i, 1); + do_test (&json_ctx, i + 32, 0, 2048 + 64 * i, 1); do_test (&json_ctx, 0, i, 2048 + 64 * i, 1); + do_test (&json_ctx, 0, i + 32, 2048 + 64 * i, 1); do_test (&json_ctx, i, i, 2048 + 64 * i, 1); + do_test (&json_ctx, i + 32, i + 32, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page, 0, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page + i, 0, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page, i, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page + i, i, 2048 + 64 * i, 1); + do_test (&json_ctx, i, 1, 2048 + 64 * i, 1); + do_test (&json_ctx, 1, i, 2048 + 64 * i, 1); + do_test (&json_ctx, i + 32, 1, 2048 + 64 * i, 1); + do_test (&json_ctx, 1, i + 32, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page + i, 1, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page + 1, i, 2048 + 64 * i, 1); } json_array_end (&json_ctx); diff --git a/benchtests/bench-memmove.c b/benchtests/bench-memmove.c index 6becbf4782..855f4d0649 100644 --- a/benchtests/bench-memmove.c +++ b/benchtests/bench-memmove.c @@ -34,7 +34,10 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, 
char *src, { size_t i, iters = INNER_LOOP_ITERS; timing_t start, stop, cur; - + for (i = 0; i < iters / 64; ++i) + { + CALL (impl, dst, src, len); + } TIMING_NOW (start); for (i = 0; i < iters; ++i) { @@ -53,11 +56,11 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len) size_t i, j; char *s1, *s2; - align1 &= 63; + align1 &= (getpagesize () - 1); if (align1 + len >= page_size) return; - align2 &= 63; + align2 &= (getpagesize () - 1); if (align2 + len >= page_size) return; @@ -85,6 +88,7 @@ test_main (void) { json_ctx_t json_ctx; size_t i; + size_t half_page = getpagesize () / 2; test_init (); @@ -138,6 +142,22 @@ test_main (void) do_test (&json_ctx, i, i, 32 * i); } + for (i = 0; i <= 48; ++i) + { + do_test (&json_ctx, 0, 0, 2048 + 64 * i); + do_test (&json_ctx, i, 0, 2048 + 64 * i); + do_test (&json_ctx, 0, i, 2048 + 64 * i); + do_test (&json_ctx, i, i, 2048 + 64 * i); + do_test (&json_ctx, half_page, 0, 2048 + 64 * i); + do_test (&json_ctx, 0, half_page, 2048 + 64 * i); + do_test (&json_ctx, half_page + i, 0, 2048 + 64 * i); + do_test (&json_ctx, i, half_page, 2048 + 64 * i); + do_test (&json_ctx, half_page, i, 2048 + 64 * i); + do_test (&json_ctx, 0, half_page + i, 2048 + 64 * i); + do_test (&json_ctx, half_page + i, i, 2048 + 64 * i); + do_test (&json_ctx, i, half_page + i, 2048 + 64 * i); + } + json_array_end (&json_ctx); json_attr_object_end (&json_ctx); json_attr_object_end (&json_ctx);
This commit adds more test cases to the common memcpy/memmove benchmarks. The most significant cases are the half page offsets. The current version leaves dst and src nearly page aligned, which leads to false 4k aliasing on x86_64. This can add noise due to false dependencies from one run to the next. As well, this seems like more of an edge case than a common case, so it shouldn't be the only thing benchmarked. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> --- benchtests/bench-memcpy.c | 49 +++++++++++++++++++++++++++++++++----- benchtests/bench-memmove.c | 26 +++++++++++++++++--- 2 files changed, 66 insertions(+), 9 deletions(-)