Message ID | 20211101054952.2349590-2-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v1,1/5] string: Make tests bidirectional test-memcpy.c | expand |
On Mon, Nov 01, 2021 at 12:49:49AM -0500, Noah Goldstein wrote: > This commit adds more benchmarks for the common memcpy/memmove > benchmarks. The most significant cases are the half page offsets. The > current version leaves dst and src near page aligned which leads to > false 4k aliasing on x86_64. This can add noise due to false > dependencies from one run to the next. As well, this seems like more > of an edge case than common case so it shouldn't be the only thing > --- > benchtests/bench-memcpy.c | 49 +++++++++++++++++++++++++++++++++----- > benchtests/bench-memmove.c | 26 +++++++++++++++++--- > 2 files changed, 66 insertions(+), 9 deletions(-) > > diff --git a/benchtests/bench-memcpy.c b/benchtests/bench-memcpy.c > index d9236a2282..744bea26d3 100644 > --- a/benchtests/bench-memcpy.c > +++ b/benchtests/bench-memcpy.c > @@ -40,7 +40,10 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src, > { > size_t i, iters = INNER_LOOP_ITERS; > timing_t start, stop, cur; > - > + for (i = 0; i < iters / 64; ++i) > + { > + CALL (impl, dst, src, len); > + } > TIMING_NOW (start); > for (i = 0; i < iters; ++i) > { > @@ -60,11 +63,11 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len, > size_t i, j; > char *s1, *s2; > size_t repeats; > - align1 &= 63; > + align1 &= (getpagesize () - 1); > if (align1 + len >= page_size) > return; > > - align2 &= 63; > + align2 &= (getpagesize () - 1); > if (align2 + len >= page_size) > return; > > @@ -99,7 +102,7 @@ test_main (void) > { > json_ctx_t json_ctx; > size_t i; > - > + size_t half_page = getpagesize () / 2; > test_init (); > > json_init (&json_ctx, 0, stdout); > @@ -121,8 +124,15 @@ test_main (void) > { > do_test (&json_ctx, 0, 0, 1 << i, 1); > do_test (&json_ctx, i, 0, 1 << i, 1); > + do_test (&json_ctx, i + 32, 0, 1 << i, 1); > do_test (&json_ctx, 0, i, 1 << i, 1); > + do_test (&json_ctx, 0, i + 32, 1 << i, 1); > do_test (&json_ctx, i, i, 1 << i, 1); > + do_test (&json_ctx, i + 32, i 
+ 32, 1 << i, 1); > + do_test (&json_ctx, half_page, 0, 1 << i, 1); > + do_test (&json_ctx, half_page + i, 0, 1 << i, 1); > + do_test (&json_ctx, half_page, i, 1 << i, 1); > + do_test (&json_ctx, half_page + i, i, 1 << i, 1); > } > > for (i = 0; i < 32; ++i) > @@ -131,16 +141,26 @@ test_main (void) > do_test (&json_ctx, i, 0, i, 0); > do_test (&json_ctx, 0, i, i, 0); > do_test (&json_ctx, i, i, i, 0); > + do_test (&json_ctx, half_page, 0, i, 0); > + do_test (&json_ctx, half_page + i, 0, i, 0); > + do_test (&json_ctx, half_page, i, i, 0); > + do_test (&json_ctx, half_page + i, i, i, 0); > + do_test (&json_ctx, getpagesize () - 1, 0, i, 0); > + do_test (&json_ctx, 0, getpagesize () - 1, i, 0); > } > > for (i = 3; i < 32; ++i) > { > if ((i & (i - 1)) == 0) > - continue; > + continue; > do_test (&json_ctx, 0, 0, 16 * i, 1); > do_test (&json_ctx, i, 0, 16 * i, 1); > do_test (&json_ctx, 0, i, 16 * i, 1); > do_test (&json_ctx, i, i, 16 * i, 1); > + do_test (&json_ctx, half_page, 0, 16 * i, 1); > + do_test (&json_ctx, half_page + i, 0, 16 * i, 1); > + do_test (&json_ctx, half_page, i, 16 * i, 1); > + do_test (&json_ctx, half_page + i, i, 16 * i, 1); > } > > for (i = 32; i < 64; ++i) > @@ -149,16 +169,33 @@ test_main (void) > do_test (&json_ctx, i, 0, 32 * i, 1); > do_test (&json_ctx, 0, i, 32 * i, 1); > do_test (&json_ctx, i, i, 32 * i, 1); > + do_test (&json_ctx, half_page, 0, 32 * i, 1); > + do_test (&json_ctx, half_page + i, 0, 32 * i, 1); > + do_test (&json_ctx, half_page, i, 32 * i, 1); > + do_test (&json_ctx, half_page + i, i, 32 * i, 1); > } > > do_test (&json_ctx, 0, 0, getpagesize (), 1); > > - for (i = 0; i <= 32; ++i) > + for (i = 0; i <= 48; ++i) > { > do_test (&json_ctx, 0, 0, 2048 + 64 * i, 1); > do_test (&json_ctx, i, 0, 2048 + 64 * i, 1); > + do_test (&json_ctx, i + 32, 0, 2048 + 64 * i, 1); > do_test (&json_ctx, 0, i, 2048 + 64 * i, 1); > + do_test (&json_ctx, 0, i + 32, 2048 + 64 * i, 1); > do_test (&json_ctx, i, i, 2048 + 64 * i, 1); > + do_test 
(&json_ctx, i + 32, i + 32, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page, 0, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page + i, 0, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page, i, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page + i, i, 2048 + 64 * i, 1); > + do_test (&json_ctx, i, 1, 2048 + 64 * i, 1); > + do_test (&json_ctx, 1, i, 2048 + 64 * i, 1); > + do_test (&json_ctx, i + 32, 1, 2048 + 64 * i, 1); > + do_test (&json_ctx, 1, i + 32, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page + i, 1, 2048 + 64 * i, 1); > + do_test (&json_ctx, half_page + 1, i, 2048 + 64 * i, 1); > } > > json_array_end (&json_ctx); > diff --git a/benchtests/bench-memmove.c b/benchtests/bench-memmove.c > index 6becbf4782..855f4d0649 100644 > --- a/benchtests/bench-memmove.c > +++ b/benchtests/bench-memmove.c > @@ -34,7 +34,10 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src, > { > size_t i, iters = INNER_LOOP_ITERS; > timing_t start, stop, cur; > - > + for (i = 0; i < iters / 64; ++i) > + { > + CALL (impl, dst, src, len); > + } > TIMING_NOW (start); > for (i = 0; i < iters; ++i) > { > @@ -53,11 +56,11 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len) > size_t i, j; > char *s1, *s2; > > - align1 &= 63; > + align1 &= (getpagesize () - 1); > if (align1 + len >= page_size) > return; > > - align2 &= 63; > + align2 &= (getpagesize () - 1); > if (align2 + len >= page_size) > return; > > @@ -85,6 +88,7 @@ test_main (void) > { > json_ctx_t json_ctx; > size_t i; > + size_t half_page = getpagesize () / 2; > > test_init (); > > @@ -138,6 +142,22 @@ test_main (void) > do_test (&json_ctx, i, i, 32 * i); > } > > + for (i = 0; i <= 48; ++i) > + { > + do_test (&json_ctx, 0, 0, 2048 + 64 * i); > + do_test (&json_ctx, i, 0, 2048 + 64 * i); > + do_test (&json_ctx, 0, i, 2048 + 64 * i); > + do_test (&json_ctx, i, i, 2048 + 64 * i); > + do_test (&json_ctx, half_page, 0, 2048 + 64 * i); > + do_test (&json_ctx, 0, half_page, 2048 + 
64 * i); > + do_test (&json_ctx, half_page + i, 0, 2048 + 64 * i); > + do_test (&json_ctx, i, half_page, 2048 + 64 * i); > + do_test (&json_ctx, half_page, i, 2048 + 64 * i); > + do_test (&json_ctx, 0, half_page + i, 2048 + 64 * i); > + do_test (&json_ctx, half_page + i, i, 2048 + 64 * i); > + do_test (&json_ctx, i, half_page + i, 2048 + 64 * i); > + } > + > json_array_end (&json_ctx); > json_attr_object_end (&json_ctx); > json_attr_object_end (&json_ctx); > -- > 2.25.1 > LGTM. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> Thanks. H.J.
diff --git a/benchtests/bench-memcpy.c b/benchtests/bench-memcpy.c index d9236a2282..744bea26d3 100644 --- a/benchtests/bench-memcpy.c +++ b/benchtests/bench-memcpy.c @@ -40,7 +40,10 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src, { size_t i, iters = INNER_LOOP_ITERS; timing_t start, stop, cur; - + for (i = 0; i < iters / 64; ++i) + { + CALL (impl, dst, src, len); + } TIMING_NOW (start); for (i = 0; i < iters; ++i) { @@ -60,11 +63,11 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len, size_t i, j; char *s1, *s2; size_t repeats; - align1 &= 63; + align1 &= (getpagesize () - 1); if (align1 + len >= page_size) return; - align2 &= 63; + align2 &= (getpagesize () - 1); if (align2 + len >= page_size) return; @@ -99,7 +102,7 @@ test_main (void) { json_ctx_t json_ctx; size_t i; - + size_t half_page = getpagesize () / 2; test_init (); json_init (&json_ctx, 0, stdout); @@ -121,8 +124,15 @@ test_main (void) { do_test (&json_ctx, 0, 0, 1 << i, 1); do_test (&json_ctx, i, 0, 1 << i, 1); + do_test (&json_ctx, i + 32, 0, 1 << i, 1); do_test (&json_ctx, 0, i, 1 << i, 1); + do_test (&json_ctx, 0, i + 32, 1 << i, 1); do_test (&json_ctx, i, i, 1 << i, 1); + do_test (&json_ctx, i + 32, i + 32, 1 << i, 1); + do_test (&json_ctx, half_page, 0, 1 << i, 1); + do_test (&json_ctx, half_page + i, 0, 1 << i, 1); + do_test (&json_ctx, half_page, i, 1 << i, 1); + do_test (&json_ctx, half_page + i, i, 1 << i, 1); } for (i = 0; i < 32; ++i) @@ -131,16 +141,26 @@ test_main (void) do_test (&json_ctx, i, 0, i, 0); do_test (&json_ctx, 0, i, i, 0); do_test (&json_ctx, i, i, i, 0); + do_test (&json_ctx, half_page, 0, i, 0); + do_test (&json_ctx, half_page + i, 0, i, 0); + do_test (&json_ctx, half_page, i, i, 0); + do_test (&json_ctx, half_page + i, i, i, 0); + do_test (&json_ctx, getpagesize () - 1, 0, i, 0); + do_test (&json_ctx, 0, getpagesize () - 1, i, 0); } for (i = 3; i < 32; ++i) { if ((i & (i - 1)) == 0) - continue; + continue; do_test 
(&json_ctx, 0, 0, 16 * i, 1); do_test (&json_ctx, i, 0, 16 * i, 1); do_test (&json_ctx, 0, i, 16 * i, 1); do_test (&json_ctx, i, i, 16 * i, 1); + do_test (&json_ctx, half_page, 0, 16 * i, 1); + do_test (&json_ctx, half_page + i, 0, 16 * i, 1); + do_test (&json_ctx, half_page, i, 16 * i, 1); + do_test (&json_ctx, half_page + i, i, 16 * i, 1); } for (i = 32; i < 64; ++i) @@ -149,16 +169,33 @@ test_main (void) do_test (&json_ctx, i, 0, 32 * i, 1); do_test (&json_ctx, 0, i, 32 * i, 1); do_test (&json_ctx, i, i, 32 * i, 1); + do_test (&json_ctx, half_page, 0, 32 * i, 1); + do_test (&json_ctx, half_page + i, 0, 32 * i, 1); + do_test (&json_ctx, half_page, i, 32 * i, 1); + do_test (&json_ctx, half_page + i, i, 32 * i, 1); } do_test (&json_ctx, 0, 0, getpagesize (), 1); - for (i = 0; i <= 32; ++i) + for (i = 0; i <= 48; ++i) { do_test (&json_ctx, 0, 0, 2048 + 64 * i, 1); do_test (&json_ctx, i, 0, 2048 + 64 * i, 1); + do_test (&json_ctx, i + 32, 0, 2048 + 64 * i, 1); do_test (&json_ctx, 0, i, 2048 + 64 * i, 1); + do_test (&json_ctx, 0, i + 32, 2048 + 64 * i, 1); do_test (&json_ctx, i, i, 2048 + 64 * i, 1); + do_test (&json_ctx, i + 32, i + 32, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page, 0, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page + i, 0, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page, i, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page + i, i, 2048 + 64 * i, 1); + do_test (&json_ctx, i, 1, 2048 + 64 * i, 1); + do_test (&json_ctx, 1, i, 2048 + 64 * i, 1); + do_test (&json_ctx, i + 32, 1, 2048 + 64 * i, 1); + do_test (&json_ctx, 1, i + 32, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page + i, 1, 2048 + 64 * i, 1); + do_test (&json_ctx, half_page + 1, i, 2048 + 64 * i, 1); } json_array_end (&json_ctx); diff --git a/benchtests/bench-memmove.c b/benchtests/bench-memmove.c index 6becbf4782..855f4d0649 100644 --- a/benchtests/bench-memmove.c +++ b/benchtests/bench-memmove.c @@ -34,7 +34,10 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, 
char *src, { size_t i, iters = INNER_LOOP_ITERS; timing_t start, stop, cur; - + for (i = 0; i < iters / 64; ++i) + { + CALL (impl, dst, src, len); + } TIMING_NOW (start); for (i = 0; i < iters; ++i) { @@ -53,11 +56,11 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len) size_t i, j; char *s1, *s2; - align1 &= 63; + align1 &= (getpagesize () - 1); if (align1 + len >= page_size) return; - align2 &= 63; + align2 &= (getpagesize () - 1); if (align2 + len >= page_size) return; @@ -85,6 +88,7 @@ test_main (void) { json_ctx_t json_ctx; size_t i; + size_t half_page = getpagesize () / 2; test_init (); @@ -138,6 +142,22 @@ test_main (void) do_test (&json_ctx, i, i, 32 * i); } + for (i = 0; i <= 48; ++i) + { + do_test (&json_ctx, 0, 0, 2048 + 64 * i); + do_test (&json_ctx, i, 0, 2048 + 64 * i); + do_test (&json_ctx, 0, i, 2048 + 64 * i); + do_test (&json_ctx, i, i, 2048 + 64 * i); + do_test (&json_ctx, half_page, 0, 2048 + 64 * i); + do_test (&json_ctx, 0, half_page, 2048 + 64 * i); + do_test (&json_ctx, half_page + i, 0, 2048 + 64 * i); + do_test (&json_ctx, i, half_page, 2048 + 64 * i); + do_test (&json_ctx, half_page, i, 2048 + 64 * i); + do_test (&json_ctx, 0, half_page + i, 2048 + 64 * i); + do_test (&json_ctx, half_page + i, i, 2048 + 64 * i); + do_test (&json_ctx, i, half_page + i, 2048 + 64 * i); + } + json_array_end (&json_ctx); json_attr_object_end (&json_ctx); json_attr_object_end (&json_ctx);