diff mbox series

Benchtests: Improve large memcpy/memset benchmarks

Message ID PAWPR08MB8982ECEBEEDAD6683348BE6F832D2@PAWPR08MB8982.eurprd08.prod.outlook.com
State New
Headers show
Series Benchtests: Improve large memcpy/memset benchmarks | expand

Commit Message

Wilco Dijkstra March 18, 2024, 3:08 p.m. UTC
Adjust sizes between 64KB and 16MB and iterations based on length. 
Remove incorrect uses of alloc_bufs (since we're not interested in measuring
Linux clear_page time).

OK for commit?

---

Comments

Noah Goldstein March 18, 2024, 7:29 p.m. UTC | #1
On Mon, Mar 18, 2024 at 10:08 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> Adjust sizes between 64KB and 16MB and iterations based on length.
> Remove incorrect uses of alloc_bufs (since we're not interested in measuring
> Linux clear_page time).
>
> OK for commit?
>
> ---
>
> diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
> index 7076c0a17b00d76c26aa6adb4c3cc0aedcf69955..dea414ec8d1bb160409725996244178ee4fa93fd 100644
> --- a/benchtests/bench-bzero-large.c
> +++ b/benchtests/bench-bzero-large.c
> @@ -22,9 +22,8 @@
>  #else
>  # define TEST_NAME "bzero"
>  #endif
> -#define START_SIZE (128 * 1024)
> -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> -#define TIMEOUT (20 * 60)
> +#define START_SIZE (64 * 1024)
> +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #include "bench-string.h"
>
>  #include "json-lib.h"
> @@ -52,7 +51,7 @@ IMPL (memset_zero, 0)
>  static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -74,20 +73,13 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
>  static void
>  do_test (json_ctx_t *json_ctx, size_t align, size_t len)
>  {
> -  align &= 63;
> -  if ((align + len) * sizeof (CHAR) > page_size)
> -    return;
> -
>    json_element_object_begin (json_ctx);
>    json_attr_uint (json_ctx, "length", len);
>    json_attr_uint (json_ctx, "alignment", align);
>    json_array_begin (json_ctx, "timings");
>
>    FOR_EACH_IMPL (impl, 0)
> -    {
> -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
> -      alloc_bufs ();
> -    }
> +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
>
>    json_array_end (json_ctx);
>    json_element_object_end (json_ctx);
> diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
> index 7b2c5530af3883acc4b3895cb11667e6ff1a55ff..9e544a4729197f886f6cb092ec225004b586d197 100644
> --- a/benchtests/bench-memcpy-large.c
> +++ b/benchtests/bench-memcpy-large.c
> @@ -19,10 +19,9 @@
>  #ifndef MEMCPY_RESULT
>  # define MEMCPY_RESULT(dst, len) dst
>  # define START_SIZE (64 * 1024)
> -# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
> +# define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  # define TEST_MAIN
>  # define TEST_NAME "memcpy"
> -# define TIMEOUT (20 * 60)
>  # include "bench-string.h"
>
>  IMPL (memcpy, 1)
> @@ -36,7 +35,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src,
>              size_t len)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -59,12 +58,7 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len,
>    char *s1, *s2;
>    size_t repeats;
>    align1 &= 4095;
> -  if (align1 + len >= page_size)
> -    return;
> -
>    align2 &= 4095;
> -  if (align2 + len >= page_size)
> -    return;
>
>    s1 = (char *) (buf1 + align1);
>    s2 = (char *) (buf2 + align2);
> diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
> index a09dd3678848a3bf8612732439700eb8ef5d82ea..fd504653f681b172800cc34e054cc745180aa4fa 100644
> --- a/benchtests/bench-memmove-large.c
> +++ b/benchtests/bench-memmove-large.c
> @@ -16,12 +16,10 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define BASE_PAGE_SIZE (1024 * 1024)
> -#define START_SIZE (4 * 1024)
> +#define START_SIZE (64 * 1024)
>  #define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #define TEST_MAIN
>  #define TEST_NAME "memmove"
> -#define TIMEOUT (20 * 60)
>  #include "bench-string.h"
>  #include "json-lib.h"
>
> @@ -33,7 +31,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
>              size_t len)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
maybe MAX(16, (MIN_PAGE_SIZE * )/ len). Likewise above.
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -54,13 +52,8 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len)
>    size_t i, j;
>    char *s1, *s2;
>
> -  align1 &= 127;
> -  if (align1 + len >= page_size)
> -    return;
> -
> -  align2 &= 127;
> -  if (align2 + len >= page_size)
> -    return;
> +  align1 &= 4095;
> +  align2 &= 4095;
>
>    s1 = (char *) (buf2 + align1);
>    s2 = (char *) (buf2 + align2);
> diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
> index a1f33245d4960c1e82fc441b5faf322204035202..c3f9ee0cd75d3cb30f8b3df15672f895c111fc28 100644
> --- a/benchtests/bench-memset-large.c
> +++ b/benchtests/bench-memset-large.c
> @@ -18,9 +18,8 @@
>
>  #define TEST_MAIN
>  #define TEST_NAME "memset"
> -#define START_SIZE (128 * 1024)
> -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> -#define TIMEOUT (20 * 60)
> +#define START_SIZE (64 * 1024)
> +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #include "bench-string.h"
>
>  #include "json-lib.h"
> @@ -35,7 +34,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
>              int c __attribute ((unused)), size_t n)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -53,10 +52,6 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
>  static void
>  do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
>  {
> -  align &= 63;
> -  if ((align + len) * sizeof (CHAR) > page_size)
> -    return;
> -
>    json_element_object_begin (json_ctx);
>    json_attr_uint (json_ctx, "length", len);
>    json_attr_uint (json_ctx, "alignment", align);
> @@ -64,10 +59,7 @@ do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
>    json_array_begin (json_ctx, "timings");
>
>    FOR_EACH_IMPL (impl, 0)
> -    {
> -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
> -      alloc_bufs ();
> -    }
> +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
>
>    json_array_end (json_ctx);
>    json_element_object_end (json_ctx);
>
Noah Goldstein March 18, 2024, 7:34 p.m. UTC | #2
On Mon, Mar 18, 2024 at 2:29 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Mon, Mar 18, 2024 at 10:08 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> >
> > Adjust sizes between 64KB and 16MB and iterations based on length.
> > Remove incorrect uses of alloc_bufs (since we're not interested in measuring
> > Linux clear_page time).
> >
> > OK for commit?
> >
> > ---
> >
> > diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
> > index 7076c0a17b00d76c26aa6adb4c3cc0aedcf69955..dea414ec8d1bb160409725996244178ee4fa93fd 100644
> > --- a/benchtests/bench-bzero-large.c
> > +++ b/benchtests/bench-bzero-large.c
> > @@ -22,9 +22,8 @@
> >  #else
> >  # define TEST_NAME "bzero"
> >  #endif
> > -#define START_SIZE (128 * 1024)
> > -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> > -#define TIMEOUT (20 * 60)
> > +#define START_SIZE (64 * 1024)
> > +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
> >  #include "bench-string.h"
> >
> >  #include "json-lib.h"
> > @@ -52,7 +51,7 @@ IMPL (memset_zero, 0)
> >  static void
> >  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
> >  {
> > -  size_t i, iters = 16;
> > +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
> >    timing_t start, stop, cur;
> >
> >    TIMING_NOW (start);
> > @@ -74,20 +73,13 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
> >  static void
> >  do_test (json_ctx_t *json_ctx, size_t align, size_t len)
> >  {
> > -  align &= 63;
> > -  if ((align + len) * sizeof (CHAR) > page_size)
> > -    return;
> > -
> >    json_element_object_begin (json_ctx);
> >    json_attr_uint (json_ctx, "length", len);
> >    json_attr_uint (json_ctx, "alignment", align);
> >    json_array_begin (json_ctx, "timings");
> >
> >    FOR_EACH_IMPL (impl, 0)
> > -    {
> > -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
> > -      alloc_bufs ();
> > -    }
> > +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
> >
> >    json_array_end (json_ctx);
> >    json_element_object_end (json_ctx);
> > diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
> > index 7b2c5530af3883acc4b3895cb11667e6ff1a55ff..9e544a4729197f886f6cb092ec225004b586d197 100644
> > --- a/benchtests/bench-memcpy-large.c
> > +++ b/benchtests/bench-memcpy-large.c
> > @@ -19,10 +19,9 @@
> >  #ifndef MEMCPY_RESULT
> >  # define MEMCPY_RESULT(dst, len) dst
> >  # define START_SIZE (64 * 1024)
> > -# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
> > +# define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
> >  # define TEST_MAIN
> >  # define TEST_NAME "memcpy"
> > -# define TIMEOUT (20 * 60)
> >  # include "bench-string.h"
> >
> >  IMPL (memcpy, 1)
> > @@ -36,7 +35,7 @@ static void
> >  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src,
> >              size_t len)
> >  {
> > -  size_t i, iters = 16;
> > +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
> >    timing_t start, stop, cur;
> >
> >    TIMING_NOW (start);
> > @@ -59,12 +58,7 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len,
> >    char *s1, *s2;
> >    size_t repeats;
> >    align1 &= 4095;
> > -  if (align1 + len >= page_size)
> > -    return;
> > -
> >    align2 &= 4095;
> > -  if (align2 + len >= page_size)
> > -    return;
> >
> >    s1 = (char *) (buf1 + align1);
> >    s2 = (char *) (buf2 + align2);
> > diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
> > index a09dd3678848a3bf8612732439700eb8ef5d82ea..fd504653f681b172800cc34e054cc745180aa4fa 100644
> > --- a/benchtests/bench-memmove-large.c
> > +++ b/benchtests/bench-memmove-large.c
> > @@ -16,12 +16,10 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define BASE_PAGE_SIZE (1024 * 1024)
> > -#define START_SIZE (4 * 1024)
> > +#define START_SIZE (64 * 1024)
> >  #define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
> >  #define TEST_MAIN
> >  #define TEST_NAME "memmove"
> > -#define TIMEOUT (20 * 60)
> >  #include "bench-string.h"
> >  #include "json-lib.h"
> >
> > @@ -33,7 +31,7 @@ static void
> >  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
> >              size_t len)
> >  {
> > -  size_t i, iters = 16;
> > +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
> maybe MAX(16, (MIN_PAGE_SIZE * )/ len). Likewise above.

Also, can you include the iter count in the output?
> >    timing_t start, stop, cur;
> >
> >    TIMING_NOW (start);
> > @@ -54,13 +52,8 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len)
> >    size_t i, j;
> >    char *s1, *s2;
> >
> > -  align1 &= 127;
> > -  if (align1 + len >= page_size)
> > -    return;
> > -
> > -  align2 &= 127;
> > -  if (align2 + len >= page_size)
> > -    return;
> > +  align1 &= 4095;
> > +  align2 &= 4095;
> >
> >    s1 = (char *) (buf2 + align1);
> >    s2 = (char *) (buf2 + align2);
> > diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
> > index a1f33245d4960c1e82fc441b5faf322204035202..c3f9ee0cd75d3cb30f8b3df15672f895c111fc28 100644
> > --- a/benchtests/bench-memset-large.c
> > +++ b/benchtests/bench-memset-large.c
> > @@ -18,9 +18,8 @@
> >
> >  #define TEST_MAIN
> >  #define TEST_NAME "memset"
> > -#define START_SIZE (128 * 1024)
> > -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> > -#define TIMEOUT (20 * 60)
> > +#define START_SIZE (64 * 1024)
> > +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
> >  #include "bench-string.h"
> >
> >  #include "json-lib.h"
> > @@ -35,7 +34,7 @@ static void
> >  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
> >              int c __attribute ((unused)), size_t n)
> >  {
> > -  size_t i, iters = 16;
> > +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
> >    timing_t start, stop, cur;
> >
> >    TIMING_NOW (start);
> > @@ -53,10 +52,6 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
> >  static void
> >  do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
> >  {
> > -  align &= 63;
> > -  if ((align + len) * sizeof (CHAR) > page_size)
> > -    return;
> > -
> >    json_element_object_begin (json_ctx);
> >    json_attr_uint (json_ctx, "length", len);
> >    json_attr_uint (json_ctx, "alignment", align);
> > @@ -64,10 +59,7 @@ do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
> >    json_array_begin (json_ctx, "timings");
> >
> >    FOR_EACH_IMPL (impl, 0)
> > -    {
> > -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
> > -      alloc_bufs ();
> > -    }
> > +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
> >
> >    json_array_end (json_ctx);
> >    json_element_object_end (json_ctx);
> >
Wilco Dijkstra March 27, 2024, 5:02 p.m. UTC | #3
Hi Noah,

> > @@ -33,7 +31,7 @@ static void
> >  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
> >              size_t len)
> >  {
> > -  size_t i, iters = 16;
> > +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
> maybe MAX(16, (MIN_PAGE_SIZE * )/ len). Likewise above.

I'm not sure how that helps - the issue was that 16 iterations was not only
too small for smaller sizes but also too large for large sizes. This uses 8
iterations for the largest sizes (which takes ~7 seconds on an older machine).

> Also, can you include the iter count in the output?

The time it prints is already divided by iterations, so it's not directly usable.

A better option for these benchmarks would be to print the bandwidth.

Cheers,
Wilco
Noah Goldstein March 28, 2024, 7:28 p.m. UTC | #4
On Wed, Mar 27, 2024 at 12:02 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> Hi Noah,
>
> > > @@ -33,7 +31,7 @@ static void
> > >  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
> > >              size_t len)
> > >  {
> > > -  size_t i, iters = 16;
> > > +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
> > maybe MAX(16, (MIN_PAGE_SIZE * )/ len). Likewise above.
>
> I'm not sure how that helps - the issue was that 16 iterations was not only
> too small for smaller sizes but also too large for large sizes. This uses 8
> iterations for the largest sizes (which takes ~7 seconds on an older machine).
I see, didn't realize 16 was too high.
Although do think we should ensure at least 1 iter.

>
> > Also, can you include the iter count in the output?
>
> The time it prints is already divided by iterations, so it's not directly usable.
>
> A better option for these benchmarks would be to print the bandwidth.

iter count allows us to better estimate the variance.
>
> Cheers,
> Wilco
diff mbox series

Patch

diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
index 7076c0a17b00d76c26aa6adb4c3cc0aedcf69955..dea414ec8d1bb160409725996244178ee4fa93fd 100644
--- a/benchtests/bench-bzero-large.c
+++ b/benchtests/bench-bzero-large.c
@@ -22,9 +22,8 @@ 
 #else
 # define TEST_NAME "bzero"
 #endif
-#define START_SIZE (128 * 1024)
-#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
-#define TIMEOUT (20 * 60)
+#define START_SIZE (64 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #include "bench-string.h"
 
 #include "json-lib.h"
@@ -52,7 +51,7 @@  IMPL (memset_zero, 0)
 static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -74,20 +73,13 @@  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
 static void
 do_test (json_ctx_t *json_ctx, size_t align, size_t len)
 {
-  align &= 63;
-  if ((align + len) * sizeof (CHAR) > page_size)
-    return;
-
   json_element_object_begin (json_ctx);
   json_attr_uint (json_ctx, "length", len);
   json_attr_uint (json_ctx, "alignment", align);
   json_array_begin (json_ctx, "timings");
 
   FOR_EACH_IMPL (impl, 0)
-    {
-      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
-      alloc_bufs ();
-    }
+    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
 
   json_array_end (json_ctx);
   json_element_object_end (json_ctx);
diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
index 7b2c5530af3883acc4b3895cb11667e6ff1a55ff..9e544a4729197f886f6cb092ec225004b586d197 100644
--- a/benchtests/bench-memcpy-large.c
+++ b/benchtests/bench-memcpy-large.c
@@ -19,10 +19,9 @@ 
 #ifndef MEMCPY_RESULT
 # define MEMCPY_RESULT(dst, len) dst
 # define START_SIZE (64 * 1024)
-# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
+# define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 # define TEST_MAIN
 # define TEST_NAME "memcpy"
-# define TIMEOUT (20 * 60)
 # include "bench-string.h"
 
 IMPL (memcpy, 1)
@@ -36,7 +35,7 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src,
 	     size_t len)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -59,12 +58,7 @@  do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len,
   char *s1, *s2;
   size_t repeats;
   align1 &= 4095;
-  if (align1 + len >= page_size)
-    return;
-
   align2 &= 4095;
-  if (align2 + len >= page_size)
-    return;
 
   s1 = (char *) (buf1 + align1);
   s2 = (char *) (buf2 + align2);
diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
index a09dd3678848a3bf8612732439700eb8ef5d82ea..fd504653f681b172800cc34e054cc745180aa4fa 100644
--- a/benchtests/bench-memmove-large.c
+++ b/benchtests/bench-memmove-large.c
@@ -16,12 +16,10 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define BASE_PAGE_SIZE (1024 * 1024)
-#define START_SIZE (4 * 1024)
+#define START_SIZE (64 * 1024)
 #define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #define TEST_MAIN
 #define TEST_NAME "memmove"
-#define TIMEOUT (20 * 60)
 #include "bench-string.h"
 #include "json-lib.h"
 
@@ -33,7 +31,7 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
 	     size_t len)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -54,13 +52,8 @@  do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len)
   size_t i, j;
   char *s1, *s2;
 
-  align1 &= 127;
-  if (align1 + len >= page_size)
-    return;
-
-  align2 &= 127;
-  if (align2 + len >= page_size)
-    return;
+  align1 &= 4095;
+  align2 &= 4095;
 
   s1 = (char *) (buf2 + align1);
   s2 = (char *) (buf2 + align2);
diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
index a1f33245d4960c1e82fc441b5faf322204035202..c3f9ee0cd75d3cb30f8b3df15672f895c111fc28 100644
--- a/benchtests/bench-memset-large.c
+++ b/benchtests/bench-memset-large.c
@@ -18,9 +18,8 @@ 
 
 #define TEST_MAIN
 #define TEST_NAME "memset"
-#define START_SIZE (128 * 1024)
-#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
-#define TIMEOUT (20 * 60)
+#define START_SIZE (64 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #include "bench-string.h"
 
 #include "json-lib.h"
@@ -35,7 +34,7 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
 	     int c __attribute ((unused)), size_t n)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -53,10 +52,6 @@  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
 static void
 do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
 {
-  align &= 63;
-  if ((align + len) * sizeof (CHAR) > page_size)
-    return;
-
   json_element_object_begin (json_ctx);
   json_attr_uint (json_ctx, "length", len);
   json_attr_uint (json_ctx, "alignment", align);
@@ -64,10 +59,7 @@  do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
   json_array_begin (json_ctx, "timings");
 
   FOR_EACH_IMPL (impl, 0)
-    {
-      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
-      alloc_bufs ();
-    }
+    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
 
   json_array_end (json_ctx);
   json_element_object_end (json_ctx);