Message ID | 1510204408-1739-2-git-send-email-siddhesh@sourceware.org |
---|---|
State | New |
Headers | show |
Series | memset zva optimization | expand |
Any thoughts on this benchmark fix? I'll push it by the end of the week if there are no objections.

Siddhesh

On Thursday 09 November 2017 10:43 AM, Siddhesh Poyarekar wrote:
> Make the walking benchmarks walk only backwards since copying both
> ways is biased in favour of implementations that use non-temporal
> stores for larger sizes; falkor is one of them. This also fixes up
> bugs in computation of the result which ended up multiplying the
> length with the timing result unnecessarily.
>
> * benchtests/bench-memcpy-walk.c (do_one_test): Copy only
> backwards. Fix timing computation.
> * benchtests/bench-memmove-walk.c (do_one_test): Likewise.
> * benchtests/bench-memset-walk.c (do_one_test): Walk backwards
> on memset by N at a time. Fix timing computation.
> ---
> benchtests/bench-memcpy-walk.c | 14 +++++---------
> benchtests/bench-memmove-walk.c | 15 +++++----------
> benchtests/bench-memset-walk.c | 4 ++--
> 3 files changed, 12 insertions(+), 21 deletions(-)
>
> diff --git a/benchtests/bench-memcpy-walk.c b/benchtests/bench-memcpy-walk.c
> index 69d467d..5b56341 100644
> --- a/benchtests/bench-memcpy-walk.c
> +++ b/benchtests/bench-memcpy-walk.c
> @@ -47,26 +47,22 @@ static void
> do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
> size_t len)
> {
> - size_t i, iters = MIN_PAGE_SIZE / len;
> + size_t i = 0;
> timing_t start, stop, cur;
>
> char *dst_end = dst + MIN_PAGE_SIZE - len;
> char *src_end = src + MIN_PAGE_SIZE - len;
>
> TIMING_NOW (start);
> - /* Copy the entire buffer back and forth, LEN at a time. */
> - for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
> - {
> - CALL (impl, dst_end, src, len);
> - CALL (impl, src, dst_end, len);
> - i += 2;
> - }
> + /* Copy the entire buffer backwards, LEN at a time. */
> + for (; src_end >= src && dst_end >= dst; src_end -= len, dst_end -= len, i++)
> + CALL (impl, src_end, dst_end, len);
> TIMING_NOW (stop);
>
> TIMING_DIFF (cur, start, stop);
>
> /* Get time taken per function call. */
> - json_element_double (json_ctx, (double) cur * len / i);
> + json_element_double (json_ctx, (double) cur / i);
> }
>
> static void
> diff --git a/benchtests/bench-memmove-walk.c b/benchtests/bench-memmove-walk.c
> index 54dcd64..969ddd9 100644
> --- a/benchtests/bench-memmove-walk.c
> +++ b/benchtests/bench-memmove-walk.c
> @@ -47,26 +47,22 @@ static void
> do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
> size_t len)
> {
> - size_t i, iters = MIN_PAGE_SIZE / len;
> + size_t i = 0;
> timing_t start, stop, cur;
>
> char *dst_end = dst + MIN_PAGE_SIZE - len;
> char *src_end = src + MIN_PAGE_SIZE - len;
>
> TIMING_NOW (start);
> - /* Copy the entire buffer back and forth, LEN at a time. */
> - for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
> - {
> - CALL (impl, dst_end, src, len);
> - CALL (impl, src, dst_end, len);
> - i += 2;
> - }
> + /* Copy the entire buffer backwards, LEN at a time. */
> + for (; src_end >= src && dst <= dst_end; dst += len, src_end -= len, i++)
> + CALL (impl, dst, src_end, len);
> TIMING_NOW (stop);
>
> TIMING_DIFF (cur, start, stop);
>
> /* Get time taken per function call. */
> - json_element_double (json_ctx, (double) cur * len / i);
> + json_element_double (json_ctx, (double) cur / i);
> }
>
> static void
> @@ -79,7 +75,6 @@ do_test (json_ctx_t *json_ctx, size_t len, bool overlap)
> if (overlap)
> buf2 = buf1;
>
> - /* First the non-overlapping moves. */
> FOR_EACH_IMPL (impl, 0)
> do_one_test (json_ctx, impl, (char *) buf2, (char *) buf1, len);
>
> diff --git a/benchtests/bench-memset-walk.c b/benchtests/bench-memset-walk.c
> index 59d2626..80fbe09 100644
> --- a/benchtests/bench-memset-walk.c
> +++ b/benchtests/bench-memset-walk.c
> @@ -66,14 +66,14 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, CHAR *s_end,
> timing_t start, stop, cur;
>
> TIMING_NOW (start);
> - for (i = 0; i < iters && s <= s_end; s++, i++)
> + for (i = 0; i < iters && s <= s_end; s_end -= n, i++)
> CALL (impl, s, c, n);
> TIMING_NOW (stop);
>
> TIMING_DIFF (cur, start, stop);
>
> /* Get time taken per function call. */
> - json_element_double (json_ctx, (double) cur * n / i);
> + json_element_double (json_ctx, (double) cur / i);
> }
>
> static void
>
... and now pushed.

Siddhesh

On Thursday 09 November 2017 10:43 AM, Siddhesh Poyarekar wrote:
> Make the walking benchmarks walk only backwards since copying both
> ways is biased in favour of implementations that use non-temporal
> stores for larger sizes; falkor is one of them. This also fixes up
> bugs in computation of the result which ended up multiplying the
> length with the timing result unnecessarily.
>
> * benchtests/bench-memcpy-walk.c (do_one_test): Copy only
> backwards. Fix timing computation.
> * benchtests/bench-memmove-walk.c (do_one_test): Likewise.
> * benchtests/bench-memset-walk.c (do_one_test): Walk backwards
> on memset by N at a time. Fix timing computation.
> ---
> benchtests/bench-memcpy-walk.c | 14 +++++---------
> benchtests/bench-memmove-walk.c | 15 +++++----------
> benchtests/bench-memset-walk.c | 4 ++--
> 3 files changed, 12 insertions(+), 21 deletions(-)
>
> diff --git a/benchtests/bench-memcpy-walk.c b/benchtests/bench-memcpy-walk.c
> index 69d467d..5b56341 100644
> --- a/benchtests/bench-memcpy-walk.c
> +++ b/benchtests/bench-memcpy-walk.c
> @@ -47,26 +47,22 @@ static void
> do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
> size_t len)
> {
> - size_t i, iters = MIN_PAGE_SIZE / len;
> + size_t i = 0;
> timing_t start, stop, cur;
>
> char *dst_end = dst + MIN_PAGE_SIZE - len;
> char *src_end = src + MIN_PAGE_SIZE - len;
>
> TIMING_NOW (start);
> - /* Copy the entire buffer back and forth, LEN at a time. */
> - for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
> - {
> - CALL (impl, dst_end, src, len);
> - CALL (impl, src, dst_end, len);
> - i += 2;
> - }
> + /* Copy the entire buffer backwards, LEN at a time. */
> + for (; src_end >= src && dst_end >= dst; src_end -= len, dst_end -= len, i++)
> + CALL (impl, src_end, dst_end, len);
> TIMING_NOW (stop);
>
> TIMING_DIFF (cur, start, stop);
>
> /* Get time taken per function call. */
> - json_element_double (json_ctx, (double) cur * len / i);
> + json_element_double (json_ctx, (double) cur / i);
> }
>
> static void
> diff --git a/benchtests/bench-memmove-walk.c b/benchtests/bench-memmove-walk.c
> index 54dcd64..969ddd9 100644
> --- a/benchtests/bench-memmove-walk.c
> +++ b/benchtests/bench-memmove-walk.c
> @@ -47,26 +47,22 @@ static void
> do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
> size_t len)
> {
> - size_t i, iters = MIN_PAGE_SIZE / len;
> + size_t i = 0;
> timing_t start, stop, cur;
>
> char *dst_end = dst + MIN_PAGE_SIZE - len;
> char *src_end = src + MIN_PAGE_SIZE - len;
>
> TIMING_NOW (start);
> - /* Copy the entire buffer back and forth, LEN at a time. */
> - for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
> - {
> - CALL (impl, dst_end, src, len);
> - CALL (impl, src, dst_end, len);
> - i += 2;
> - }
> + /* Copy the entire buffer backwards, LEN at a time. */
> + for (; src_end >= src && dst <= dst_end; dst += len, src_end -= len, i++)
> + CALL (impl, dst, src_end, len);
> TIMING_NOW (stop);
>
> TIMING_DIFF (cur, start, stop);
>
> /* Get time taken per function call. */
> - json_element_double (json_ctx, (double) cur * len / i);
> + json_element_double (json_ctx, (double) cur / i);
> }
>
> static void
> @@ -79,7 +75,6 @@ do_test (json_ctx_t *json_ctx, size_t len, bool overlap)
> if (overlap)
> buf2 = buf1;
>
> - /* First the non-overlapping moves. */
> FOR_EACH_IMPL (impl, 0)
> do_one_test (json_ctx, impl, (char *) buf2, (char *) buf1, len);
>
> diff --git a/benchtests/bench-memset-walk.c b/benchtests/bench-memset-walk.c
> index 59d2626..80fbe09 100644
> --- a/benchtests/bench-memset-walk.c
> +++ b/benchtests/bench-memset-walk.c
> @@ -66,14 +66,14 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, CHAR *s_end,
> timing_t start, stop, cur;
>
> TIMING_NOW (start);
> - for (i = 0; i < iters && s <= s_end; s++, i++)
> + for (i = 0; i < iters && s <= s_end; s_end -= n, i++)
> CALL (impl, s, c, n);
> TIMING_NOW (stop);
>
> TIMING_DIFF (cur, start, stop);
>
> /* Get time taken per function call. */
> - json_element_double (json_ctx, (double) cur * n / i);
> + json_element_double (json_ctx, (double) cur / i);
> }
>
> static void
>
diff --git a/benchtests/bench-memcpy-walk.c b/benchtests/bench-memcpy-walk.c index 69d467d..5b56341 100644 --- a/benchtests/bench-memcpy-walk.c +++ b/benchtests/bench-memcpy-walk.c @@ -47,26 +47,22 @@ static void do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src, size_t len) { - size_t i, iters = MIN_PAGE_SIZE / len; + size_t i = 0; timing_t start, stop, cur; char *dst_end = dst + MIN_PAGE_SIZE - len; char *src_end = src + MIN_PAGE_SIZE - len; TIMING_NOW (start); - /* Copy the entire buffer back and forth, LEN at a time. */ - for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--) - { - CALL (impl, dst_end, src, len); - CALL (impl, src, dst_end, len); - i += 2; - } + /* Copy the entire buffer backwards, LEN at a time. */ + for (; src_end >= src && dst_end >= dst; src_end -= len, dst_end -= len, i++) + CALL (impl, src_end, dst_end, len); TIMING_NOW (stop); TIMING_DIFF (cur, start, stop); /* Get time taken per function call. */ - json_element_double (json_ctx, (double) cur * len / i); + json_element_double (json_ctx, (double) cur / i); } static void diff --git a/benchtests/bench-memmove-walk.c b/benchtests/bench-memmove-walk.c index 54dcd64..969ddd9 100644 --- a/benchtests/bench-memmove-walk.c +++ b/benchtests/bench-memmove-walk.c @@ -47,26 +47,22 @@ static void do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src, size_t len) { - size_t i, iters = MIN_PAGE_SIZE / len; + size_t i = 0; timing_t start, stop, cur; char *dst_end = dst + MIN_PAGE_SIZE - len; char *src_end = src + MIN_PAGE_SIZE - len; TIMING_NOW (start); - /* Copy the entire buffer back and forth, LEN at a time. */ - for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--) - { - CALL (impl, dst_end, src, len); - CALL (impl, src, dst_end, len); - i += 2; - } + /* Copy the entire buffer backwards, LEN at a time. 
*/ + for (; src_end >= src && dst <= dst_end; dst += len, src_end -= len, i++) + CALL (impl, dst, src_end, len); TIMING_NOW (stop); TIMING_DIFF (cur, start, stop); /* Get time taken per function call. */ - json_element_double (json_ctx, (double) cur * len / i); + json_element_double (json_ctx, (double) cur / i); } static void @@ -79,7 +75,6 @@ do_test (json_ctx_t *json_ctx, size_t len, bool overlap) if (overlap) buf2 = buf1; - /* First the non-overlapping moves. */ FOR_EACH_IMPL (impl, 0) do_one_test (json_ctx, impl, (char *) buf2, (char *) buf1, len); diff --git a/benchtests/bench-memset-walk.c b/benchtests/bench-memset-walk.c index 59d2626..80fbe09 100644 --- a/benchtests/bench-memset-walk.c +++ b/benchtests/bench-memset-walk.c @@ -66,14 +66,14 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, CHAR *s_end, timing_t start, stop, cur; TIMING_NOW (start); - for (i = 0; i < iters && s <= s_end; s++, i++) + for (i = 0; i < iters && s <= s_end; s_end -= n, i++) CALL (impl, s, c, n); TIMING_NOW (stop); TIMING_DIFF (cur, start, stop); /* Get time taken per function call. */ - json_element_double (json_ctx, (double) cur * n / i); + json_element_double (json_ctx, (double) cur / i); } static void