Message ID | 1481834313-5626-1-git-send-email-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Ping. On 15/12/2016 18:38, Adhemerval Zanella wrote: > Current optimized memchr for x86_64 does for input arguments pointers > module 64 in range of [49,63] if there is no searchr char in the rest > of 64-byte block a pointer addition which might overflow: > > * sysdeps/x86_64/memchr.S > > 77 .p2align 4 > 78 L(unaligned_no_match): > 79 add %rcx, %rdx > > Add (uintptr_t)s % 16 to n in %rdx. > > 80 sub $16, %rdx > 81 jbe L(return_null) > > This patch fixes by adding a saturated math that sets a maximum pointer > value if it overflows (UINTPTR_MAX). This is similar of the fix for > powerpc64/power7 version [1] and rely on for test-memchr.c changes. > > Checked on x86_64-linux-gnu and powerpc64-linux-gnu. > > [1] https://sourceware.org/ml/libc-alpha/2016-12/msg00576.html > > [BZ# 19387] > * sysdeps/x86_64/memchr.S (memchr): Avoid overflow in pointer > addition. > * string/test-memchr.c (do_test): Remove alignment limitation. > (test_main): Add test that trigger BZ# 19387. > --- > ChangeLog | 6 ++++++ > string/test-memchr.c | 9 ++++----- > sysdeps/x86_64/memchr.S | 8 ++++++++ > 3 files changed, 18 insertions(+), 5 deletions(-) > > diff --git a/string/test-memchr.c b/string/test-memchr.c > index 7431386..6901711 100644 > --- a/string/test-memchr.c > +++ b/string/test-memchr.c > @@ -76,7 +76,6 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char) > size_t i; > CHAR *result; > > - align &= 7; > if ((align + len) * sizeof (CHAR) >= page_size) > return; > > @@ -194,12 +193,12 @@ test_main (void) > do_test (i, 64, 256, SIZE_MAX, 0); > } > > - for (i = 1; i < 16; ++i) > + for (i = 1; i < 64; ++i) > { > - for (j = 1; j < 16; j++) > + for (j = 1; j < 64; j++) > { > - do_test (0, 16 - j, 32, SIZE_MAX, 23); > - do_test (i, 16 - j, 32, SIZE_MAX, 23); > + do_test (0, 64 - j, 64, SIZE_MAX, 23); > + do_test (i, 64 - j, 64, SIZE_MAX, 23); > } > } > > diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S > index 132eacb..b140de1 100644 > --- a/sysdeps/x86_64/memchr.S > +++ b/sysdeps/x86_64/memchr.S > @@ -76,7 +76,15 @@ L(crosscache): > > .p2align 4 > L(unaligned_no_match): > + /* Calculate the last acceptable address and check for possible > + addition overflow by using satured math: > + rdx = rcx + rdx > + rdx |= -(rdx < x) */ > add %rcx, %rdx > + sbb %eax, %eax > + movslq %eax, %rax > + orq %rdx, %rax > + mov %rax, %rdx > sub $16, %rdx > jbe L(return_null) > add $16, %rdi >
On 12/20/2016 06:03 AM, Adhemerval Zanella wrote: > + sbb %eax, %eax > + movslq %eax, %rax This should be "sbb %rax, %rax" in one step. > + orq %rdx, %rax > + mov %rax, %rdx Given that the comment says you wanted the result in %rdx, why not "or %rax, %rdx"? r~
On 20/12/2016 15:07, Richard Henderson wrote: > On 12/20/2016 06:03 AM, Adhemerval Zanella wrote: >> + sbb %eax, %eax >> + movslq %eax, %rax > > This should be "sbb %rax, %rax" in one step. > >> + orq %rdx, %rax >> + mov %rax, %rdx > > Given that the comment says you wanted the result in %rdx, > why not "or %rax, %rdx"? Indeed it seems the 4 instructions can be simplified to just sbb %rax, %rax or %rax, %rdx Thanks for the advice.
diff --git a/string/test-memchr.c b/string/test-memchr.c index 7431386..6901711 100644 --- a/string/test-memchr.c +++ b/string/test-memchr.c @@ -76,7 +76,6 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char) size_t i; CHAR *result; - align &= 7; if ((align + len) * sizeof (CHAR) >= page_size) return; @@ -194,12 +193,12 @@ test_main (void) do_test (i, 64, 256, SIZE_MAX, 0); } - for (i = 1; i < 16; ++i) + for (i = 1; i < 64; ++i) { - for (j = 1; j < 16; j++) + for (j = 1; j < 64; j++) { - do_test (0, 16 - j, 32, SIZE_MAX, 23); - do_test (i, 16 - j, 32, SIZE_MAX, 23); + do_test (0, 64 - j, 64, SIZE_MAX, 23); + do_test (i, 64 - j, 64, SIZE_MAX, 23); } } diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S index 132eacb..b140de1 100644 --- a/sysdeps/x86_64/memchr.S +++ b/sysdeps/x86_64/memchr.S @@ -76,7 +76,15 @@ L(crosscache): .p2align 4 L(unaligned_no_match): + /* Calculate the last acceptable address and check for possible + addition overflow by using satured math: + rdx = rcx + rdx + rdx |= -(rdx < x) */ add %rcx, %rdx + sbb %eax, %eax + movslq %eax, %rax + orq %rdx, %rax + mov %rax, %rdx sub $16, %rdx jbe L(return_null) add $16, %rdi