Message ID | 20220608213459.3348847-1-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v2] x86: Fix page cross case in rawmemchr-avx2 [BZ #29234] | expand |
On Wed, Jun 8, 2022 at 2:35 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > commit 6dcbb7d95dded20153b12d76d2f4e0ef0cda4f35 > Author: Noah Goldstein <goldstein.w.n@gmail.com> > Date: Mon Jun 6 21:11:33 2022 -0700 > > x86: Shrink code size of memchr-avx2.S > > Changed how the page cross case aligned string (rdi) in > rawmemchr. This was incompatible with how > `L(cross_page_continue)` expected the pointer to be aligned and > would cause rawmemchr to read data start started before the > beginning of the string. What it would read was in valid memory > but could count CHAR matches resulting in an incorrect return > value. > > This commit fixes that issue by essentially reverting the changes to > the L(page_cross) case as they didn't really matter. > > Test cases added and all pass with the new code (and where confirmed > to fail with the old code). > --- > string/test-rawmemchr.c | 57 +++++++++++++++++++++++++- > sysdeps/x86_64/multiarch/memchr-avx2.S | 16 ++++---- > 2 files changed, 64 insertions(+), 9 deletions(-) > > diff --git a/string/test-rawmemchr.c b/string/test-rawmemchr.c > index cafb75298a..703e8ec27c 100644 > --- a/string/test-rawmemchr.c > +++ b/string/test-rawmemchr.c > @@ -17,6 +17,7 @@ > <https://www.gnu.org/licenses/>. */ > > #include <assert.h> > +#include <support/xunistd.h> > > #define TEST_MAIN > #define TEST_NAME "rawmemchr" > @@ -50,13 +51,45 @@ do_one_test (impl_t *impl, const char *s, int c, char *exp_res) > } > } > > +static void > +do_test_bz29234 (void) > +{ > + size_t i, j; > + char *ptr_start; > + char *buf = xmmap (0, 8192, PROT_READ | PROT_WRITE, > + MAP_PRIVATE | MAP_ANONYMOUS, -1); > + > + memset (buf, -1, 8192); > + > + ptr_start = buf + 4096 - 8; > + > + /* Out of range matches before the start of a page. */ > + memset (ptr_start - 8, 0x1, 8); > + > + for (j = 0; j < 8; ++j) > + { > + for (i = 0; i < 128; ++i) > + { > + ptr_start[i + j] = 0x1; > + > + FOR_EACH_IMPL (impl, 0) > + do_one_test (impl, (char *) (ptr_start + j), 0x1, > + ptr_start + i + j); > + > + ptr_start[i + j] = 0xff; > + } > + } > + > + xmunmap (buf, 8192); > +} > + > static void > do_test (size_t align, size_t pos, size_t len, int seek_char) > { > size_t i; > char *result; > > - align &= 7; > + align &= getpagesize () - 1; > if (align + len >= page_size) > return; > > @@ -114,6 +147,13 @@ do_random_tests (void) > } > } > > + if (align) > + { > + p[align - 1] = seek_char; > + if (align > 4) > + p[align - 4] = seek_char; > + } > + > assert (pos < len); > size_t r = random (); > if ((r & 31) == 0) > @@ -129,6 +169,13 @@ do_random_tests (void) > result, p); > ret = 1; > } > + > + if (align) > + { > + p[align - 1] = seek_char; > + if (align > 4) > + p[align - 4] = seek_char; > + } > } > } > > @@ -150,14 +197,22 @@ test_main (void) > do_test (i, 64, 256, 23); > do_test (0, 16 << i, 2048, 0); > do_test (i, 64, 256, 0); > + > + do_test (getpagesize () - i, 64, 256, 23); > + do_test (getpagesize () - i, 64, 256, 0); > } > for (i = 1; i < 32; ++i) > { > do_test (0, i, i + 1, 23); > do_test (0, i, i + 1, 0); > + > + do_test (getpagesize () - 7, i, i + 1, 23); > + do_test (getpagesize () - i / 2, i, i + 1, 23); > + do_test (getpagesize () - i, i, i + 1, 23); > } > > do_random_tests (); > + do_test_bz29234 (); > return ret; > } > > diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S > index 28a01280ec..c5a256eb37 100644 > --- a/sysdeps/x86_64/multiarch/memchr-avx2.S > +++ b/sysdeps/x86_64/multiarch/memchr-avx2.S > @@ -409,19 +409,19 @@ L(cross_page_boundary): > computer return address if byte is found or adjusting length if it > is not and this is memchr. */ > movq %rdi, %rcx > - /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi for > - rawmemchr. */ > - andq $-VEC_SIZE, %ALGN_PTR_REG > - VPCMPEQ (%ALGN_PTR_REG), %ymm0, %ymm1 > + /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr > + and rdi for rawmemchr. */ > + orq $(VEC_SIZE - 1), %ALGN_PTR_REG > + VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1 > vpmovmskb %ymm1, %eax > # ifndef USE_AS_RAWMEMCHR > /* Calculate length until end of page (length checked for a match). */ > - leal VEC_SIZE(%ALGN_PTR_REG), %esi > - subl %ERAW_PTR_REG, %esi > -# ifdef USE_AS_WMEMCHR > + leaq 1(%ALGN_PTR_REG), %rsi > + subq %RRAW_PTR_REG, %rsi > +# ifdef USE_AS_WMEMCHR > /* NB: Divide bytes by 4 to get wchar_t count. */ > shrl $2, %esi > -# endif > +# endif > # endif > /* Remove the leading bytes. */ > sarxl %ERAW_PTR_REG, %eax, %eax > -- > 2.34.1 > LGTM. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> Thanks.
On Wed, Jun 8, 2022 at 3:41 PM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote: > > On Wed, Jun 8, 2022 at 2:35 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > commit 6dcbb7d95dded20153b12d76d2f4e0ef0cda4f35 > > Author: Noah Goldstein <goldstein.w.n@gmail.com> > > Date: Mon Jun 6 21:11:33 2022 -0700 > > > > x86: Shrink code size of memchr-avx2.S > > > > Changed how the page cross case aligned string (rdi) in > > rawmemchr. This was incompatible with how > > `L(cross_page_continue)` expected the pointer to be aligned and > > would cause rawmemchr to read data start started before the > > beginning of the string. What it would read was in valid memory > > but could count CHAR matches resulting in an incorrect return > > value. > > > > This commit fixes that issue by essentially reverting the changes to > > the L(page_cross) case as they didn't really matter. > > > > Test cases added and all pass with the new code (and where confirmed > > to fail with the old code). > > --- > > string/test-rawmemchr.c | 57 +++++++++++++++++++++++++- > > sysdeps/x86_64/multiarch/memchr-avx2.S | 16 ++++---- > > 2 files changed, 64 insertions(+), 9 deletions(-) > > > > diff --git a/string/test-rawmemchr.c b/string/test-rawmemchr.c > > index cafb75298a..703e8ec27c 100644 > > --- a/string/test-rawmemchr.c > > +++ b/string/test-rawmemchr.c > > @@ -17,6 +17,7 @@ > > <https://www.gnu.org/licenses/>. */ > > > > #include <assert.h> > > +#include <support/xunistd.h> > > > > #define TEST_MAIN > > #define TEST_NAME "rawmemchr" > > @@ -50,13 +51,45 @@ do_one_test (impl_t *impl, const char *s, int c, char *exp_res) > > } > > } > > > > +static void > > +do_test_bz29234 (void) > > +{ > > + size_t i, j; > > + char *ptr_start; > > + char *buf = xmmap (0, 8192, PROT_READ | PROT_WRITE, > > + MAP_PRIVATE | MAP_ANONYMOUS, -1); > > + > > + memset (buf, -1, 8192); > > + > > + ptr_start = buf + 4096 - 8; > > + > > + /* Out of range matches before the start of a page. */ > > + memset (ptr_start - 8, 0x1, 8); > > + > > + for (j = 0; j < 8; ++j) > > + { > > + for (i = 0; i < 128; ++i) > > + { > > + ptr_start[i + j] = 0x1; > > + > > + FOR_EACH_IMPL (impl, 0) > > + do_one_test (impl, (char *) (ptr_start + j), 0x1, > > + ptr_start + i + j); > > + > > + ptr_start[i + j] = 0xff; > > + } > > + } > > + > > + xmunmap (buf, 8192); > > +} > > + > > static void > > do_test (size_t align, size_t pos, size_t len, int seek_char) > > { > > size_t i; > > char *result; > > > > - align &= 7; > > + align &= getpagesize () - 1; > > if (align + len >= page_size) > > return; > > > > @@ -114,6 +147,13 @@ do_random_tests (void) > > } > > } > > > > + if (align) > > + { > > + p[align - 1] = seek_char; > > + if (align > 4) > > + p[align - 4] = seek_char; > > + } > > + > > assert (pos < len); > > size_t r = random (); > > if ((r & 31) == 0) > > @@ -129,6 +169,13 @@ do_random_tests (void) > > result, p); > > ret = 1; > > } > > + > > + if (align) > > + { > > + p[align - 1] = seek_char; > > + if (align > 4) > > + p[align - 4] = seek_char; > > + } > > } > > } > > > > @@ -150,14 +197,22 @@ test_main (void) > > do_test (i, 64, 256, 23); > > do_test (0, 16 << i, 2048, 0); > > do_test (i, 64, 256, 0); > > + > > + do_test (getpagesize () - i, 64, 256, 23); > > + do_test (getpagesize () - i, 64, 256, 0); > > } > > for (i = 1; i < 32; ++i) > > { > > do_test (0, i, i + 1, 23); > > do_test (0, i, i + 1, 0); > > + > > + do_test (getpagesize () - 7, i, i + 1, 23); > > + do_test (getpagesize () - i / 2, i, i + 1, 23); > > + do_test (getpagesize () - i, i, i + 1, 23); > > } > > > > do_random_tests (); > > + do_test_bz29234 (); > > return ret; > > } > > > > diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S > > index 28a01280ec..c5a256eb37 100644 > > --- a/sysdeps/x86_64/multiarch/memchr-avx2.S > > +++ b/sysdeps/x86_64/multiarch/memchr-avx2.S > > @@ -409,19 +409,19 @@ L(cross_page_boundary): > > computer return address if byte is found or adjusting length if it > > is not and this is memchr. */ > > movq %rdi, %rcx > > - /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi for > > - rawmemchr. */ > > - andq $-VEC_SIZE, %ALGN_PTR_REG > > - VPCMPEQ (%ALGN_PTR_REG), %ymm0, %ymm1 > > + /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr > > + and rdi for rawmemchr. */ > > + orq $(VEC_SIZE - 1), %ALGN_PTR_REG > > + VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1 > > vpmovmskb %ymm1, %eax > > # ifndef USE_AS_RAWMEMCHR > > /* Calculate length until end of page (length checked for a match). */ > > - leal VEC_SIZE(%ALGN_PTR_REG), %esi > > - subl %ERAW_PTR_REG, %esi > > -# ifdef USE_AS_WMEMCHR > > + leaq 1(%ALGN_PTR_REG), %rsi > > + subq %RRAW_PTR_REG, %rsi > > +# ifdef USE_AS_WMEMCHR > > /* NB: Divide bytes by 4 to get wchar_t count. */ > > shrl $2, %esi > > -# endif > > +# endif > > # endif > > /* Remove the leading bytes. */ > > sarxl %ERAW_PTR_REG, %eax, %eax > > -- > > 2.34.1 > > > > LGTM. > > Reviewed-by: H.J. Lu <hjl.tools@gmail.com> > > Thanks. > > -- > H.J. I would like to backport this patch to release branches. Any comments or objections? --Sunil
diff --git a/string/test-rawmemchr.c b/string/test-rawmemchr.c index cafb75298a..703e8ec27c 100644 --- a/string/test-rawmemchr.c +++ b/string/test-rawmemchr.c @@ -17,6 +17,7 @@ <https://www.gnu.org/licenses/>. */ #include <assert.h> +#include <support/xunistd.h> #define TEST_MAIN #define TEST_NAME "rawmemchr" @@ -50,13 +51,45 @@ do_one_test (impl_t *impl, const char *s, int c, char *exp_res) } } +static void +do_test_bz29234 (void) +{ + size_t i, j; + char *ptr_start; + char *buf = xmmap (0, 8192, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + + memset (buf, -1, 8192); + + ptr_start = buf + 4096 - 8; + + /* Out of range matches before the start of a page. */ + memset (ptr_start - 8, 0x1, 8); + + for (j = 0; j < 8; ++j) + { + for (i = 0; i < 128; ++i) + { + ptr_start[i + j] = 0x1; + + FOR_EACH_IMPL (impl, 0) + do_one_test (impl, (char *) (ptr_start + j), 0x1, + ptr_start + i + j); + + ptr_start[i + j] = 0xff; + } + } + + xmunmap (buf, 8192); +} + static void do_test (size_t align, size_t pos, size_t len, int seek_char) { size_t i; char *result; - align &= 7; + align &= getpagesize () - 1; if (align + len >= page_size) return; @@ -114,6 +147,13 @@ do_random_tests (void) } } + if (align) + { + p[align - 1] = seek_char; + if (align > 4) + p[align - 4] = seek_char; + } + assert (pos < len); size_t r = random (); if ((r & 31) == 0) @@ -129,6 +169,13 @@ do_random_tests (void) result, p); ret = 1; } + + if (align) + { + p[align - 1] = seek_char; + if (align > 4) + p[align - 4] = seek_char; + } } } @@ -150,14 +197,22 @@ test_main (void) do_test (i, 64, 256, 23); do_test (0, 16 << i, 2048, 0); do_test (i, 64, 256, 0); + + do_test (getpagesize () - i, 64, 256, 23); + do_test (getpagesize () - i, 64, 256, 0); } for (i = 1; i < 32; ++i) { do_test (0, i, i + 1, 23); do_test (0, i, i + 1, 0); + + do_test (getpagesize () - 7, i, i + 1, 23); + do_test (getpagesize () - i / 2, i, i + 1, 23); + do_test (getpagesize () - i, i, i + 1, 23); } do_random_tests (); + do_test_bz29234 (); return ret; } diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S index 28a01280ec..c5a256eb37 100644 --- a/sysdeps/x86_64/multiarch/memchr-avx2.S +++ b/sysdeps/x86_64/multiarch/memchr-avx2.S @@ -409,19 +409,19 @@ L(cross_page_boundary): computer return address if byte is found or adjusting length if it is not and this is memchr. */ movq %rdi, %rcx - /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi for - rawmemchr. */ - andq $-VEC_SIZE, %ALGN_PTR_REG - VPCMPEQ (%ALGN_PTR_REG), %ymm0, %ymm1 + /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr + and rdi for rawmemchr. */ + orq $(VEC_SIZE - 1), %ALGN_PTR_REG + VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1 vpmovmskb %ymm1, %eax # ifndef USE_AS_RAWMEMCHR /* Calculate length until end of page (length checked for a match). */ - leal VEC_SIZE(%ALGN_PTR_REG), %esi - subl %ERAW_PTR_REG, %esi -# ifdef USE_AS_WMEMCHR + leaq 1(%ALGN_PTR_REG), %rsi + subq %RRAW_PTR_REG, %rsi +# ifdef USE_AS_WMEMCHR /* NB: Divide bytes by 4 to get wchar_t count. */ shrl $2, %esi -# endif +# endif # endif /* Remove the leading bytes. */ sarxl %ERAW_PTR_REG, %eax, %eax