Message ID | 20220422175800.1262332-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86-64: Fix SSE2 memcmp and SSSE3 memmove for x32 | expand |
On Fri, Apr 22, 2022 at 12:58 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > Clear the upper 32 bits in RDX (memory size) for x32 to fix > > FAIL: string/tst-size_t-memcmp > FAIL: string/tst-size_t-memcmp-2 > FAIL: string/tst-size_t-memcpy > FAIL: wcsmbs/tst-size_t-wmemcmp > > on x32 introduced by > > 8804157ad9 x86: Optimize memcmp SSE2 in memcmp.S > 26b2478322 x86: Reduce code size of mem{move|pcpy|cpy}-ssse3 > --- > sysdeps/x86_64/memcmp.S | 4 ++++ > sysdeps/x86_64/multiarch/memmove-ssse3.S | 4 ++++ > 2 files changed, 8 insertions(+) > > diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S > index b153694048..5718a7da86 100644 > --- a/sysdeps/x86_64/memcmp.S > +++ b/sysdeps/x86_64/memcmp.S > @@ -46,6 +46,10 @@ > > .text > ENTRY(MEMCMP) > +# ifdef __ILP32__ > + /* Clear the upper 32 bits. */ > + movl %edx, %edx > +# endif > #ifdef USE_AS_WMEMCMP > /* Use 0xffff to test for mismatches on pmovmskb bitmask. Store > in ecx for code size. This is preferable to using `incw` as > diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S > index 215583e7bd..310ff62b86 100644 > --- a/sysdeps/x86_64/multiarch/memmove-ssse3.S > +++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S > @@ -27,6 +27,10 @@ ENTRY(MEMMOVE_CHK) > END(MEMMOVE_CHK) > > ENTRY_P2ALIGN(MEMMOVE, 6) > +# ifdef __ILP32__ > + /* Clear the upper 32 bits. */ > + movl %edx, %edx > +# endif > movq %rdi, %rax > L(start): > cmpq $16, %rdx > -- > 2.35.1 > LGTM.
On Fri, Apr 22, 2022 at 11:07 AM Noah Goldstein via Libc-alpha <libc-alpha@sourceware.org> wrote: > > On Fri, Apr 22, 2022 at 12:58 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > Clear the upper 32 bits in RDX (memory size) for x32 to fix > > > > FAIL: string/tst-size_t-memcmp > > FAIL: string/tst-size_t-memcmp-2 > > FAIL: string/tst-size_t-memcpy > > FAIL: wcsmbs/tst-size_t-wmemcmp > > > > on x32 introduced by > > > > 8804157ad9 x86: Optimize memcmp SSE2 in memcmp.S > > 26b2478322 x86: Reduce code size of mem{move|pcpy|cpy}-ssse3 > > --- > > sysdeps/x86_64/memcmp.S | 4 ++++ > > sysdeps/x86_64/multiarch/memmove-ssse3.S | 4 ++++ > > 2 files changed, 8 insertions(+) > > > > diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S > > index b153694048..5718a7da86 100644 > > --- a/sysdeps/x86_64/memcmp.S > > +++ b/sysdeps/x86_64/memcmp.S > > @@ -46,6 +46,10 @@ > > > > .text > > ENTRY(MEMCMP) > > +# ifdef __ILP32__ > > + /* Clear the upper 32 bits. */ > > + movl %edx, %edx > > +# endif > > #ifdef USE_AS_WMEMCMP > > /* Use 0xffff to test for mismatches on pmovmskb bitmask. Store > > in ecx for code size. This is preferable to using `incw` as > > diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S > > index 215583e7bd..310ff62b86 100644 > > --- a/sysdeps/x86_64/multiarch/memmove-ssse3.S > > +++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S > > @@ -27,6 +27,10 @@ ENTRY(MEMMOVE_CHK) > > END(MEMMOVE_CHK) > > > > ENTRY_P2ALIGN(MEMMOVE, 6) > > +# ifdef __ILP32__ > > + /* Clear the upper 32 bits. */ > > + movl %edx, %edx > > +# endif > > movq %rdi, %rax > > L(start): > > cmpq $16, %rdx > > -- > > 2.35.1 > > > > LGTM. I would like to backport this patch to release branches. Any comments or objections? Conflict resolution patch attached. --Sunil
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
index b153694048..5718a7da86 100644
--- a/sysdeps/x86_64/memcmp.S
+++ b/sysdeps/x86_64/memcmp.S
@@ -46,6 +46,10 @@
 
 	.text
 ENTRY(MEMCMP)
+# ifdef __ILP32__
+	/* Clear the upper 32 bits. */
+	movl	%edx, %edx
+# endif
 #ifdef USE_AS_WMEMCMP
 	/* Use 0xffff to test for mismatches on pmovmskb bitmask. Store
 	   in ecx for code size. This is preferable to using `incw` as
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S
index 215583e7bd..310ff62b86 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S
@@ -27,6 +27,10 @@ ENTRY(MEMMOVE_CHK)
 END(MEMMOVE_CHK)
 
 ENTRY_P2ALIGN(MEMMOVE, 6)
+# ifdef __ILP32__
+	/* Clear the upper 32 bits. */
+	movl	%edx, %edx
+# endif
 	movq	%rdi, %rax
 L(start):
 	cmpq	$16, %rdx