Message ID: 20220629221349.1242862-1-goldstein.w.n@gmail.com
State: New
Series: [v2,1/2] x86: Move mem{p}{mov|cpy}_{chk_}erms to its own file
On Wed, Jun 29, 2022 at 3:13 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> The primary memmove_{impl}_unaligned_erms implementations don't
> interact with these functions.  Putting them in the same file both
> wastes space and unnecessarily bloats a hot code section.
> ---
>  sysdeps/x86_64/multiarch/Makefile             |  1 +
>  sysdeps/x86_64/multiarch/memmove-erms.S       | 53 +++++++++++++++++++
>  .../multiarch/memmove-vec-unaligned-erms.S    | 50 -----------------
>  3 files changed, 54 insertions(+), 50 deletions(-)
>  create mode 100644 sysdeps/x86_64/multiarch/memmove-erms.S
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 666ee4d5d6..62a4d96fb8 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -18,6 +18,7 @@ sysdep_routines += \
>    memmove-avx-unaligned-erms-rtm \
>    memmove-avx512-no-vzeroupper \
>    memmove-avx512-unaligned-erms \
> +  memmove-erms \
>    memmove-evex-unaligned-erms \
>    memmove-sse2-unaligned-erms \
>    memmove-ssse3 \
> diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S
> new file mode 100644
> index 0000000000..d98d21644b
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memmove-erms.S
> @@ -0,0 +1,53 @@

Need copyright notice.

> +#include <sysdep.h>
> +
> +#if defined USE_MULTIARCH && IS_IN (libc)
> +	.text
> +ENTRY (__mempcpy_chk_erms)
> +	cmp	%RDX_LP, %RCX_LP
> +	jb	HIDDEN_JUMPTARGET (__chk_fail)
> +END (__mempcpy_chk_erms)
> +
> +/* Only used to measure performance of REP MOVSB.  */
> +ENTRY (__mempcpy_erms)
> +	mov	%RDI_LP, %RAX_LP
> +	/* Skip zero length.  */
> +	test	%RDX_LP, %RDX_LP
> +	jz	2f
> +	add	%RDX_LP, %RAX_LP
> +	jmp	L(start_movsb)
> +END (__mempcpy_erms)
> +
> +ENTRY (__memmove_chk_erms)
> +	cmp	%RDX_LP, %RCX_LP
> +	jb	HIDDEN_JUMPTARGET (__chk_fail)
> +END (__memmove_chk_erms)
> +
> +ENTRY (__memmove_erms)
> +	movq	%rdi, %rax
> +	/* Skip zero length.  */
> +	test	%RDX_LP, %RDX_LP
> +	jz	2f
> +L(start_movsb):
> +	mov	%RDX_LP, %RCX_LP
> +	cmp	%RSI_LP, %RDI_LP
> +	jb	1f
> +	/* Source == destination is less common.  */
> +	je	2f
> +	lea	(%rsi,%rcx), %RDX_LP
> +	cmp	%RDX_LP, %RDI_LP
> +	jb	L(movsb_backward)
> +1:
> +	rep movsb
> +2:
> +	ret
> +L(movsb_backward):
> +	leaq	-1(%rdi,%rcx), %rdi
> +	leaq	-1(%rsi,%rcx), %rsi
> +	std
> +	rep movsb
> +	cld
> +	ret
> +END (__memmove_erms)
> +strong_alias (__memmove_erms, __memcpy_erms)
> +strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
> +#endif
> diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> index d1518b8bab..04747133b7 100644
> --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> @@ -239,56 +239,6 @@ L(start):
>  #endif
>  #if defined USE_MULTIARCH && IS_IN (libc)
>  END (MEMMOVE_SYMBOL (__memmove, unaligned))
> -# if VEC_SIZE == 16
> -ENTRY (__mempcpy_chk_erms)
> -	cmp	%RDX_LP, %RCX_LP
> -	jb	HIDDEN_JUMPTARGET (__chk_fail)
> -END (__mempcpy_chk_erms)
> -
> -/* Only used to measure performance of REP MOVSB.  */
> -ENTRY (__mempcpy_erms)
> -	mov	%RDI_LP, %RAX_LP
> -	/* Skip zero length.  */
> -	test	%RDX_LP, %RDX_LP
> -	jz	2f
> -	add	%RDX_LP, %RAX_LP
> -	jmp	L(start_movsb)
> -END (__mempcpy_erms)
> -
> -ENTRY (__memmove_chk_erms)
> -	cmp	%RDX_LP, %RCX_LP
> -	jb	HIDDEN_JUMPTARGET (__chk_fail)
> -END (__memmove_chk_erms)
> -
> -ENTRY (__memmove_erms)
> -	movq	%rdi, %rax
> -	/* Skip zero length.  */
> -	test	%RDX_LP, %RDX_LP
> -	jz	2f
> -L(start_movsb):
> -	mov	%RDX_LP, %RCX_LP
> -	cmp	%RSI_LP, %RDI_LP
> -	jb	1f
> -	/* Source == destination is less common.  */
> -	je	2f
> -	lea	(%rsi,%rcx), %RDX_LP
> -	cmp	%RDX_LP, %RDI_LP
> -	jb	L(movsb_backward)
> -1:
> -	rep movsb
> -2:
> -	ret
> -L(movsb_backward):
> -	leaq	-1(%rdi,%rcx), %rdi
> -	leaq	-1(%rsi,%rcx), %rsi
> -	std
> -	rep movsb
> -	cld
> -	ret
> -END (__memmove_erms)
> -strong_alias (__memmove_erms, __memcpy_erms)
> -strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
> -# endif
>
>  # ifdef SHARED
>  ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
> --
> 2.34.1
>
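For readers following the assembly: the control flow of __memmove_erms above corresponds roughly to the C sketch below. This is an illustration only (the function name memmove_erms_sketch is invented here), and the byte loops stand in for the forward and backward `rep movsb` that the real routine executes as a single instruction.

#include <stddef.h>

/* Rough C equivalent of __memmove_erms's control flow.  Hypothetical
   name; not glibc code.  */
static void *
memmove_erms_sketch (void *dst, const void *src, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  if (len == 0)                /* test %RDX_LP, %RDX_LP; jz 2f  */
    return dst;
  if (d == s)                  /* Source == destination: nothing to do.  */
    return dst;
  if (d < s || d >= s + len)   /* Destination below the source, or past
                                  its end: a forward copy is safe.  */
    {
      while (len--)            /* Stands in for forward `rep movsb`.  */
        *d++ = *s++;
    }
  else                         /* Destination inside [src, src+len):
                                  copy backward (std; rep movsb; cld).  */
    {
      d += len - 1;
      s += len - 1;
      while (len--)
        *d-- = *s--;
    }
  return dst;
}

__mempcpy_erms reuses the same body by jumping to L(start_movsb); it differs only in returning dst + len instead of dst, which is what the `add %RDX_LP, %RAX_LP` before the jump arranges.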
On Wed, Jun 29, 2022 at 3:20 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Jun 29, 2022 at 3:13 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > The primary memmove_{impl}_unaligned_erms implementations don't
> > interact with these functions.  Putting them in the same file both
> > wastes space and unnecessarily bloats a hot code section.
> > ---
> >  sysdeps/x86_64/multiarch/Makefile             |  1 +
> >  sysdeps/x86_64/multiarch/memmove-erms.S       | 53 +++++++++++++++++++
> >  .../multiarch/memmove-vec-unaligned-erms.S    | 50 -----------------
> >  3 files changed, 54 insertions(+), 50 deletions(-)
> >  create mode 100644 sysdeps/x86_64/multiarch/memmove-erms.S
> >
> > [...]
> >
> > diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S
> > new file mode 100644
> > index 0000000000..d98d21644b
> > --- /dev/null
> > +++ b/sysdeps/x86_64/multiarch/memmove-erms.S
> > @@ -0,0 +1,53 @@
>
> Need copyright notice.

Fixed in V3.

> > [...]
>
> --
> H.J.
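The two strong_alias lines at the end of the new file export __memcpy_erms and __memcpy_chk_erms as additional names for the memmove entry points, which is safe because the memmove versions also handle the non-overlapping memcpy case. A simplified C-level sketch of that effect, using GCC's alias attribute and hypothetical names (the assembly macro instead defines a second global symbol at the same address; this is not glibc's actual macro expansion):

#include <string.h>

/* One definition...  */
void *
my_memmove_erms (void *dst, const void *src, size_t len)
{
  return memmove (dst, src, len);
}

/* ...exported under a second name: the C analogue of
   strong_alias (__memmove_erms, __memcpy_erms).  */
extern __typeof (my_memmove_erms) my_memcpy_erms
  __attribute__ ((alias ("my_memmove_erms")));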
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 666ee4d5d6..62a4d96fb8 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -18,6 +18,7 @@ sysdep_routines += \
   memmove-avx-unaligned-erms-rtm \
   memmove-avx512-no-vzeroupper \
   memmove-avx512-unaligned-erms \
+  memmove-erms \
   memmove-evex-unaligned-erms \
   memmove-sse2-unaligned-erms \
   memmove-ssse3 \
diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S
new file mode 100644
index 0000000000..d98d21644b
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-erms.S
@@ -0,0 +1,53 @@
+#include <sysdep.h>
+
+#if defined USE_MULTIARCH && IS_IN (libc)
+	.text
+ENTRY (__mempcpy_chk_erms)
+	cmp	%RDX_LP, %RCX_LP
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__mempcpy_chk_erms)
+
+/* Only used to measure performance of REP MOVSB.  */
+ENTRY (__mempcpy_erms)
+	mov	%RDI_LP, %RAX_LP
+	/* Skip zero length.  */
+	test	%RDX_LP, %RDX_LP
+	jz	2f
+	add	%RDX_LP, %RAX_LP
+	jmp	L(start_movsb)
+END (__mempcpy_erms)
+
+ENTRY (__memmove_chk_erms)
+	cmp	%RDX_LP, %RCX_LP
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memmove_chk_erms)
+
+ENTRY (__memmove_erms)
+	movq	%rdi, %rax
+	/* Skip zero length.  */
+	test	%RDX_LP, %RDX_LP
+	jz	2f
+L(start_movsb):
+	mov	%RDX_LP, %RCX_LP
+	cmp	%RSI_LP, %RDI_LP
+	jb	1f
+	/* Source == destination is less common.  */
+	je	2f
+	lea	(%rsi,%rcx), %RDX_LP
+	cmp	%RDX_LP, %RDI_LP
+	jb	L(movsb_backward)
+1:
+	rep movsb
+2:
+	ret
+L(movsb_backward):
+	leaq	-1(%rdi,%rcx), %rdi
+	leaq	-1(%rsi,%rcx), %rsi
+	std
+	rep movsb
+	cld
+	ret
+END (__memmove_erms)
+strong_alias (__memmove_erms, __memcpy_erms)
+strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index d1518b8bab..04747133b7 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -239,56 +239,6 @@ L(start):
 #endif
 #if defined USE_MULTIARCH && IS_IN (libc)
 END (MEMMOVE_SYMBOL (__memmove, unaligned))
-# if VEC_SIZE == 16
-ENTRY (__mempcpy_chk_erms)
-	cmp	%RDX_LP, %RCX_LP
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (__mempcpy_chk_erms)
-
-/* Only used to measure performance of REP MOVSB.  */
-ENTRY (__mempcpy_erms)
-	mov	%RDI_LP, %RAX_LP
-	/* Skip zero length.  */
-	test	%RDX_LP, %RDX_LP
-	jz	2f
-	add	%RDX_LP, %RAX_LP
-	jmp	L(start_movsb)
-END (__mempcpy_erms)
-
-ENTRY (__memmove_chk_erms)
-	cmp	%RDX_LP, %RCX_LP
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (__memmove_chk_erms)
-
-ENTRY (__memmove_erms)
-	movq	%rdi, %rax
-	/* Skip zero length.  */
-	test	%RDX_LP, %RDX_LP
-	jz	2f
-L(start_movsb):
-	mov	%RDX_LP, %RCX_LP
-	cmp	%RSI_LP, %RDI_LP
-	jb	1f
-	/* Source == destination is less common.  */
-	je	2f
-	lea	(%rsi,%rcx), %RDX_LP
-	cmp	%RDX_LP, %RDI_LP
-	jb	L(movsb_backward)
-1:
-	rep movsb
-2:
-	ret
-L(movsb_backward):
-	leaq	-1(%rdi,%rcx), %rdi
-	leaq	-1(%rsi,%rcx), %rsi
-	std
-	rep movsb
-	cld
-	ret
-END (__memmove_erms)
-strong_alias (__memmove_erms, __memcpy_erms)
-strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
-# endif
 
 # ifdef SHARED
 ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
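Note also how the *_chk_erms entry points work: each ends without a ret or jmp, so after the cmp/jb check control falls through into the unchecked routine placed immediately after it in the file. The check itself implements the _FORTIFY_SOURCE contract, in which a fourth argument carries the destination buffer size (%rcx) and __chk_fail is reached when that size is smaller than the copy length (%rdx). A rough C sketch of that contract follows, with hypothetical names standing in for the glibc internals:

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for glibc's __chk_fail, which aborts the
   process on a detected buffer overflow.  */
static void
chk_fail_sketch (void)
{
  abort ();
}

/* Sketch of __memmove_chk_erms: `cmp %RDX_LP, %RCX_LP;
   jb HIDDEN_JUMPTARGET (__chk_fail)` compares the copy length (%rdx)
   with the destination object size (%rcx) and fails when the
   destination is too small; otherwise execution falls through to the
   unchecked routine.  */
static void *
memmove_chk_erms_sketch (void *dst, const void *src, size_t len,
                         size_t dst_size)
{
  if (dst_size < len)
    chk_fail_sketch ();              /* jb __chk_fail  */
  return memmove (dst, src, len);    /* Fall-through to __memmove_erms.  */
}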