Message ID | 20220628152757.17922-1-goldstein.w.n@gmail.com
---|---
State | New
Series | [v1,1/2] x86: Move mem{p}{mov|cpy}_{chk_}erms to its own file
On Tue, Jun 28, 2022 at 8:28 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> The primary memmove_{impl}_unaligned_erms implementations don't
> interact with this function.  Putting them in the same file both
> wastes space and unnecessarily bloats a hot code section.
> ---
>  sysdeps/x86_64/multiarch/memmove-erms.S       | 53 +++++++++++++++++++
>  .../multiarch/memmove-vec-unaligned-erms.S    | 50 -----------------
>  2 files changed, 53 insertions(+), 50 deletions(-)
>  create mode 100644 sysdeps/x86_64/multiarch/memmove-erms.S
>
> [... full diff quoted; see the patch at the bottom of this page ...]

Please make a standalone patch.
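The dispatch logic in __memmove_erms is dense, so here is a C reference
model of the same control flow.  This is only an illustrative sketch
(memmove_erms_model is a made-up name, and byte loops stand in for the
single `rep movsb' the assembly uses); it is not glibc code.

#include <stddef.h>

/* Reference model of the __memmove_erms dispatch.  The assembly performs
   each copy with one `rep movsb' (backward via std/cld); the pointer
   comparisons mirror the assembly and assume a flat address space.  */
static void *
memmove_erms_model (void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  /* Skip zero length; source == destination is less common.  */
  if (n == 0 || d == s)
    return dst;

  /* Forward copy is safe when dst is below src or past src's end.  */
  if (d < s || d >= s + n)
    for (size_t i = 0; i < n; i++)      /* 1: rep movsb  */
      d[i] = s[i];
  else
    for (size_t i = n; i-- > 0;)        /* L(movsb_backward)  */
      d[i] = s[i];

  return dst;
}

__mempcpy_erms differs only in its return value: it adds the length to
the destination up front (add %RDX_LP, %RAX_LP) and then jumps into the
shared L(start_movsb) path, so it returns dst + n instead of dst.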
On Wed, Jun 29, 2022 at 12:32 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 8:28 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > The primary memmove_{impl}_unaligned_erms implementations don't
> > interact with this function.  Putting them in the same file both
> > wastes space and unnecessarily bloats a hot code section.
> >
> > [... full quoted patch snipped ...]
>
> Please make a standalone patch.

The memmove ISA raising change has a dependency on it, hence the series.
Submit this first then resubmit memmove?  Same for memset-erms /
memset-isa raising?
On Wed, Jun 29, 2022 at 12:34 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Jun 29, 2022 at 12:32 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > [... full quoted patch snipped ...]
> >
> > Please make a standalone patch.
>
> The memmove ISA raising change has a dependency on it, hence the series.
> Submit this first then resubmit memmove?  Same for memset-erms /
> memset-isa raising?

Yes.  Moving the erms version to a separate file should be standalone.
Each patch should build.  You should combine the 2 memset-erms changes
in the 2 memset patches into a single patch.
On Wed, Jun 29, 2022 at 12:46 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> [... full quoted patch snipped ...]
>
> > > Please make a standalone patch.
> >
> > The memmove ISA raising change has a dependency on it, hence the series.
> > Submit this first then resubmit memmove?  Same for memset-erms /
> > memset-isa raising?
>
> Yes.  Moving the erms version to a separate file should be standalone.
> Each patch should build.

The reason it doesn't is that the ISA raising for memmove-vec...
would require handling the memmove_erms case for it to build;
otherwise ISA V3/V4 would leave memmove_erms as undefined symbols
in the multiarch build.

> You should combine the 2 memset-erms changes
> in the 2 memset patches into a single patch.

?
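To make the undefined-symbol point concrete: in the multiarch build the
ifunc resolver references __memmove_erms when selecting an implementation,
and a symbol referenced from a resolver must be satisfied at link time even
on builds whose CPU check would never pick it.  The following self-contained
toy (not glibc source; my_memmove, memmove_resolver, memmove_fallback, and
prefer_erms are made-up names) shows the shape of the problem:

#include <string.h>

/* Stand-in definition; in glibc this symbol comes from memmove-erms.S.
   Remove it and the link fails with "undefined reference to
   __memmove_erms" -- the ISA V3/V4 failure mode described above.  */
void *
__memmove_erms (void *dst, const void *src, size_t n)
{
  return memmove (dst, src, n);
}

static void *
memmove_fallback (void *dst, const void *src, size_t n)
{
  return memmove (dst, src, n);
}

static int
prefer_erms (void)
{
  return 0;     /* Stand-in for the real CPU-feature check.  */
}

/* The resolver runs at load time; merely mentioning __memmove_erms here
   creates the link-time dependency.  */
static __typeof (memmove_fallback) *
memmove_resolver (void)
{
  return prefer_erms () ? __memmove_erms : memmove_fallback;
}

void *my_memmove (void *, const void *, size_t)
     __attribute__ ((ifunc ("memmove_resolver")));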
On Wed, Jun 29, 2022 at 12:48 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> [... full quoted patch snipped ...]
>
> > > > Please make a standalone patch.
> > >
> > > The memmove ISA raising change has a dependency on it, hence the series.
> > > Submit this first then resubmit memmove?  Same for memset-erms /
> > > memset-isa raising?
> >
> > Yes.  Moving the erms version to a separate file should be standalone.
> > Each patch should build.
>
> The reason it doesn't is that the ISA raising for memmove-vec...
> would require handling the memmove_erms case for it to build;
> otherwise ISA V3/V4 would leave memmove_erms as undefined symbols
> in the multiarch build.

The first patch in your patch set isn't standalone since the Makefile
change is in the second patch.

> > You should combine the 2 memset-erms changes
> > in the 2 memset patches into a single patch.
>
> ?

The first patch moves memset-erms to the file end
and the second patch moves it to another file.
On Wed, Jun 29, 2022 at 12:59 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> [... full quoted patch and earlier replies snipped ...]
>
> > The reason it doesn't is that the ISA raising for memmove-vec...
> > would require handling the memmove_erms case for it to build;
> > otherwise ISA V3/V4 would leave memmove_erms as undefined symbols
> > in the multiarch build.
>
> The first patch in your patch set isn't standalone since the Makefile
> change is in the second patch.

Err, I think there is a misunderstanding.

I am going to fix up to make the memmove_erms / memset_erms changes
standalone.  My point is that the following ISA raising patches are
dependent on the respective erms functions being moved.

Hence it's a series.

> The first patch moves memset-erms to the file end
> and the second patch moves it to another file.
On Wed, Jun 29, 2022 at 1:03 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> [... full quoted patch and earlier replies snipped ...]
>
> Err, I think there is a misunderstanding.
>
> I am going to fix up to make the memmove_erms / memset_erms changes
> standalone.  My point is that the following ISA raising patches are
> dependent on the respective erms functions being moved.
>
> Hence it's a series.

A series is OK.
On Wed, Jun 29, 2022 at 1:17 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> [... full quoted patch and earlier replies snipped ...]
>
> > Err, I think there is a misunderstanding.
> >
> > I am going to fix up to make the memmove_erms / memset_erms changes
> > standalone.  My point is that the following ISA raising patches are
> > dependent on the respective erms functions being moved.
> >
> > Hence it's a series.
>
> A series is OK.

Fixed up both memset/memmove erms patches in V2.
diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S
new file mode 100644
index 0000000000..d98d21644b
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-erms.S
@@ -0,0 +1,53 @@
+#include <sysdep.h>
+
+#if defined USE_MULTIARCH && IS_IN (libc)
+	.text
+ENTRY (__mempcpy_chk_erms)
+	cmp	%RDX_LP, %RCX_LP
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__mempcpy_chk_erms)
+
+/* Only used to measure performance of REP MOVSB.  */
+ENTRY (__mempcpy_erms)
+	mov	%RDI_LP, %RAX_LP
+	/* Skip zero length.  */
+	test	%RDX_LP, %RDX_LP
+	jz	2f
+	add	%RDX_LP, %RAX_LP
+	jmp	L(start_movsb)
+END (__mempcpy_erms)
+
+ENTRY (__memmove_chk_erms)
+	cmp	%RDX_LP, %RCX_LP
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memmove_chk_erms)
+
+ENTRY (__memmove_erms)
+	movq	%rdi, %rax
+	/* Skip zero length.  */
+	test	%RDX_LP, %RDX_LP
+	jz	2f
+L(start_movsb):
+	mov	%RDX_LP, %RCX_LP
+	cmp	%RSI_LP, %RDI_LP
+	jb	1f
+	/* Source == destination is less common.  */
+	je	2f
+	lea	(%rsi,%rcx), %RDX_LP
+	cmp	%RDX_LP, %RDI_LP
+	jb	L(movsb_backward)
+1:
+	rep movsb
+2:
+	ret
+L(movsb_backward):
+	leaq	-1(%rdi,%rcx), %rdi
+	leaq	-1(%rsi,%rcx), %rsi
+	std
+	rep movsb
+	cld
+	ret
+END (__memmove_erms)
+strong_alias (__memmove_erms, __memcpy_erms)
+strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index d1518b8bab..04747133b7 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -239,56 +239,6 @@ L(start):
 #endif
 #if defined USE_MULTIARCH && IS_IN (libc)
 END (MEMMOVE_SYMBOL (__memmove, unaligned))
-# if VEC_SIZE == 16
-ENTRY (__mempcpy_chk_erms)
-	cmp	%RDX_LP, %RCX_LP
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (__mempcpy_chk_erms)
-
-/* Only used to measure performance of REP MOVSB.  */
-ENTRY (__mempcpy_erms)
-	mov	%RDI_LP, %RAX_LP
-	/* Skip zero length.  */
-	test	%RDX_LP, %RDX_LP
-	jz	2f
-	add	%RDX_LP, %RAX_LP
-	jmp	L(start_movsb)
-END (__mempcpy_erms)
-
-ENTRY (__memmove_chk_erms)
-	cmp	%RDX_LP, %RCX_LP
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (__memmove_chk_erms)
-
-ENTRY (__memmove_erms)
-	movq	%rdi, %rax
-	/* Skip zero length.  */
-	test	%RDX_LP, %RDX_LP
-	jz	2f
-L(start_movsb):
-	mov	%RDX_LP, %RCX_LP
-	cmp	%RSI_LP, %RDI_LP
-	jb	1f
-	/* Source == destination is less common.  */
-	je	2f
-	lea	(%rsi,%rcx), %RDX_LP
-	cmp	%RDX_LP, %RDI_LP
-	jb	L(movsb_backward)
-1:
-	rep movsb
-2:
-	ret
-L(movsb_backward):
-	leaq	-1(%rdi,%rcx), %rdi
-	leaq	-1(%rsi,%rcx), %rsi
-	std
-	rep movsb
-	cld
-	ret
-END (__memmove_erms)
-strong_alias (__memmove_erms, __memcpy_erms)
-strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
-# endif
 
 # ifdef SHARED
 ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
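A closing note on the `_chk' entry points in the patch: they implement the
_FORTIFY_SOURCE destination-size check and then fall straight through into
the unchecked routine.  Here is a C sketch of the semantics (illustrative
only -- memmove_chk_model and the dstlen parameter name are made up;
__memmove_erms is internal to libc, and the real version is the assembly
above, which falls through rather than making a call):

#include <stddef.h>

extern void *__memmove_erms (void *dst, const void *src, size_t n);
extern void __chk_fail (void) __attribute__ ((noreturn));

/* dstlen is the compiler-known size of the destination object (what
   __builtin_object_size reports under _FORTIFY_SOURCE).  The
   `cmp %RDX_LP, %RCX_LP; jb __chk_fail' sequence is this comparison.  */
void *
memmove_chk_model (void *dst, const void *src, size_t n, size_t dstlen)
{
  if (dstlen < n)
    __chk_fail ();
  return __memmove_erms (dst, src, n);
}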