Message ID | 20220207063854.3324172-1-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v1] x86: Remove SSE3 instruction for broadcast in memset.S (SSE2 Only) | expand |
On Sun, Feb 6, 2022 at 10:39 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> commit b62ace2740a106222e124cc86956448fa07abf4d
> Author: Noah Goldstein <goldstein.w.n@gmail.com>
> Date: Sun Feb 6 00:54:18 2022 -0600
>
> x86: Improve vec generation in memset-vec-unaligned-erms.S
>
> Revert usage of 'pshufb' in broadcast logic as it is an SSE3
> instruction and memset.S is restricted to only SSE2 instructions.
> ---
> sysdeps/x86_64/memset.S | 19 ++++++++++---------
> 1 file changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
> index ccf036be53..148553cf3d 100644
> --- a/sysdeps/x86_64/memset.S
> +++ b/sysdeps/x86_64/memset.S
> @@ -28,22 +28,23 @@
> #define VMOVU movups
> #define VMOVA movaps
>
> -# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> +#define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> movd d, %xmm0; \
> - pxor %xmm1, %xmm1; \
> - pshufb %xmm1, %xmm0; \
> - movq r, %rax
> + movq r, %rax; \
> + punpcklbw %xmm0, %xmm0; \
> + punpcklwd %xmm0, %xmm0; \
> + pshufd $0, %xmm0, %xmm0
>
> -# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> +#define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> movd d, %xmm0; \
> pshufd $0, %xmm0, %xmm0; \
> movq r, %rax
>
> -# define MEMSET_VDUP_TO_VEC0_HIGH()
> -# define MEMSET_VDUP_TO_VEC0_LOW()
> +#define MEMSET_VDUP_TO_VEC0_HIGH()
> +#define MEMSET_VDUP_TO_VEC0_LOW()

What are these changes for?

> -# define WMEMSET_VDUP_TO_VEC0_HIGH()
> -# define WMEMSET_VDUP_TO_VEC0_LOW()
> +#define WMEMSET_VDUP_TO_VEC0_HIGH()
> +#define WMEMSET_VDUP_TO_VEC0_LOW()

What are these changes for?

> #define SECTION(p) p
>
> --
> 2.25.1
>
On Mon, Feb 7, 2022 at 4:54 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sun, Feb 6, 2022 at 10:39 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > commit b62ace2740a106222e124cc86956448fa07abf4d
> > Author: Noah Goldstein <goldstein.w.n@gmail.com>
> > Date: Sun Feb 6 00:54:18 2022 -0600
> >
> > x86: Improve vec generation in memset-vec-unaligned-erms.S
> >
> > Revert usage of 'pshufb' in broadcast logic as it is an SSE3

pshufb is an SSSE3, not SSE3, instruction.

> > instruction and memset.S is restricted to only SSE2 instructions.
> > ---
> > sysdeps/x86_64/memset.S | 19 ++++++++++---------
> > 1 file changed, 10 insertions(+), 9 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
> > index ccf036be53..148553cf3d 100644
> > --- a/sysdeps/x86_64/memset.S
> > +++ b/sysdeps/x86_64/memset.S
> > @@ -28,22 +28,23 @@
> > #define VMOVU movups
> > #define VMOVA movaps
> >
> > -# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> > +#define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> > movd d, %xmm0; \
> > - pxor %xmm1, %xmm1; \
> > - pshufb %xmm1, %xmm0; \
> > - movq r, %rax
> > + movq r, %rax; \
> > + punpcklbw %xmm0, %xmm0; \
> > + punpcklwd %xmm0, %xmm0; \
> > + pshufd $0, %xmm0, %xmm0
> >
> > -# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> > +#define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> > movd d, %xmm0; \
> > pshufd $0, %xmm0, %xmm0; \
> > movq r, %rax
> >
> > -# define MEMSET_VDUP_TO_VEC0_HIGH()
> > -# define MEMSET_VDUP_TO_VEC0_LOW()
> > +#define MEMSET_VDUP_TO_VEC0_HIGH()
> > +#define MEMSET_VDUP_TO_VEC0_LOW()
>
> What are these changes for?
>
> > -# define WMEMSET_VDUP_TO_VEC0_HIGH()
> > -# define WMEMSET_VDUP_TO_VEC0_LOW()
> > +#define WMEMSET_VDUP_TO_VEC0_HIGH()
> > +#define WMEMSET_VDUP_TO_VEC0_LOW()
>
> What are these changes for?
>
> > #define SECTION(p) p
> >
> > --
> > 2.25.1
> >
>
>
> --
> H.J.
On Mon, Feb 7, 2022 at 8:33 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Feb 7, 2022 at 4:54 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Sun, Feb 6, 2022 at 10:39 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > commit b62ace2740a106222e124cc86956448fa07abf4d
> > > Author: Noah Goldstein <goldstein.w.n@gmail.com>
> > > Date: Sun Feb 6 00:54:18 2022 -0600
> > >
> > > x86: Improve vec generation in memset-vec-unaligned-erms.S
> > >
> > > Revert usage of 'pshufb' in broadcast logic as it is an SSE3
>
> pshufb is an SSSE3, not SSE3, instruction.

Fixed. The commit message is different but V2 is up.

>
> > > instruction and memset.S is restricted to only SSE2 instructions.
> > > ---
> > > sysdeps/x86_64/memset.S | 19 ++++++++++---------
> > > 1 file changed, 10 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
> > > index ccf036be53..148553cf3d 100644
> > > --- a/sysdeps/x86_64/memset.S
> > > +++ b/sysdeps/x86_64/memset.S
> > > @@ -28,22 +28,23 @@
> > > #define VMOVU movups
> > > #define VMOVA movaps
> > >
> > > -# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> > > +#define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> > > movd d, %xmm0; \
> > > - pxor %xmm1, %xmm1; \
> > > - pshufb %xmm1, %xmm0; \
> > > - movq r, %rax
> > > + movq r, %rax; \
> > > + punpcklbw %xmm0, %xmm0; \
> > > + punpcklwd %xmm0, %xmm0; \
> > > + pshufd $0, %xmm0, %xmm0
> > >
> > > -# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> > > +#define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
> > > movd d, %xmm0; \
> > > pshufd $0, %xmm0, %xmm0; \
> > > movq r, %rax
> > >
> > > -# define MEMSET_VDUP_TO_VEC0_HIGH()
> > > -# define MEMSET_VDUP_TO_VEC0_LOW()
> > > +#define MEMSET_VDUP_TO_VEC0_HIGH()
> > > +#define MEMSET_VDUP_TO_VEC0_LOW()
> > >
> > What are these changes for?
> >
> > > -# define WMEMSET_VDUP_TO_VEC0_HIGH()
> > > -# define WMEMSET_VDUP_TO_VEC0_LOW()
> > > +#define WMEMSET_VDUP_TO_VEC0_HIGH()
> > > +#define WMEMSET_VDUP_TO_VEC0_LOW()
> >
> > What are these changes for?

Undid them in V2. Realized I had misindented them in my last commit.

> >
> > > #define SECTION(p) p
> > >
> > > --
> > > 2.25.1
> > >
> >
> >
> > --
> > H.J.
> >
>
> --
> H.J.
```diff
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index ccf036be53..148553cf3d 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -28,22 +28,23 @@
 #define VMOVU movups
 #define VMOVA movaps
 
-# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+#define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
   movd d, %xmm0; \
-  pxor %xmm1, %xmm1; \
-  pshufb %xmm1, %xmm0; \
-  movq r, %rax
+  movq r, %rax; \
+  punpcklbw %xmm0, %xmm0; \
+  punpcklwd %xmm0, %xmm0; \
+  pshufd $0, %xmm0, %xmm0
 
-# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+#define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
   movd d, %xmm0; \
   pshufd $0, %xmm0, %xmm0; \
   movq r, %rax
 
-# define MEMSET_VDUP_TO_VEC0_HIGH()
-# define MEMSET_VDUP_TO_VEC0_LOW()
+#define MEMSET_VDUP_TO_VEC0_HIGH()
+#define MEMSET_VDUP_TO_VEC0_LOW()
 
-# define WMEMSET_VDUP_TO_VEC0_HIGH()
-# define WMEMSET_VDUP_TO_VEC0_LOW()
+#define WMEMSET_VDUP_TO_VEC0_HIGH()
+#define WMEMSET_VDUP_TO_VEC0_LOW()
 
 #define SECTION(p) p
```