Message ID | 20211207020957.93540-1-haochen.jiang@intel.com |
---|---|
State | New |
Headers | show |
Series | [i386] Add combine splitter to transform vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0 | expand |
On Tue, Dec 7, 2021 at 3:10 AM Haochen Jiang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > This patch adds combine splitter to transform vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0. > > OK for trunk? > > BRs, > Haochen > > gcc/ChangeLog: > > PR target/100738 > * config/i386/sse.md (*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint): > Add new define_insn_and_split. > > gcc/testsuite/ChangeLog: > > PR target/100738 > * g++.target/i386/pr100738-1.C: New test. OK with a change below. Thanks, Uros. > > --- > gcc/config/i386/sse.md | 28 ++++++++++++++++++++++ > gcc/testsuite/g++.target/i386/pr100738-1.C | 19 +++++++++++++++ > 2 files changed, 47 insertions(+) > create mode 100755 gcc/testsuite/g++.target/i386/pr100738-1.C > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 08bdcddc111..db3506c78d7 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -20659,6 +20659,34 @@ > (set_attr "btver2_decode" "vector,vector,vector") > (set_attr "mode" "<ssefltvecmode>")]) > > +;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to vblendvps for inverted mask; > +(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint" > + [(set (match_operand:<ssebytemode> 0 "register_operand") > + (unspec:<ssebytemode> > + [(match_operand:<ssebytemode> 1 "register_operand") > + (match_operand:<ssebytemode> 2 "vector_operand") > + (subreg:<ssebytemode> > + (lt:VI48_AVX > + (subreg:VI48_AVX > + (not:<ssebytemode> > + (match_operand:<ssebytemode> 3 "register_operand")) 0) > + (match_operand:VI48_AVX 4 "const0_operand")) 0)] > + UNSPEC_BLENDV))] > + "TARGET_SSE4_1 && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (unspec:<ssefltvecmode> > + [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))] > +{ > + operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]); > + operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]); > + operands[2] = gen_lowpart 
(<ssefltvecmode>mode, operands[2]); > + operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]); > + if (MEM_P (operands[2])) > + operands[2] = force_reg (<ssefltvecmode>mode, operands[2]); You don't need to check for MEM_P, force_reg will do it for you. > +}) > + > (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>" > [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") > (unspec:VF_128_256 > diff --git a/gcc/testsuite/g++.target/i386/pr100738-1.C b/gcc/testsuite/g++.target/i386/pr100738-1.C > new file mode 100755 > index 00000000000..5a04c5b031f > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr100738-1.C > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -mavx2" } */ > +/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */ > +/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */ > +/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */ > + > +typedef int v4si __attribute__((vector_size(16))); > +typedef char v16qi __attribute__((vector_size(16))); > +v4si > +foo_1 (v16qi a, v4si b, v4si c, v4si d) > +{ > + return ((v4si)~a) < 0 ? c : d; > +} > + > +v4si > +foo_2 (v16qi a, v4si b, v4si c, v4si d) > +{ > + return ((v4si)~a) >= 0 ? c : d; > +} > -- > 2.18.1 >
Hi Uros, I have fixed that in this patch attached for checking in. Is that ok for trunk? Regtested on x86_64-pc-linux-gnu. Thx, Haochen -----Original Message----- From: Uros Bizjak <ubizjak@gmail.com> Sent: Wednesday, December 8, 2021 12:14 AM To: Jiang, Haochen <haochen.jiang@intel.com> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH] [i386]Add combine splitter to transform vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0 On Tue, Dec 7, 2021 at 3:10 AM Haochen Jiang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > This patch adds combine splitter to transform vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0. > > OK for trunk? > > BRs, > Haochen > > gcc/ChangeLog: > > PR target/100738 > * config/i386/sse.md (*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint): > Add new define_insn_and_split. > > gcc/testsuite/ChangeLog: > > PR target/100738 > * g++.target/i386/pr100738-1.C: New test. OK with a change below. Thanks, Uros. > > --- > gcc/config/i386/sse.md | 28 ++++++++++++++++++++++ > gcc/testsuite/g++.target/i386/pr100738-1.C | 19 +++++++++++++++ > 2 files changed, 47 insertions(+) > create mode 100755 gcc/testsuite/g++.target/i386/pr100738-1.C > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index > 08bdcddc111..db3506c78d7 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -20659,6 +20659,34 @@ > (set_attr "btver2_decode" "vector,vector,vector") > (set_attr "mode" "<ssefltvecmode>")]) > > +;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to > +vblendvps for inverted mask; (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint" > + [(set (match_operand:<ssebytemode> 0 "register_operand") > + (unspec:<ssebytemode> > + [(match_operand:<ssebytemode> 1 "register_operand") > + (match_operand:<ssebytemode> 2 "vector_operand") > + (subreg:<ssebytemode> > + (lt:VI48_AVX > + (subreg:VI48_AVX > + (not:<ssebytemode> > + (match_operand:<ssebytemode> 3 
"register_operand")) 0) > + (match_operand:VI48_AVX 4 "const0_operand")) 0)] > + UNSPEC_BLENDV))] > + "TARGET_SSE4_1 && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (unspec:<ssefltvecmode> > + [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))] > +{ > + operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]); > + operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]); > + operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]); > + operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]); > + if (MEM_P (operands[2])) > + operands[2] = force_reg (<ssefltvecmode>mode, operands[2]); You don't need to check for MEM_P, force_reg will do it for you. > +}) > + > (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>" > [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") > (unspec:VF_128_256 > diff --git a/gcc/testsuite/g++.target/i386/pr100738-1.C > b/gcc/testsuite/g++.target/i386/pr100738-1.C > new file mode 100755 > index 00000000000..5a04c5b031f > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr100738-1.C > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -mavx2" } */ > +/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */ > +/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */ > +/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */ > + > +typedef int v4si __attribute__((vector_size(16))); typedef char v16qi > +__attribute__((vector_size(16))); > +v4si > +foo_1 (v16qi a, v4si b, v4si c, v4si d) { > + return ((v4si)~a) < 0 ? c : d; > +} > + > +v4si > +foo_2 (v16qi a, v4si b, v4si c, v4si d) { > + return ((v4si)~a) >= 0 ? c : d; > +} > -- > 2.18.1 >
On Wed, Dec 8, 2021 at 11:13 AM Jiang, Haochen via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi Uros, > > I have fixed that in this patch attached for checking in. Is that ok for trunk? > Uros already said it's ok with that change, let me check in the patch for you. > Regtested on x86_64-pc-linux-gnu. > > Thx, > Haochen > > -----Original Message----- > From: Uros Bizjak <ubizjak@gmail.com> > Sent: Wednesday, December 8, 2021 12:14 AM > To: Jiang, Haochen <haochen.jiang@intel.com> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com> > Subject: Re: [PATCH] [i386]Add combine splitter to transform vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0 > > On Tue, Dec 7, 2021 at 3:10 AM Haochen Jiang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > > > This patch adds combine splitter to transform vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0. > > > > OK for trunk? > > > > BRs, > > Haochen > > > > gcc/ChangeLog: > > > > PR target/100738 > > * config/i386/sse.md (*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint): > > Add new define_insn_and_split. > > > > gcc/testsuite/ChangeLog: > > > > PR target/100738 > > * g++.target/i386/pr100738-1.C: New test. > > OK with a change below. > > Thanks, > Uros. 
> > > > > --- > > gcc/config/i386/sse.md | 28 ++++++++++++++++++++++ > > gcc/testsuite/g++.target/i386/pr100738-1.C | 19 +++++++++++++++ > > 2 files changed, 47 insertions(+) > > create mode 100755 gcc/testsuite/g++.target/i386/pr100738-1.C > > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index > > 08bdcddc111..db3506c78d7 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -20659,6 +20659,34 @@ > > (set_attr "btver2_decode" "vector,vector,vector") > > (set_attr "mode" "<ssefltvecmode>")]) > > > > +;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to > > +vblendvps for inverted mask; (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint" > > + [(set (match_operand:<ssebytemode> 0 "register_operand") > > + (unspec:<ssebytemode> > > + [(match_operand:<ssebytemode> 1 "register_operand") > > + (match_operand:<ssebytemode> 2 "vector_operand") > > + (subreg:<ssebytemode> > > + (lt:VI48_AVX > > + (subreg:VI48_AVX > > + (not:<ssebytemode> > > + (match_operand:<ssebytemode> 3 "register_operand")) 0) > > + (match_operand:VI48_AVX 4 "const0_operand")) 0)] > > + UNSPEC_BLENDV))] > > + "TARGET_SSE4_1 && ix86_pre_reload_split ()" > > + "#" > > + "&& 1" > > + [(set (match_dup 0) > > + (unspec:<ssefltvecmode> > > + [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))] > > +{ > > + operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]); > > + operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]); > > + operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]); > > + operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]); > > + if (MEM_P (operands[2])) > > + operands[2] = force_reg (<ssefltvecmode>mode, operands[2]); > > You don't need to check for MEM_P, force_reg will do it for you. 
> > > +}) > > + > > (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>" > > [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") > > (unspec:VF_128_256 > > diff --git a/gcc/testsuite/g++.target/i386/pr100738-1.C > > b/gcc/testsuite/g++.target/i386/pr100738-1.C > > new file mode 100755 > > index 00000000000..5a04c5b031f > > --- /dev/null > > +++ b/gcc/testsuite/g++.target/i386/pr100738-1.C > > @@ -0,0 +1,19 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-Ofast -mavx2" } */ > > +/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */ > > +/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */ > > +/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */ > > + > > +typedef int v4si __attribute__((vector_size(16))); typedef char v16qi > > +__attribute__((vector_size(16))); > > +v4si > > +foo_1 (v16qi a, v4si b, v4si c, v4si d) { > > + return ((v4si)~a) < 0 ? c : d; > > +} > > + > > +v4si > > +foo_2 (v16qi a, v4si b, v4si c, v4si d) { > > + return ((v4si)~a) >= 0 ? c : d; > > +} > > -- > > 2.18.1 > >
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 08bdcddc111..db3506c78d7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20659,6 +20659,37 @@
    (set_attr "btver2_decode" "vector,vector,vector")
    (set_attr "mode" "<ssefltvecmode>")])
 
+;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to vblendvps for inverted mask;
+(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint"
+  [(set (match_operand:<ssebytemode> 0 "register_operand")
+	(unspec:<ssebytemode>
+	  [(match_operand:<ssebytemode> 1 "register_operand")
+	   (match_operand:<ssebytemode> 2 "vector_operand")
+	   (subreg:<ssebytemode>
+	     (lt:VI48_AVX
+	       (subreg:VI48_AVX
+		 (not:<ssebytemode>
+		   (match_operand:<ssebytemode> 3 "register_operand")) 0)
+	       (match_operand:VI48_AVX 4 "const0_operand")) 0)]
+	  UNSPEC_BLENDV))]
+  "TARGET_SSE4_1 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<ssefltvecmode>
+	  [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
+{
+  /* The mask is inverted, so the two blend sources trade places in the
+     replacement pattern above.  Operand 2 is only a vector_operand;
+     force_reg leaves a value that is already a register alone, so no
+     separate MEM_P check is needed before calling it.  */
+  operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
+  operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
+  operands[2] = force_reg (<ssefltvecmode>mode,
+			   gen_lowpart (<ssefltvecmode>mode, operands[2]));
+  operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
+})
+
 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
 	(unspec:VF_128_256
diff --git a/gcc/testsuite/g++.target/i386/pr100738-1.C b/gcc/testsuite/g++.target/i386/pr100738-1.C
new file mode 100755
index 00000000000..5a04c5b031f
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr100738-1.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx2" } */
+/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */
+/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef char v16qi __attribute__((vector_size(16)));
+v4si
+foo_1 (v16qi a, v4si b, v4si c, v4si d)
+{
+  return ((v4si)~a) < 0 ? c : d;
+}
+
+v4si
+foo_2 (v16qi a, v4si b, v4si c, v4si d)
+{
+  return ((v4si)~a) >= 0 ? c : d;
+}