diff mbox series

[committed] i386: Add V2SFmode hadd/hsub instructions [PR95046]

Message ID CAFULd4a5BrU81sV9OiffeAjtaapH5srDEr3hymti2+zWhx5ppQ@mail.gmail.com
State New
Headers show
Series [committed] i386: Add V2SFmode hadd/hsub instructions [PR95046] | expand

Commit Message

Uros Bizjak May 15, 2020, 7:26 a.m. UTC
The PFACC/PFNACC 3dNow! instructions only got corresponding SSE
alternatives (HADDPS/HSUBPS) in SSE3, so they can't be implemented with
TARGET_MMX_WITH_SSE alone, which implies only SSE2.  These instructions
are only generated via builtins, and since several 3dNow! insns have no
corresponding SSE alternative, we can't avoid MMX registers with 3dNow!
builtins anyway.

Add SSE3/AVX alternatives to the insn patterns, so the compiler will be
able to use XMM registers when available, but don't prevent the use of
MMX registers, since they are needed when SSE3 is not active.

Add additional generic insn patterns, used by the combiner to
synthesize horizontal V2SFmode add/sub instructions.  These patterns
are active for TARGET_MMX_WITH_SSE only, and use only XMM registers.

gcc/ChangeLog:

2020-05-15  Uroš Bizjak  <ubizjak@gmail.com>

    PR target/95046
    * config/i386/i386.md (isa): Add sse3_noavx.
    (enabled): Handle sse3_noavx.

    * config/i386/mmx.md (mmx_haddv2sf3): New expander.
    (*mmx_haddv2sf3): Rename from mmx_haddv2sf3.  Add SSE3/AVX
    alternatives.  Match commutative vec_select selector operands.
    (*mmx_haddv2sf3_low): New insn pattern.

    (mmx_hsubv2sf3): Add SSE3/AVX alternatives.  Use
    register_mmxmem_operand for operand 2.
    (*mmx_hsubv2sf3_low): New insn pattern.

testsuite/ChangeLog:

2020-05-15  Uroš Bizjak  <ubizjak@gmail.com>

    PR target/95046
    * gcc.target/i386/pr95046-8.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 722eb9b5ec8..b555c165647 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -803,8 +803,8 @@ 
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
 (define_attr "isa" "base,x64,x64_sse2,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
-		    sse_noavx,sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
-		    avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
+		    sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
+		    avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
 		    avx512bw,noavx512bw,avx512dq,noavx512dq,
 		    avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
@@ -834,6 +834,8 @@ 
 	 (eq_attr "isa" "sse2_noavx")
 	   (symbol_ref "TARGET_SSE2 && !TARGET_AVX")
 	 (eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
+	 (eq_attr "isa" "sse3_noavx")
+	   (symbol_ref "TARGET_SSE3 && !TARGET_AVX")
 	 (eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
 	 (eq_attr "isa" "sse4_noavx")
 	   (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2955ca2898f..f73c8452651 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -530,43 +530,109 @@ 
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "V2SF")])
 
-(define_insn "mmx_haddv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+(define_expand "mmx_haddv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
 	(vec_concat:V2SF
 	  (plus:SF
 	    (vec_select:SF
-	      (match_operand:V2SF 1 "register_operand" "0")
-	      (parallel [(const_int  0)]))
+	      (match_operand:V2SF 1 "register_operand")
+	      (parallel [(const_int 0)]))
 	    (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
 	  (plus:SF
-            (vec_select:SF
-	      (match_operand:V2SF 2 "nonimmediate_operand" "ym")
-	      (parallel [(const_int  0)]))
+	    (vec_select:SF
+	      (match_operand:V2SF 2 "nonimmediate_operand")
+	      (parallel [(const_int 0)]))
 	    (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
-  "TARGET_3DNOW"
-  "pfacc\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+  "TARGET_3DNOW")
+
+(define_insn "*mmx_haddv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,x")
+	(vec_concat:V2SF
+	  (plus:SF
+	    (vec_select:SF
+	      (match_operand:V2SF 1 "register_operand" "0,0,x")
+	      (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
+	    (vec_select:SF (match_dup 1)
+	    (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
+	  (plus:SF
+            (vec_select:SF
+	      (match_operand:V2SF 2 "nonimmediate_operand" "ym,x,x")
+	      (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
+	    (vec_select:SF (match_dup 2)
+	    (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
+  "TARGET_3DNOW
+   && INTVAL (operands[3]) != INTVAL (operands[4])
+   && INTVAL (operands[5]) != INTVAL (operands[6])"
+  "@
+   pfacc\t{%2, %0|%0, %2}
+   haddps\t{%2, %0|%0, %2}
+   vhaddps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse3_noavx,avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "prefix" "*,orig,vex")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
+
+(define_insn "*mmx_haddv2sf3_low"
+  [(set (match_operand:SF 0 "register_operand" "=x,x")
+	(plus:SF
+	  (vec_select:SF
+	    (match_operand:V2SF 1 "register_operand" "0,x")
+	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
+	  (vec_select:SF
+	    (match_dup 1)
+	    (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
+  "TARGET_MMX_WITH_SSE && TARGET_SSE3
+   && INTVAL (operands[2]) != INTVAL (operands[3])"
+  "@
+   haddps\t{%0, %0|%0, %0}
+   vhaddps\t{%1, %1, %0|%0, %1, %1}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V4SF")])
 
 (define_insn "mmx_hsubv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,x")
 	(vec_concat:V2SF
 	  (minus:SF
 	    (vec_select:SF
-	      (match_operand:V2SF 1 "register_operand" "0")
+	      (match_operand:V2SF 1 "register_operand" "0,0,x")
 	      (parallel [(const_int  0)]))
 	    (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
 	  (minus:SF
             (vec_select:SF
-	      (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+	      (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,x")
 	      (parallel [(const_int  0)]))
 	    (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
   "TARGET_3DNOW_A"
-  "pfnacc\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+  "@
+   pfnacc\t{%2, %0|%0, %2}
+   hsubps\t{%2, %0|%0, %2}
+   vhsubps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse3_noavx,avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "prefix" "*,orig,vex")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
+
+(define_insn "*mmx_hsubv2sf3_low"
+  [(set (match_operand:SF 0 "register_operand" "=x,x")
+	(minus:SF
+	  (vec_select:SF
+	    (match_operand:V2SF 1 "register_operand" "0,x")
+	    (parallel [(const_int 0)]))
+	  (vec_select:SF
+	    (match_dup 1)
+	    (parallel [(const_int 1)]))))]
+  "TARGET_MMX_WITH_SSE && TARGET_SSE3"
+  "@
+   hsubps\t{%0, %0|%0, %0}
+   vhsubps\t{%1, %1, %0|%0, %1, %1}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V4SF")])
 
 (define_insn "mmx_addsubv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y")
diff --git a/gcc/testsuite/gcc.target/i386/pr95046-8.c b/gcc/testsuite/gcc.target/i386/pr95046-8.c
new file mode 100644
index 00000000000..e164fd4ea37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95046-8.c
@@ -0,0 +1,22 @@ 
+/* PR target/95046 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse3" } */
+
+/* foo's a[0] + a[1] should emit (v)haddps, bar's a[0] - a[1] (v)hsubps.  */
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+float
+foo (__v2sf a)
+{
+  return a[0] + a[1];
+}
+
+/* { dg-final { scan-assembler "\tv?haddps" } } */
+
+float
+bar (__v2sf a)
+{
+  return a[0] - a[1];
+}
+
+/* { dg-final { scan-assembler "\tv?hsubps" } } */