diff mbox series

[committed] i386: Vectorize basic V2SFmode operations [PR95046]

Message ID CAFULd4aCWk-w_7vVP=8Otp2FEJE8hsMSGO8HAg+AO6e_0fGqTQ@mail.gmail.com
State New
Headers show
Series [committed] i386: Vectorize basic V2SFmode operations [PR95046] | expand

Commit Message

Uros Bizjak May 11, 2020, 9:20 a.m. UTC
Enable V2SFmode vectorization and vectorize V2SFmode PLUS,
MINUS, MULT, MIN and MAX operations using XMM registers.

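To avoid unwanted secondary effects (e.g. spurious floating-point
exceptions), load values into XMM registers using MOVQ, which clears
the high bits of the XMM register outside V2SFmode.  The packed SSE
instructions used here operate on the whole V4SFmode register, so the
upper two elements must hold known values.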

The compiler now vectorizes e.g.:

float r[2], a[2], b[2];

void
test_plus (void)
{
  for (int i = 0; i < 2; i++)
    r[i] = a[i] + b[i];
}

to:
        movq    a(%rip), %xmm0
        movq    b(%rip), %xmm1
        addps   %xmm1, %xmm0
        movlps  %xmm0, r(%rip)
        ret

gcc/ChangeLog:

2020-05-11  Uroš Bizjak  <ubizjak@gmail.com>

    PR target/95046
    * config/i386/i386.c (ix86_vector_mode_supported_p):
    Vectorize 3dNOW! vector modes for TARGET_MMX_WITH_SSE.
    * config/i386/mmx.md (*mov<mode>_internal): Do not set
    mode of alternative 13 to V2SF for TARGET_MMX_WITH_SSE.

    (mmx_addv2sf3): Change operand predicates from
    nonimmediate_operand to register_mmxmem_operand.
    (addv2sf3): New expander.
    (*mmx_addv2sf3): Add SSE/AVX alternatives.  Change operand
    predicates from nonimmediate_operand to register_mmxmem_operand.
    Enable instruction pattern for TARGET_MMX_WITH_SSE.

    (mmx_subv2sf3): Change operand predicate from
    nonimmediate_operand to register_mmxmem_operand.
    (mmx_subrv2sf3): Ditto.
    (subv2sf3): New expander.
    (*mmx_subv2sf3): Add SSE/AVX alternatives.  Change operand
    predicates from nonimmediate_operand to register_mmxmem_operand.
    Enable instruction pattern for TARGET_MMX_WITH_SSE.

    (mmx_mulv2sf3): Change operand predicates from
    nonimmediate_operand to register_mmxmem_operand.
    (mulv2sf3): New expander.
    (*mmx_mulv2sf3): Add SSE/AVX alternatives.  Change operand
    predicates from nonimmediate_operand to register_mmxmem_operand.
    Enable instruction pattern for TARGET_MMX_WITH_SSE.

    (mmx_<code>v2sf3): Change operand predicates from
    nonimmediate_operand to register_mmxmem_operand.
    (<code>v2sf3): New expander.
    (*mmx_<code>v2sf3): Add SSE/AVX alternatives.  Change operand
    predicates from nonimmediate_operand to register_mmxmem_operand.
    Enable instruction pattern for TARGET_MMX_WITH_SSE.
    (mmx_ieee_<ieee_maxmin>v2sf3): Ditto.

testsuite/ChangeLog:

2020-05-11  Uroš Bizjak  <ubizjak@gmail.com>

    PR target/95046
    * gcc.target/i386/pr95046-1.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {-m32}.

Committed to mainline.

Uros.

Comments

Uros Bizjak May 11, 2020, 9:22 a.m. UTC | #1
Now with the missing testcase.

On Mon, May 11, 2020 at 11:20 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> Enable V2SFmode vectorization and vectorize V2SFmode PLUS,
> MINUS, MULT, MIN and MAX operations using XMM registers.
>
> To avoid unwanted secondary effects (e.g. exceptions), load values
> to XMM registers using MOVQ that clears high bits of the XMM
> register outside V2SFmode.
>
> The compiler now vectorizes e.g.:
>
> float r[2], a[2], b[2];
>
> void
> test_plus (void)
> {
>   for (int i = 0; i < 2; i++)
>     r[i] = a[i] + b[i];
> }
>
> to:
>         movq    a(%rip), %xmm0
>         movq    b(%rip), %xmm1
>         addps   %xmm1, %xmm0
>         movlps  %xmm0, r(%rip)
>         ret
>
> gcc/ChangeLog:
>
> 2020-05-11  Uroš Bizjak  <ubizjak@gmail.com>
>
>     PR target/95046
>     * config/i386/i386.c (ix86_vector_mode_supported_p):
>     Vectorize 3dNOW! vector modes for TARGET_MMX_WITH_SSE.
>     * config/i386/mmx.md (*mov<mode>_internal): Do not set
>     mode of alternative 13 to V2SF for TARGET_MMX_WITH_SSE.
>
>     (mmx_addv2sf3): Change operand predicates from
>     nonimmediate_operand to register_mmxmem_operand.
>     (addv2sf3): New expander.
>     (*mmx_addv2sf3): Add SSE/AVX alternatives.  Change operand
>     predicates from nonimmediate_operand to register_mmxmem_operand.
>     Enable instruction pattern for TARGET_MMX_WITH_SSE.
>
>     (mmx_subv2sf3): Change operand predicate from
>     nonimmediate_operand to register_mmxmem_operand.
>     (mmx_subrv2sf3): Ditto.
>     (subv2sf3): New expander.
>     (*mmx_subv2sf3): Add SSE/AVX alternatives.  Change operand
>     predicates from nonimmediate_operand to register_mmxmem_operand.
>     Enable instruction pattern for TARGET_MMX_WITH_SSE.
>
>     (mmx_mulv2sf3): Change operand predicates from
>     nonimmediate_operand to register_mmxmem_operand.
>     (mulv2sf3): New expander.
>     (*mmx_mulv2sf3): Add SSE/AVX alternatives.  Change operand
>     predicates from nonimmediate_operand to register_mmxmem_operand.
>     Enable instruction pattern for TARGET_MMX_WITH_SSE.
>
>     (mmx_<code>v2sf3): Change operand predicates from
>     nonimmediate_operand to register_mmxmem_operand.
>     (<code>v2sf3): New expander.
>     (*mmx_<code>v2sf3): Add SSE/AVX alternatives.  Change operand
>     predicates from nonimmediate_operand to register_mmxmem_operand.
>     Enable instruction pattern for TARGET_MMX_WITH_SSE.
>     (mmx_ieee_<ieee_maxmin>v2sf3): Ditto.
>
> testsuite/ChangeLog:
>
> 2020-05-11  Uroš Bizjak  <ubizjak@gmail.com>
>
>     PR target/95046
>     * gcc.target/i386/pr95046-1.c: New test.
>
> Bootstrapped and regression tested on x86_64-linux-gnu {-m32}.
>
> Committed to mainline.
>
> Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b40f443ba8a..d1c0e354162 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21007,9 +21007,11 @@  ix86_vector_mode_supported_p (machine_mode mode)
     return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
     return true;
-  if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
+  if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+      && VALID_MMX_REG_MODE (mode))
     return true;
-  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
+  if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+      && VALID_MMX_REG_MODE_3DNOW (mode))
     return true;
   return false;
 }
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 472f90f9bc1..d3e0004d3a0 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -175,7 +175,13 @@ 
 		    ]
 		    (const_string "TI"))
 
-	    (and (eq_attr "alternative" "13,14")
+	    (and (eq_attr "alternative" "13")
+		 (ior (and (match_test "<MODE>mode == V2SFmode")
+			   (not (match_test "TARGET_MMX_WITH_SSE")))
+		      (not (match_test "TARGET_SSE2"))))
+	      (const_string "V2SF")
+
+	    (and (eq_attr "alternative" "14")
 	    	 (ior (match_test "<MODE>mode == V2SFmode")
 		      (not (match_test "TARGET_SSE2"))))
 	      (const_string "V2SF")
@@ -235,67 +241,112 @@ 
 (define_expand "mmx_addv2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
 	(plus:V2SF
-	  (match_operand:V2SF 1 "nonimmediate_operand")
-	  (match_operand:V2SF 2 "nonimmediate_operand")))]
+	  (match_operand:V2SF 1 "register_mmxmem_operand")
+	  (match_operand:V2SF 2 "register_mmxmem_operand")))]
   "TARGET_3DNOW"
   "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
 
+(define_expand "addv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+	(plus:V2SF
+	  (match_operand:V2SF 1 "register_operand")
+	  (match_operand:V2SF 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
+
 (define_insn "*mmx_addv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-	(plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
-  "pfadd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+	(plus:V2SF
+	  (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+	  (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
+  "@
+   pfadd\t{%2, %0|%0, %2}
+   addps\t{%2, %0|%0, %2}
+   vaddps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,*,*")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
 
 (define_expand "mmx_subv2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
         (minus:V2SF (match_operand:V2SF 1 "register_operand")
-		    (match_operand:V2SF 2 "nonimmediate_operand")))]
+		    (match_operand:V2SF 2 "register_mmxmem_operand")))]
   "TARGET_3DNOW")
 
 (define_expand "mmx_subrv2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
         (minus:V2SF (match_operand:V2SF 2 "register_operand")
-		    (match_operand:V2SF 1 "nonimmediate_operand")))]
+		    (match_operand:V2SF 1 "register_mmxmem_operand")))]
   "TARGET_3DNOW")
 
+(define_expand "subv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+	(minus:V2SF
+	  (match_operand:V2SF 1 "register_operand")
+	  (match_operand:V2SF 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MINUS, V2SFmode, operands);")
+
 (define_insn "*mmx_subv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y,y")
-        (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
-		    (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
-  "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  [(set (match_operand:V2SF 0 "register_operand" "=y,y,x,Yv")
+        (minus:V2SF
+	  (match_operand:V2SF 1 "register_mmxmem_operand" "0,ym,0,Yv")
+	  (match_operand:V2SF 2 "register_mmxmem_operand" "ym,0,x,Yv")))]
+  "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    pfsub\t{%2, %0|%0, %2}
-   pfsubr\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+   pfsubr\t{%1, %0|%0, %1}
+   subps\t{%2, %0|%0, %2}
+   vsubps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,native,*,*")
+   (set_attr "type" "mmxadd,mmxadd,sseadd,sseadd")
+   (set_attr "prefix_extra" "1,1,*,*")
+   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF")])
 
 (define_expand "mmx_mulv2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
-	(mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
-		   (match_operand:V2SF 2 "nonimmediate_operand")))]
+	(mult:V2SF (match_operand:V2SF 1 "register_mmxmem_operand")
+		   (match_operand:V2SF 2 "register_mmxmem_operand")))]
   "TARGET_3DNOW"
   "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
 
+(define_expand "mulv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+	(mult:V2SF
+	  (match_operand:V2SF 1 "register_operand")
+	  (match_operand:V2SF 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
+
 (define_insn "*mmx_mulv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-	(mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
-		   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
-  "pfmul\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+	(mult:V2SF
+	  (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+	  (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
+  "@
+   pfmul\t{%2, %0|%0, %2}
+   mulps\t{%2, %0|%0, %2}
+   vmulps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,*,*")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "btver2_decode" "*,direct,double")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
 
 (define_expand "mmx_<code>v2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
         (smaxmin:V2SF
-	  (match_operand:V2SF 1 "nonimmediate_operand")
-	  (match_operand:V2SF 2 "nonimmediate_operand")))]
+	  (match_operand:V2SF 1 "register_mmxmem_operand")
+	  (match_operand:V2SF 2 "register_mmxmem_operand")))]
   "TARGET_3DNOW"
 {
   if (!flag_finite_math_only || flag_signed_zeros)
@@ -309,21 +360,45 @@ 
     ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
 })
 
+(define_expand "<code>v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+        (smaxmin:V2SF
+	  (match_operand:V2SF 1 "register_operand")
+	  (match_operand:V2SF 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+{
+  if (!flag_finite_math_only || flag_signed_zeros)
+    {
+      emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
+		 (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else
+    ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
+})
+
 ;; These versions of the min/max patterns are intentionally ignorant of
 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
 ;; are undefined in this condition, we're certain this is correct.
 
 (define_insn "*mmx_<code>v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
         (smaxmin:V2SF
-	  (match_operand:V2SF 1 "nonimmediate_operand" "%0")
-	  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
-  "pf<maxmin_float>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+	  (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+	  (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
+  "@
+   pf<maxmin_float>\t{%2, %0|%0, %2}
+   <maxmin_float>ps\t{%2, %0|%0, %2}
+   v<maxmin_float>ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,*,*")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "btver2_sse_attr" "*,maxmin,maxmin")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
 
 ;; These versions of the min/max patterns implement exactly the operations
 ;;   min = (op1 < op2 ? op1 : op2)
@@ -332,16 +407,22 @@ 
 ;; presence of -0.0 and NaN.
 
 (define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
         (unspec:V2SF
-	  [(match_operand:V2SF 1 "register_operand" "0")
-	   (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+	  [(match_operand:V2SF 1 "register_operand" "0,0,Yv")
+	   (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")]
 	  IEEE_MAXMIN))]
-  "TARGET_3DNOW"
-  "pf<ieee_maxmin>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+  "TARGET_3DNOW || TARGET_MMX_WITH_SSE"
+  "@
+   pf<ieee_maxmin>\t{%2, %0|%0, %2}
+   <ieee_maxmin>ps\t{%2, %0|%0, %2}
+   v<ieee_maxmin>ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,*,*")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "btver2_sse_attr" "*,maxmin,maxmin")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
 
 (define_insn "mmx_rcpv2sf2"
   [(set (match_operand:V2SF 0 "register_operand" "=y")