@@ -660,6 +660,76 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V4SF")])
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision FMA multiply/accumulate instructions.
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "fmav2sf4"
+ [(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
+ (fma:V2SF
+ (match_operand:V2SF 1 "register_operand" "%0,v,x")
+ (match_operand:V2SF 2 "register_operand" "v,v,x")
+ (match_operand:V2SF 3 "register_operand" "v,0,x")))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE"
+ "@
+ vfmadd132ps\t{%2, %3, %0|%0, %3, %2}
+ vfmadd231ps\t{%2, %1, %0|%0, %1, %2}
+ vfmaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "fmsv2sf4"
+ [(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
+ (fma:V2SF
+ (match_operand:V2SF 1 "register_operand" "%0,v,x")
+ (match_operand:V2SF 2 "register_operand" "v,v,x")
+ (neg:V2SF
+ (match_operand:V2SF 3 "register_operand" "v,0,x"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE"
+ "@
+ vfmsub132ps\t{%2, %3, %0|%0, %3, %2}
+ vfmsub231ps\t{%2, %1, %0|%0, %1, %2}
+ vfmsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "fnmav2sf4"
+ [(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
+ (fma:V2SF
+ (neg:V2SF
+ (match_operand:V2SF 1 "register_operand" "%0,v,x"))
+ (match_operand:V2SF 2 "register_operand" "v,v,x")
+ (match_operand:V2SF 3 "register_operand" "v,0,x")))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE"
+ "@
+ vfnmadd132ps\t{%2, %3, %0|%0, %3, %2}
+ vfnmadd231ps\t{%2, %1, %0|%0, %1, %2}
+ vfnmaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "fnmsv2sf4"
+ [(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
+ (fma:V2SF
+ (neg:V2SF
+ (match_operand:V2SF 1 "register_operand" "%0,v,x"))
+ (match_operand:V2SF 2 "register_operand" "v,v,x")
+ (neg:V2SF
+ (match_operand:V2SF 3 "register_operand" "v,0,x"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE"
+ "@
+ vfnmsub132ps\t{%2, %3, %0|%0, %3, %2}
+ vfnmsub231ps\t{%2, %1, %0|%0, %1, %2}
+ vfnmsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "V4SF")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
new file mode 100644
@@ -0,0 +1,42 @@
+/* PR target/95046 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -mfma" } */
+
+
+float r[2], a[2], b[2], c[2];
+
+void
+test_fma (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = a[i] * b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler "\tvfmadd\[123\]+ps" } } */
+
+void
+test_fms (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = a[i] * b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler "\tvfmsub\[123\]+ps" } } */
+
+void
+test_fnma (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = -(a[i] * b[i]) + c[i];
+}
+
+/* { dg-final { scan-assembler "\tvfnmadd\[123\]+ps" } } */
+
+void
+test_fnms (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = -(a[i] * b[i]) - c[i];
+}
+
+/* { dg-final { scan-assembler "\tvfnmsub\[123\]+ps" } } */