@@ -1,3 +1,17 @@
+2010-10-19 Richard Henderson <rth@redhat.com>
+
+ * config/i386/i386.c (bdesc_multi_arg): Use fma4i_fmadd_<mode>.
+ * config/i386/sse.md (fma<mode>4): Enable for FMA & SSE_MATH.
+ (fma4i_fmadd_<mode>): New.
+ (*split_fma, *split_fms, *split_fnma, *split_fnms): Rename from
+ fma4_fm*_<mode> and adjust to be pre-reload splitters to the
+ standard fma patterns.
+ (fmaddsub_<mode>): Rename from fma4i_fmaddsub_<mode> and
+ enable for FMA.
+ (*fma_fmadd_<mode>, *fma_fmsub_<mode>): New.
+ (*fma_fnmadd_<mode>, *fma_fnmsub_<mode>): New.
+ (*fma_fmaddsub_<mode>, *fma_fmsubadd_<mode>): New.
+
2010-10-19 Paul Koning <pkoning@equallogic.com>
* lower-subreg.c (resolve_shift_zext): Delete conditional code for
@@ -23931,18 +23931,38 @@ static const struct builtin_description bdesc_args[] =
static const struct builtin_description bdesc_multi_arg[] =
{
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
-
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav8sf4, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav4df4, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
-
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
+ "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
+ "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
+ "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
+ "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
+ "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
+ UNKNOWN, (int)MULTI_ARG_3_SF2 },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
+ "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
+ UNKNOWN, (int)MULTI_ARG_3_DF2 },
+
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
+ "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
+ "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
+ "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
+ UNKNOWN, (int)MULTI_ARG_3_SF2 },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
+ "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
+ UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
@@ -1770,13 +1770,24 @@
;; Intrinsic FMA operations.
+;; The standard name for fma is only available with SSE math enabled.
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand")
(match_operand:FMAMODE 2 "nonimmediate_operand")
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "TARGET_FMA4"
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+;; The fma4intrin.h builtin expander; unlike fma<mode>4 it does not require SSE math.
+(define_expand "fma4i_fmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "TARGET_FMA || TARGET_FMA4"  ; either FMA flavour is sufficient
 "")
(define_insn "*fma4i_fmadd_<mode>"
@@ -1904,61 +1915,6 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-;; Non-intrinsic versions, matched when fused-multiply-add is allowed.
-
-(define_insn "*fma4_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
- (plus:FMAMODE
- (mult:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m"))
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
- "TARGET_FMA4 && TARGET_FUSED_MADD"
- "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;; Floating multiply and subtract.
-(define_insn "*fma4_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
- (minus:FMAMODE
- (mult:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m"))
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
- "TARGET_FMA4 && TARGET_FUSED_MADD"
- "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;; Floating point negative multiply and add.
-;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
-(define_insn "*fma4_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
- (minus:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")
- (mult:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m"))))]
- "TARGET_FMA4 && TARGET_FUSED_MADD"
- "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;; Floating point negative multiply and subtract.
-(define_insn "*fma4_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
- (minus:FMAMODE
- (mult:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m"))
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
- "TARGET_FMA4 && TARGET_FUSED_MADD"
- "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA4 Parallel floating point multiply addsub and subadd operations.
@@ -1974,7 +1930,17 @@
;;
;; But this doesn't seem useful in practice.
-(define_insn "fma4i_fmaddsub_<mode>"
+(define_expand "fmaddsub_<mode>"  ; named expander referenced by the vfmaddsub* builtins
+ [(set (match_operand:AVXMODEF2P 0 "register_operand")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand")
+ (match_operand:AVXMODEF2P 3 "nonimmediate_operand")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4"  ; matched afterwards by the FMA4 or FMA3 insn below
+ "")
+
+(define_insn "*fma4_fmaddsub_<mode>"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
@@ -1984,9 +1950,9 @@
"TARGET_FMA4"
"vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
- (set_attr "mode" "V8SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fmsubadd_<mode>"
+(define_insn "*fma4_fmsubadd_<mode>"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
@@ -1997,7 +1963,198 @@
"TARGET_FMA4"
"vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
- (set_attr "mode" "V8SF")])
+ (set_attr "mode" "<MODE>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; FMA3 floating point multiply/accumulate instructions.
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "*fma_fmadd_<mode>"  ; FMA3: op0 = op1 * op2 + op3
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
+ "TARGET_FMA"  ; the input tied to op0 ("0") and the memory-capable input select the form
+ "@
+ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsub_<mode>"  ; FMA3: op0 = op1 * op2 - op3
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+ "TARGET_FMA"  ; the input tied to op0 ("0") and the memory-capable input select the form
+ "@
+ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fnmadd_<mode>"  ; FMA3: op0 = -(op1 * op2) + op3
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
+ "TARGET_FMA"  ; the input tied to op0 ("0") and the memory-capable input select the form
+ "@
+ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fnmsub_<mode>"  ; FMA3: op0 = -(op1 * op2) - op3
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+ "TARGET_FMA"  ; the input tied to op0 ("0") and the memory-capable input select the form
+ "@
+ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmaddsub_<mode>"  ; FMA3 encoding of UNSPEC_FMADDSUB (op1, op2, op3)
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA"  ; the input tied to op0 ("0") and the memory-capable input select the form
+ "@
+ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsubadd_<mode>"  ; FMA3 encoding of UNSPEC_FMADDSUB with op3 negated
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:AVXMODEF2P
+ (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA"  ; the input tied to op0 ("0") and the memory-capable input select the form
+ "@
+ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Non-intrinsic versions, matched when fused-multiply-add is allowed.
+;;
+;; ??? If fused-madd were a generic flag, combine could do this without
+;; needing splitters here in the backend. Irritatingly, combine won't
+;; recognize many of these with mere splits, since only 3 or more insns
+;; are allowed to split during combine. Thankfully, there's always a
+;; split_all_insns pass that runs before reload.
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_and_split "*split_fma"  ; (op1 * op2) + op3  ->  fma (op1, op2, op3)
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (plus:FMAMODE
+ (mult:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand"))
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "TARGET_SSE_MATH && TARGET_FUSED_MADD
+ && (TARGET_FMA || TARGET_FMA4)
+ && !(reload_in_progress || reload_completed)"
+ { gcc_unreachable (); }  ; aborts if output unsplit; the insn is expected to be split first
+ "&& 1"  ; split condition is the insn condition, unchanged
+ [(set (match_dup 0)
+ (fma:FMAMODE
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)))]
+ "")
+
+;; Floating multiply and subtract.
+(define_insn_and_split "*split_fms"  ; (op1 * op2) - op3  ->  fma (op1, op2, -op3)
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (minus:FMAMODE
+ (mult:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand"))
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "TARGET_SSE_MATH && TARGET_FUSED_MADD
+ && (TARGET_FMA || TARGET_FMA4)
+ && !(reload_in_progress || reload_completed)"
+ { gcc_unreachable (); }  ; aborts if output unsplit; the insn is expected to be split first
+ "&& 1"  ; split condition is the insn condition, unchanged
+ [(set (match_dup 0)
+ (fma:FMAMODE
+ (match_dup 1)
+ (match_dup 2)
+ (neg:FMAMODE (match_dup 3))))]
+ "")
+
+;; Floating point negative multiply and add.
+;; Recognize (-a * b + c) via the canonical form: c - (a * b).
+(define_insn_and_split "*split_fnma"  ; op3 - (op1 * op2)  ->  fma (-op1, op2, op3)
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (minus:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand")
+ (mult:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand"))))]
+ "TARGET_SSE_MATH && TARGET_FUSED_MADD
+ && (TARGET_FMA || TARGET_FMA4)
+ && !(reload_in_progress || reload_completed)"
+ { gcc_unreachable (); }  ; aborts if output unsplit; the insn is expected to be split first
+ "&& 1"  ; split condition is the insn condition, unchanged
+ [(set (match_dup 0)
+ (fma:FMAMODE
+ (neg:FMAMODE (match_dup 1))
+ (match_dup 2)
+ (match_dup 3)))]
+ "")
+
+;; Floating point negative multiply and subtract.
+;; Recognize (-(a * b) - c) via the canonical form: (-a * b) - c.
+(define_insn_and_split "*split_fnms"  ; (-op1 * op2) - op3  ->  fma (-op1, op2, -op3)
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (minus:FMAMODE
+ (mult:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand"))
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "TARGET_SSE_MATH && TARGET_FUSED_MADD
+ && (TARGET_FMA || TARGET_FMA4)
+ && !(reload_in_progress || reload_completed)"
+ { gcc_unreachable (); }  ; aborts if output unsplit; the insn is expected to be split first
+ "&& 1"  ; split condition is the insn condition, unchanged
+ [(set (match_dup 0)
+ (fma:FMAMODE
+ (neg:FMAMODE (match_dup 1))
+ (match_dup 2)
+ (neg:FMAMODE (match_dup 3))))]
+ "")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -1,3 +1,9 @@
+2010-10-19 Richard Henderson <rth@redhat.com>
+
+ * gcc.target/i386/fma3-fma.c: New.
+ * gcc.target/i386/fma3-builtin.c: New.
+ * gcc.target/i386/fma4-builtin.c: New.
+
2010-10-19 Richard Guenther <rguenther@suse.de>
PR testsuite/46081
new file mode 100644
@@ -0,0 +1,82 @@
+/* Test that the compiler properly generates floating point multiply
+ and add instructions on FMA3 systems. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mfma -mno-fma4" } */
+
+#ifndef __FP_FAST_FMAF
+# error "__FP_FAST_FMAF should be defined"
+#endif
+#ifndef __FP_FAST_FMA
+# error "__FP_FAST_FMA should be defined"
+#endif
+
+float
+flt_mul_add (float a, float b, float c)  /* a * b + c -> the one vfmadd...ss */
+{
+ return __builtin_fmaf (a, b, c);
+}
+
+double
+dbl_mul_add (double a, double b, double c)  /* a * b + c -> the one vfmadd...sd */
+{
+ return __builtin_fma (a, b, c);
+}
+
+float
+flt_mul_sub (float a, float b, float c)  /* a * b - c -> the one vfmsub...ss */
+{
+ return __builtin_fmaf (a, b, -c);
+}
+
+double
+dbl_mul_sub (double a, double b, double c)  /* a * b - c -> the one vfmsub...sd */
+{
+ return __builtin_fma (a, b, -c);
+}
+
+float
+flt_neg_mul_add_1 (float a, float b, float c)  /* -a * b + c -> first vfnmadd...ss */
+{
+ return __builtin_fmaf (-a, b, c);
+}
+
+double
+dbl_neg_mul_add_1 (double a, double b, double c)  /* -a * b + c -> first vfnmadd...sd */
+{
+ return __builtin_fma (-a, b, c);
+}
+
+float
+flt_neg_mul_add_2 (float a, float b, float c)  /* a * -b + c -> second vfnmadd...ss */
+{
+ return __builtin_fmaf (a, -b, c);
+}
+
+double
+dbl_neg_mul_add_2 (double a, double b, double c)  /* a * -b + c -> second vfnmadd...sd */
+{
+ return __builtin_fma (a, -b, c);
+}
+
+float
+flt_neg_mul_sub (float a, float b, float c)  /* -a * b - c -> the one vfnmsub...ss */
+{
+ return __builtin_fmaf (-a, b, -c);
+}
+
+double
+dbl_neg_mul_sub (double a, double b, double c)  /* -a * b - c -> the one vfnmsub...sd */
+{
+ return __builtin_fma (-a, b, -c);
+}
+
+/* { dg-final { scan-assembler-times "vfmadd...ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...sd" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...sd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd" 1 } } */
new file mode 100644
@@ -0,0 +1,82 @@
+/* Test that the compiler properly optimizes floating point multiply
+ and add instructions on FMA3 systems. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mfma -mno-fma4" } */
+
+extern void exit (int);
+
+float
+flt_mul_add (float a, float b, float c)  /* plain arithmetic; scanned for vfmadd...ss */
+{
+ return (a * b) + c;
+}
+
+double
+dbl_mul_add (double a, double b, double c)  /* plain arithmetic; scanned for vfmadd...sd */
+{
+ return (a * b) + c;
+}
+
+float
+flt_mul_sub (float a, float b, float c)  /* plain arithmetic; scanned for vfmsub...ss */
+{
+ return (a * b) - c;
+}
+
+double
+dbl_mul_sub (double a, double b, double c)  /* plain arithmetic; scanned for vfmsub...sd */
+{
+ return (a * b) - c;
+}
+
+float
+flt_neg_mul_add (float a, float b, float c)  /* plain arithmetic; scanned for vfnmadd...ss */
+{
+ return (-(a * b)) + c;
+}
+
+double
+dbl_neg_mul_add (double a, double b, double c)  /* plain arithmetic; scanned for vfnmadd...sd */
+{
+ return (-(a * b)) + c;
+}
+
+float
+flt_neg_mul_sub (float a, float b, float c)  /* plain arithmetic; scanned for vfnmsub...ss */
+{
+ return (-(a * b)) - c;
+}
+
+double
+dbl_neg_mul_sub (double a, double b, double c)  /* plain arithmetic; scanned for vfnmsub...sd */
+{
+ return (-(a * b)) - c;
+}
+
+float f[10] = { 2, 3, 4 };  /* inputs in [0..2]; results are stored to [3..6] */
+double d[10] = { 2, 3, 4 };  /* inputs in [0..2]; results are stored to [3..6] */
+
+int main ()  /* calls every helper once on the global arrays, then exits 0 */
+{
+ f[3] = flt_mul_add (f[0], f[1], f[2]);
+ f[4] = flt_mul_sub (f[0], f[1], f[2]);
+ f[5] = flt_neg_mul_add (f[0], f[1], f[2]);
+ f[6] = flt_neg_mul_sub (f[0], f[1], f[2]);
+
+ d[3] = dbl_mul_add (d[0], d[1], d[2]);
+ d[4] = dbl_mul_sub (d[0], d[1], d[2]);
+ d[5] = dbl_neg_mul_add (d[0], d[1], d[2]);
+ d[6] = dbl_neg_mul_sub (d[0], d[1], d[2]);
+ exit (0);
+}
+
+/* { dg-final { scan-assembler "vfmadd...ss" } } */
+/* { dg-final { scan-assembler "vfmadd...sd" } } */
+/* { dg-final { scan-assembler "vfmsub...ss" } } */
+/* { dg-final { scan-assembler "vfmsub...sd" } } */
+/* { dg-final { scan-assembler "vfnmadd...ss" } } */
+/* { dg-final { scan-assembler "vfnmadd...sd" } } */
+/* { dg-final { scan-assembler "vfnmsub...ss" } } */
+/* { dg-final { scan-assembler "vfnmsub...sd" } } */
new file mode 100644
@@ -0,0 +1,82 @@
+/* Test that the compiler properly generates floating point multiply
+ and add instructions on FMA4 systems. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mfma4" } */
+
+#ifndef __FP_FAST_FMAF
+# error "__FP_FAST_FMAF should be defined"
+#endif
+#ifndef __FP_FAST_FMA
+# error "__FP_FAST_FMA should be defined"
+#endif
+
+float
+flt_mul_add (float a, float b, float c)  /* a * b + c -> the one vfmaddss */
+{
+ return __builtin_fmaf (a, b, c);
+}
+
+double
+dbl_mul_add (double a, double b, double c)  /* a * b + c -> the one vfmaddsd */
+{
+ return __builtin_fma (a, b, c);
+}
+
+float
+flt_mul_sub (float a, float b, float c)  /* a * b - c -> the one vfmsubss */
+{
+ return __builtin_fmaf (a, b, -c);
+}
+
+double
+dbl_mul_sub (double a, double b, double c)  /* a * b - c -> the one vfmsubsd */
+{
+ return __builtin_fma (a, b, -c);
+}
+
+float
+flt_neg_mul_add_1 (float a, float b, float c)  /* -a * b + c -> first vfnmaddss */
+{
+ return __builtin_fmaf (-a, b, c);
+}
+
+double
+dbl_neg_mul_add_1 (double a, double b, double c)  /* -a * b + c -> first vfnmaddsd */
+{
+ return __builtin_fma (-a, b, c);
+}
+
+float
+flt_neg_mul_add_2 (float a, float b, float c)  /* a * -b + c -> second vfnmaddss */
+{
+ return __builtin_fmaf (a, -b, c);
+}
+
+double
+dbl_neg_mul_add_2 (double a, double b, double c)  /* a * -b + c -> second vfnmaddsd */
+{
+ return __builtin_fma (a, -b, c);
+}
+
+float
+flt_neg_mul_sub (float a, float b, float c)  /* -a * b - c -> the one vfnmsubss */
+{
+ return __builtin_fmaf (-a, b, -c);
+}
+
+double
+dbl_neg_mul_sub (double a, double b, double c)  /* -a * b - c -> the one vfnmsubsd */
+{
+ return __builtin_fma (-a, b, -c);
+}
+
+/* { dg-final { scan-assembler-times "vfmaddss" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsd" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubss" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubsd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmaddss" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmaddsd" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsubss" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsubsd" 1 } } */