===================================================================
@@ -182,7 +182,8 @@ and are lvalues (so they can be used for
@item RTX_TERNARY
An RTX code for other three input operations. Currently only
-@code{IF_THEN_ELSE} and @code{VEC_MERGE}.
+@code{IF_THEN_ELSE}, @code{VEC_MERGE}, @code{SIGN_EXTRACT},
+@code{ZERO_EXTRACT}, and @code{FMA}.
@item RTX_INSN
An RTX code for an entire instruction: @code{INSN}, @code{JUMP_INSN}, and
@@ -2234,6 +2235,12 @@ not be the same.
For unsigned widening multiplication, use the same idiom, but with
@code{zero_extend} instead of @code{sign_extend}.
+@findex fma
+@item (fma:@var{m} @var{x} @var{y} @var{z})
+Represents the @code{fma}, @code{fmaf}, and @code{fmal} builtin
+functions that do a combined multiply of @var{x} and @var{y} and then
+add @var{z} without doing an intermediate rounding step.
+
@findex div
@findex ss_div
@cindex division
===================================================================
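For reference, a standalone example (not part of the patch) of the
single-rounding semantics the new fma RTX code documents.  The operands are
chosen so the rounded product differs from the exact one; it assumes double
arithmetic is evaluated in double precision (FLT_EVAL_METHOD == 0) and that
the compiler does not itself contract a * b + c:

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      double a = 1.0 + 0x1p-27;         /* exactly representable */
      double b = 1.0 - 0x1p-27;         /* a * b == 1 - 2^-54 exactly */
      double c = -1.0;

      printf ("%a\n", a * b + c);       /* product rounds to 1.0: 0x0p+0 */
      printf ("%a\n", fma (a, b, c));   /* single rounding: -0x1p-54 */
      return 0;
    }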
@@ -3948,6 +3948,16 @@ means of constraints requiring operands
@itemx @samp{and@var{m}3}, @samp{ior@var{m}3}, @samp{xor@var{m}3}
Similar, for other arithmetic operations.
+@cindex @code{fma@var{m}4} instruction pattern
+@item @samp{fma@var{m}4}
+Multiply operand 2 and operand 1, then add operand 3, storing the
+result in operand 0. All operands must have mode @var{m}. This
+pattern is used to implement the @code{fma}, @code{fmaf}, and
+@code{fmal} builtin functions from the ISO C99 standard. The
+@code{fma} operation may produce different results than doing the
+multiply followed by the add if the machine does not perform a
+rounding step between the operations.
+
@cindex @code{min@var{m}3} instruction pattern
@cindex @code{max@var{m}3} instruction pattern
@item @samp{smin@var{m}3}, @samp{smax@var{m}3}
===================================================================
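As a usage sketch (not part of the patch; the function name is illustrative):
once a port provides fma<mode>4, a call to the builtin expands inline instead
of going through libm, so on rs6000 the function below should compile to a
single fmadd:

    double
    fused (double x, double y, double z)
    {
      return __builtin_fma (x, y, z);   /* one fused multiply-add insn */
    }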
@@ -260,6 +260,27 @@ builtin_define_float_constants (const ch
NaN has quiet NaNs. */
sprintf (name, "__%s_HAS_QUIET_NAN__", name_prefix);
builtin_define_with_int_value (name, MODE_HAS_NANS (TYPE_MODE (type)));
+
+ /* Note whether the port has builtin FMA support. */
+#ifdef HAVE_fmasf4
+ if (HAVE_fmasf4 && FLOAT_TYPE_SIZE == 32)
+ builtin_define_with_int_value ("__FP_FAST_FMAS", 1);
+#endif
+
+#ifdef HAVE_fmadf4
+ if (HAVE_fmadf4 && DOUBLE_TYPE_SIZE == 64)
+ builtin_define_with_int_value ("__FP_FAST_FMA", 1);
+#endif
+
+#ifdef HAVE_fmatf4
+ if (HAVE_fmatf4 && LONG_DOUBLE_TYPE_SIZE == 128)
+ builtin_define_with_int_value ("__FP_FAST_FMAL", 1);
+#endif
+
+#ifdef HAVE_fmaxf4
+ if (HAVE_fmaxf4 && LONG_DOUBLE_TYPE_SIZE > 64 && LONG_DOUBLE_TYPE_SIZE < 128)
+ builtin_define_with_int_value ("__FP_FAST_FMAL", 1);
+#endif
}
/* Define __DECx__ constants for TYPE using NAME_PREFIX and SUFFIX. */
===================================================================
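A sketch (not part of the patch; the function is hypothetical) of how user
code is expected to test the new predefines, mirroring the C99 FP_FAST_FMA
convention from <math.h>:

    #include <math.h>

    double
    axpy (double a, double x, double y)
    {
    #ifdef __FP_FAST_FMA
      return fma (a, x, y);     /* announced to be a single instruction */
    #else
      return a * x + y;         /* fma () may be a slow libm call here */
    #endif
    }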
@@ -0,0 +1,183 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math -mno-fused-madd" } */
+/* { dg-final { scan-assembler-times "xvmadd" 2 } } */
+/* { dg-final { scan-assembler-times "xsmadd" 1 } } */
+/* { dg-final { scan-assembler-times "fmadds" 1 } } */
+/* { dg-final { scan-assembler-times "xvmsub" 2 } } */
+/* { dg-final { scan-assembler-times "xsmsub" 1 } } */
+/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
+/* { dg-final { scan-assembler-times "xvnmadd" 2 } } */
+/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */
+/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
+/* { dg-final { scan-assembler-times "xvnmsub" 2 } } */
+/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */
+/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
+
+/* Only the functions calling the builtin should generate an appropriate
+   (a * b) + c instruction.  */
+
+double
+builtin_fma (double b, double c, double d)
+{
+ return __builtin_fma (b, c, d); /* xsmadd{a,m}dp */
+}
+
+double
+builtin_fms (double b, double c, double d)
+{
+ return __builtin_fma (b, c, -d); /* xsmsub{a,m}dp */
+}
+
+double
+builtin_fnma (double b, double c, double d)
+{
+ return - __builtin_fma (b, c, d); /* xsnmadd{a,m}dp */
+}
+
+double
+builtin_fnms (double b, double c, double d)
+{
+ return - __builtin_fma (b, c, -d); /* xsnmsub{a,m}dp */
+}
+
+float
+builtin_fmaf (float b, float c, float d)
+{
+ return __builtin_fmaf (b, c, d); /* fmadds */
+}
+
+float
+builtin_fmsf (float b, float c, float d)
+{
+ return __builtin_fmaf (b, c, -d); /* fmsubs */
+}
+
+float
+builtin_fnmaf (float b, float c, float d)
+{
+ return - __builtin_fmaf (b, c, d); /* fnmadds */
+}
+
+float
+builtin_fnmsf (float b, float c, float d)
+{
+ return - __builtin_fmaf (b, c, -d); /* fnmsubs */
+}
+
+double
+normal_fma (double b, double c, double d)
+{
+ return (b * c) + d; /* xsmul/xsadd */
+}
+
+float
+normal_fmaf (float b, float c, float d)
+{
+ return (b * c) + d; /* fmuls/fadds */
+}
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+double vda[SIZE] __attribute__((__aligned__(32)));
+double vdb[SIZE] __attribute__((__aligned__(32)));
+double vdc[SIZE] __attribute__((__aligned__(32)));
+double vdd[SIZE] __attribute__((__aligned__(32)));
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fma (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]); /* xvmadd{a,m}dp */
+}
+
+void
+vector_fms (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]); /* xvmsub{a,m}dp */
+}
+
+void
+vector_fnma (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = - __builtin_fma (vdb[i], vdc[i], vdd[i]); /* xvnmadd{a,m}dp */
+}
+
+void
+vector_fnms (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = - __builtin_fma (vdb[i], vdc[i], -vdd[i]); /* xvnmsub{a,m}dp */
+}
+
+void
+vector_fmaf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* xvmadd{a,m}sp */
+}
+
+void
+vector_fmsf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvmsub{a,m}sp */
+}
+
+void
+vector_fnmaf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* xvnmadd{a,m}sp */
+}
+
+void
+vector_fnmsf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvnmsub{a,m}sp */
+}
+
+void
+vnormal_fma (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = (vdb[i] * vdc[i]) + vdd[i]; /* xvmul/xvadd */
+}
+
+void
+vnormal_fmaf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = (vfb[i] * vfc[i]) + vfd[i]; /* xvmul/xvadd */
+}
===================================================================
@@ -0,0 +1,103 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O3 -ftree-vectorize -mcpu=power6 -maltivec -ffast-math" } */
+/* { dg-final { scan-assembler-times "vmaddfp" 2 } } */
+/* { dg-final { scan-assembler-times "fmadd " 2 } } */
+/* { dg-final { scan-assembler-times "fmadds" 2 } } */
+/* { dg-final { scan-assembler-times "fmsub " 1 } } */
+/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
+/* { dg-final { scan-assembler-times "fnmadd " 1 } } */
+/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
+/* { dg-final { scan-assembler-times "fnmsub " 1 } } */
+/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
+
+/* All functions should generate an appropriate (a * b) + c instruction
+ since -mfused-madd is on by default. */
+
+double
+builtin_fma (double b, double c, double d)
+{
+ return __builtin_fma (b, c, d); /* fmadd */
+}
+
+double
+builtin_fms (double b, double c, double d)
+{
+ return __builtin_fma (b, c, -d); /* fmsub */
+}
+
+double
+builtin_fnma (double b, double c, double d)
+{
+ return - __builtin_fma (b, c, d); /* fnmadd */
+}
+
+double
+builtin_fnms (double b, double c, double d)
+{
+ return - __builtin_fma (b, c, -d); /* fnmsub */
+}
+
+float
+builtin_fmaf (float b, float c, float d)
+{
+ return __builtin_fmaf (b, c, d); /* fmadds */
+}
+
+float
+builtin_fmsf (float b, float c, float d)
+{
+ return __builtin_fmaf (b, c, -d); /* fmsubs */
+}
+
+float
+builtin_fnmaf (float b, float c, float d)
+{
+ return - __builtin_fmaf (b, c, d); /* fnmadds */
+}
+
+float
+builtin_fnmsf (float b, float c, float d)
+{
+ return - __builtin_fmaf (b, c, -d); /* fnmsubs */
+}
+
+double
+normal_fma (double b, double c, double d)
+{
+ return (b * c) + d; /* fmadd */
+}
+
+float
+normal_fmaf (float b, float c, float d)
+{
+ return (b * c) + d; /* fmadds */
+}
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fmaf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* vmaddfp */
+}
+
+void
+vnormal_fmaf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = (vfb[i] * vfc[i]) + vfd[i]; /* vmaddfp */
+}
===================================================================
@@ -0,0 +1,94 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O3 -ftree-vectorize -mcpu=power6 -maltivec -ffast-math -mno-fused-madd" } */
+/* { dg-final { scan-assembler-times "vmaddfp" 1 } } */
+/* { dg-final { scan-assembler-times "fmadd " 1 } } */
+/* { dg-final { scan-assembler-times "fmadds" 1 } } */
+/* { dg-final { scan-assembler-times "fmsub " 1 } } */
+/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
+/* { dg-final { scan-assembler-times "fnmadd " 1 } } */
+/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
+/* { dg-final { scan-assembler-times "fnmsub " 1 } } */
+/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
+
+/* Only the functions calling the builtin should generate an appropriate
+ (a * b) + c instruction. */
+
+double
+builtin_fma (double b, double c, double d)
+{
+ return __builtin_fma (b, c, d); /* fmadd */
+}
+
+double
+builtin_fms (double b, double c, double d)
+{
+ return __builtin_fma (b, c, -d); /* fmsub */
+}
+
+double
+builtin_fnma (double b, double c, double d)
+{
+ return - __builtin_fma (b, c, d); /* fnmadd */
+}
+
+double
+builtin_fnms (double b, double c, double d)
+{
+ return - __builtin_fma (b, c, -d); /* fnmsub */
+}
+
+float
+builtin_fmaf (float b, float c, float d)
+{
+ return __builtin_fmaf (b, c, d); /* fmadds */
+}
+
+float
+builtin_fmsf (float b, float c, float d)
+{
+ return __builtin_fmaf (b, c, -d); /* fmsubs */
+}
+
+float
+builtin_fnmaf (float b, float c, float d)
+{
+ return - __builtin_fmaf (b, c, d); /* fnmadds */
+}
+
+float
+builtin_fnmsf (float b, float c, float d)
+{
+ return - __builtin_fmaf (b, c, -d); /* fnmsubs */
+}
+
+double
+normal_fma (double b, double c, double d)
+{
+ return (b * c) + d; /* fmul/fadd */
+}
+
+float
+normal_fmaf (float b, float c, float d)
+{
+ return (b * c) + d; /* fmuls/fadds */
+}
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fmaf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* vmaddfp */
+}
===================================================================
@@ -0,0 +1,183 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math" } */
+/* { dg-final { scan-assembler-times "xvmadd" 4 } } */
+/* { dg-final { scan-assembler-times "xsmadd" 2 } } */
+/* { dg-final { scan-assembler-times "fmadds" 2 } } */
+/* { dg-final { scan-assembler-times "xvmsub" 2 } } */
+/* { dg-final { scan-assembler-times "xsmsub" 1 } } */
+/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
+/* { dg-final { scan-assembler-times "xvnmadd" 2 } } */
+/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */
+/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
+/* { dg-final { scan-assembler-times "xvnmsub" 2 } } */
+/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */
+/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
+
+/* All functions should generate an appropriate (a * b) + c instruction
+ since -mfused-madd is on by default. */
+
+double
+builtin_fma (double b, double c, double d)
+{
+ return __builtin_fma (b, c, d); /* xsmadd{a,m}dp */
+}
+
+double
+builtin_fms (double b, double c, double d)
+{
+ return __builtin_fma (b, c, -d); /* xsmsub{a,m}dp */
+}
+
+double
+builtin_fnma (double b, double c, double d)
+{
+ return - __builtin_fma (b, c, d); /* xsnmadd{a,m}dp */
+}
+
+double
+builtin_fnms (double b, double c, double d)
+{
+ return - __builtin_fma (b, c, -d); /* xsnmsub{a,m}dp */
+}
+
+float
+builtin_fmaf (float b, float c, float d)
+{
+ return __builtin_fmaf (b, c, d); /* fmadds */
+}
+
+float
+builtin_fmsf (float b, float c, float d)
+{
+ return __builtin_fmaf (b, c, -d); /* fmsubs */
+}
+
+float
+builtin_fnmaf (float b, float c, float d)
+{
+ return - __builtin_fmaf (b, c, d); /* fnmadds */
+}
+
+float
+builtin_fnmsf (float b, float c, float d)
+{
+ return - __builtin_fmaf (b, c, -d); /* fnmsubs */
+}
+
+double
+normal_fma (double b, double c, double d)
+{
+ return (b * c) + d; /* xsmadd{a,m}dp */
+}
+
+float
+normal_fmaf (float b, float c, float d)
+{
+ return (b * c) + d; /* fmadds */
+}
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+double vda[SIZE] __attribute__((__aligned__(32)));
+double vdb[SIZE] __attribute__((__aligned__(32)));
+double vdc[SIZE] __attribute__((__aligned__(32)));
+double vdd[SIZE] __attribute__((__aligned__(32)));
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fma (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]); /* xvmadd{a,m}dp */
+}
+
+void
+vector_fms (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]); /* xvmsub{a,m}dp */
+}
+
+void
+vector_fnma (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = - __builtin_fma (vdb[i], vdc[i], vdd[i]); /* xvnmadd{a,m}dp */
+}
+
+void
+vector_fnms (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = - __builtin_fma (vdb[i], vdc[i], -vdd[i]); /* xvnmsub{a,m}dp */
+}
+
+void
+vector_fmaf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* xvmadd{a,m}sp */
+}
+
+void
+vector_fmsf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvmsub{a,m}sp */
+}
+
+void
+vector_fnmaf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], vfd[i]); /* xvnmadd{a,m}sp */
+}
+
+void
+vector_fnmsf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvnmsub{a,m}sp */
+}
+
+void
+vnormal_fma (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vda[i] = (vdb[i] * vdc[i]) + vdd[i]; /* xvmadd{a,m}dp */
+}
+
+void
+vnormal_fmaf (void)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ vfa[i] = (vfb[i] * vfc[i]) + vfd[i]; /* xvmadd{a,m}sp */
+}
===================================================================
@@ -706,6 +706,9 @@ DEF_RTL_EXPR(SS_TRUNCATE, "ss_truncate",
/* Unsigned saturating truncate. */
DEF_RTL_EXPR(US_TRUNCATE, "us_truncate", "e", RTX_UNARY)
+/* Floating point multiply/add combined instruction. */
+DEF_RTL_EXPR(FMA, "fma", "eee", RTX_TERNARY)
+
/* Information about the variable and its location. */
/* Changed 'te' to 'tei'; the 'i' field is for recording
initialization status of variables. */
===================================================================
@@ -4704,6 +4704,12 @@ simplify_ternary_operation (enum rtx_cod
switch (code)
{
+ /* At present, don't simplify fused multiply/add ops, because we need to
+    make sure no intermediate rounding step is introduced, and that we get
+    the right sign if a negative zero would be returned.  */
+ case FMA:
+ return 0;
+
case SIGN_EXTRACT:
case ZERO_EXTRACT:
if (CONST_INT_P (op0)
===================================================================
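The signed-zero concern in the comment can be demonstrated standalone (not
part of the patch): rewriting -fma(a,b,c) as fma(-a,b,-c) flips the sign of
an exact zero result, so the simplifier must not fold through such negations.
Compiled without -ffast-math:

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      double a = 1.0, b = 1.0, c = -1.0;
      printf ("%g\n", -fma (a, b, c));    /* -(+0.0) prints -0 */
      printf ("%g\n", fma (-a, b, -c));   /* -1.0 + 1.0 prints 0 */
      return 0;
    }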
@@ -194,11 +194,7 @@ (define_constants
(UNSPEC_VSX_CVUXDSP 507)
(UNSPEC_VSX_CVSPSXDS 508)
(UNSPEC_VSX_CVSPUXDS 509)
- (UNSPEC_VSX_MADD 510)
- (UNSPEC_VSX_MSUB 511)
- (UNSPEC_VSX_NMADD 512)
- (UNSPEC_VSX_NMSUB 513)
- ;; 514 deleted
+ ;; 510-514 deleted
(UNSPEC_VSX_TDIV 515)
(UNSPEC_VSX_TSQRT 516)
(UNSPEC_VSX_XXPERMDI 517)
@@ -499,19 +495,22 @@ (define_insn "*vsx_tsqrt<mode>2_internal
;; does not check -mfused-madd to allow users to use these ops when they know
;; they want the fused multiply/add.
+;; Fused multiply add.  By default, expand the FMA into (plus (mult)) to help
+;; loop unrolling.  The negated multiply ops are not expanded this way because
+;; of the complications of honoring signed zero with fused-madd.
+
(define_expand "vsx_fmadd<mode>4"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "")
(plus:VSX_B
- (mult:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "")
- (match_operand:VSX_B 2 "vsx_register_operand" ""))
+ (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" ""))
(match_operand:VSX_B 3 "vsx_register_operand" "")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
{
if (!TARGET_FUSED_MADD)
{
- emit_insn (gen_vsx_fmadd<mode>4_2 (operands[0], operands[1], operands[2],
- operands[3]));
+ emit_insn (gen_vsx_fmadd<mode>4_2 (operands[0], operands[1],
+ operands[2], operands[3]));
DONE;
}
})
@@ -534,10 +533,9 @@ (define_insn "*vsx_fmadd<mode>4_1"
(define_insn "vsx_fmadd<mode>4_2"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
- UNSPEC_VSX_MADD))]
+ (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
x<VSv>madda<VSs> %x0,%x1,%x2
@@ -550,16 +548,15 @@ (define_insn "vsx_fmadd<mode>4_2"
(define_expand "vsx_fmsub<mode>4"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "")
(minus:VSX_B
- (mult:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "")
- (match_operand:VSX_B 2 "vsx_register_operand" ""))
+ (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" ""))
(match_operand:VSX_B 3 "vsx_register_operand" "")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
{
if (!TARGET_FUSED_MADD)
{
- emit_insn (gen_vsx_fmsub<mode>4_2 (operands[0], operands[1], operands[2],
- operands[3]));
+ emit_insn (gen_vsx_fmsub<mode>4_2 (operands[0], operands[1],
+ operands[2], operands[3]));
DONE;
}
})
@@ -582,10 +579,10 @@ (define_insn "*vsx_fmsub<mode>4_1"
(define_insn "vsx_fmsub<mode>4_2"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
- UNSPEC_VSX_MSUB))]
+ (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (neg:VSX_B
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
x<VSv>msuba<VSs> %x0,%x1,%x2
@@ -595,32 +592,21 @@ (define_insn "vsx_fmsub<mode>4_2"
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_expand "vsx_fnmadd<mode>4"
- [(match_operand:VSX_B 0 "vsx_register_operand" "")
- (match_operand:VSX_B 1 "vsx_register_operand" "")
- (match_operand:VSX_B 2 "vsx_register_operand" "")
- (match_operand:VSX_B 3 "vsx_register_operand" "")]
+(define_insn "vsx_fnmadd<mode>4"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (neg:VSX_B
+ (fma:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
-{
- if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode))
- {
- emit_insn (gen_vsx_fnmadd<mode>4_1 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
- else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
- {
- emit_insn (gen_vsx_fnmadd<mode>4_2 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
- else
- {
- emit_insn (gen_vsx_fnmadd<mode>4_3 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
-})
+ "@
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
(define_insn "vsx_fnmadd<mode>4_1"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
@@ -658,48 +644,22 @@ (define_insn "vsx_fnmadd<mode>4_2"
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_insn "vsx_fnmadd<mode>4_3"
+(define_insn "vsx_fnmsub<mode>4"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
- UNSPEC_VSX_NMADD))]
+ (neg:VSX_B
+ (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (neg:VSX_B
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3"
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_expand "vsx_fnmsub<mode>4"
- [(match_operand:VSX_B 0 "vsx_register_operand" "")
- (match_operand:VSX_B 1 "vsx_register_operand" "")
- (match_operand:VSX_B 2 "vsx_register_operand" "")
- (match_operand:VSX_B 3 "vsx_register_operand" "")]
- "VECTOR_UNIT_VSX_P (<MODE>mode)"
-{
- if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode))
- {
- emit_insn (gen_vsx_fnmsub<mode>4_1 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
- else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
- {
- emit_insn (gen_vsx_fnmsub<mode>4_2 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
- else
- {
- emit_insn (gen_vsx_fnmsub<mode>4_3 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
-})
-
(define_insn "vsx_fnmsub<mode>4_1"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
(neg:VSX_B
@@ -735,21 +695,6 @@ (define_insn "vsx_fnmsub<mode>4_2"
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_insn "vsx_fnmsub<mode>4_3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
- UNSPEC_VSX_NMSUB))]
- "VECTOR_UNIT_VSX_P (<MODE>mode)"
- "@
- x<VSv>nmsuba<VSs> %x0,%x1,%x2
- x<VSv>nmsubm<VSs> %x0,%x1,%x3
- x<VSv>nmsuba<VSs> %x0,%x1,%x2
- x<VSv>nmsubm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
-
;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
===================================================================
@@ -143,7 +143,6 @@ (define_constants
(UNSPEC_VUPKLS_V4SF 325)
(UNSPEC_VUPKHU_V4SF 326)
(UNSPEC_VUPKLU_V4SF 327)
- (UNSPEC_VNMSUBFP 328)
])
(define_constants
@@ -513,12 +512,39 @@ (define_insn "*altivec_vsel<mode>_uns"
"vsel %0,%3,%2,%1"
[(set_attr "type" "vecperm")])
-;; Fused multiply add
-(define_insn "altivec_vmaddfp"
+;; Fused multiply add.  By default, expand the FMA into (plus (mult)) to help
+;; loop unrolling.  The negated multiply ops are not expanded this way because
+;; of the complications of honoring signed zero with fused-madd.
+
+(define_expand "altivec_vmaddfp"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "register_operand" ""))
+ (match_operand:V4SF 3 "register_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+{
+ if (!TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_altivec_vmaddfp_2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*altivec_vmaddfp_1"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v")
(match_operand:V4SF 2 "register_operand" "v"))
(match_operand:V4SF 3 "register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD"
+ "vmaddfp %0,%1,%2,%3"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vmaddfp_2"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")
+ (match_operand:V4SF 3 "register_operand" "v")))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
"vmaddfp %0,%1,%2,%3"
[(set_attr "type" "vecfloat")])
@@ -529,7 +555,7 @@ (define_expand "altivec_mulv4sf3"
[(use (match_operand:V4SF 0 "register_operand" ""))
(use (match_operand:V4SF 1 "register_operand" ""))
(use (match_operand:V4SF 2 "register_operand" ""))]
- "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD"
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
"
{
rtx neg0;
@@ -627,34 +653,18 @@ (define_expand "mulv8hi3"
}")
;; Fused multiply subtract
-(define_expand "altivec_vnmsubfp"
- [(match_operand:V4SF 0 "register_operand" "")
- (match_operand:V4SF 1 "register_operand" "")
- (match_operand:V4SF 2 "register_operand" "")
- (match_operand:V4SF 3 "register_operand" "")]
+(define_insn "altivec_vnmsubfp"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (neg:V4SF
+ (fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")
+ (neg:V4SF
+ (match_operand:V4SF 3 "register_operand" "v")))))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
-{
- if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (SFmode))
- {
- emit_insn (gen_altivec_vnmsubfp_1 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
- else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
- {
- emit_insn (gen_altivec_vnmsubfp_2 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
- else
- {
- emit_insn (gen_altivec_vnmsubfp_3 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
-})
+ "vnmsubfp %0,%1,%2,%3"
+ [(set_attr "type" "vecfloat")])
-(define_insn "altivec_vnmsubfp_1"
+(define_insn "*altivec_vnmsubfp_1"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(neg:V4SF
(minus:V4SF
@@ -667,7 +677,7 @@ (define_insn "altivec_vnmsubfp_1"
"vnmsubfp %0,%1,%2,%3"
[(set_attr "type" "vecfloat")])
-(define_insn "altivec_vnmsubfp_2"
+(define_insn "*altivec_vnmsubfp_2"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(minus:V4SF
(match_operand:V4SF 3 "register_operand" "v")
@@ -679,16 +689,6 @@ (define_insn "altivec_vnmsubfp_2"
"vnmsubfp %0,%1,%2,%3"
[(set_attr "type" "vecfloat")])
-(define_insn "altivec_vnmsubfp_3"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")
- (match_operand:V4SF 2 "register_operand" "v")
- (match_operand:V4SF 3 "register_operand" "v")]
- UNSPEC_VNMSUBFP))]
- "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
- "vnmsubfp %0,%1,%2,%3"
- [(set_attr "type" "vecfloat")])
-
(define_insn "altivec_vmsumu<VI_char>m"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
===================================================================
@@ -5844,6 +5844,78 @@ (define_insn "fres"
"fres %0,%1"
[(set_attr "type" "fp")])
+; __builtin_fmaf support
+; If the user explicitly uses the fma builtin, don't convert this to
+; (plus (mult op1 op2) op3)
+(define_expand "fmasf4"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (fma:SF (match_operand:SF 1 "gpc_reg_operand" "")
+ (match_operand:SF 2 "gpc_reg_operand" "")
+ (match_operand:SF 3 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "")
+
+(define_insn "fmasf4_fpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "*
+{
+ return ((TARGET_POWERPC)
+ ? \"fmadds %0,%1,%2,%3\"
+ : \"{fma|fmadd} %0,%1,%2,%3\");
+}"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fmssf4_fpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "*
+{
+ return ((TARGET_POWERPC)
+ ? \"fmsubs %0,%1,%2,%3\"
+ : \"{fms|fmsub} %0,%1,%2,%3\");
+}"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmasf4_fpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "*
+{
+ return ((TARGET_POWERPC)
+ ? \"fnmadds %0,%1,%2,%3\"
+ : \"{fnma|fnmadd} %0,%1,%2,%3\");
+}"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmssf4_fpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "*
+{
+ return ((TARGET_POWERPC)
+ ? \"fnmsubs %0,%1,%2,%3\"
+ : \"{fnms|fnmsub} %0,%1,%2,%3\");
+}"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+; Fused multiply/add ops created by the combiner
(define_insn "*fmaddsf4_powerpc"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
@@ -5855,6 +5927,7 @@ (define_insn "*fmaddsf4_powerpc"
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_maddsub_s")])
+
(define_insn "*fmaddsf4_power"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
@@ -6280,6 +6353,62 @@ (define_insn "*rsqrtdf_internal1"
"frsqrte %0,%1"
[(set_attr "type" "fp")])
+; __builtin_fma support
+; If the user explicitly uses the fma builtin, don't convert this to
+; (plus (mult op1 op2) op3)
+(define_expand "fmadf4"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (fma:DF (match_operand:DF 1 "gpc_reg_operand" "")
+ (match_operand:DF 2 "gpc_reg_operand" "")
+ (match_operand:DF 3 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "")
+
+(define_insn "fmadf4_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")
+ (match_operand:DF 3 "gpc_reg_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
+ "{fma|fmadd} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fmsdf4_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")
+ (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
+ "{fms|fmsub} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmadf4_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")
+ (match_operand:DF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
+ "{fnma|fnmadd} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmsdf4_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")
+ (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f")))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+; Fused multiply/add ops created by the combiner
(define_insn "*fmadddf4_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
===================================================================
@@ -3895,6 +3895,22 @@ rs6000_builtin_vectorized_function (tree
if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
break;
+ case BUILT_IN_FMA:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
+ break;
+ case BUILT_IN_FMAF:
+ if (VECTOR_UNIT_VSX_P (V4SFmode)
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
+ else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
+ break;
case BUILT_IN_TRUNC:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
===================================================================
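A sketch (not part of the patch; names are illustrative) of the kind of loop
the new BUILT_IN_FMA entries let the vectorizer handle; with -O3
-ftree-vectorize -mcpu=power7 it should now use xvmadd{a,m}dp, matching the
test cases above:

    void
    vfma (double *restrict a, const double *restrict x,
          const double *restrict y, const double *restrict z, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        a[i] = __builtin_fma (x[i], y[i], z[i]);   /* V2DF xvmadddp */
    }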
@@ -190,6 +190,8 @@ enum optab_index
OTI_pow,
/* Arc tangent of y/x */
OTI_atan2,
+ /* Floating multiply/add */
+ OTI_fma,
/* Move instruction. */
OTI_mov,
@@ -432,6 +434,7 @@ enum optab_index
#define umax_optab (&optab_table[OTI_umax])
#define pow_optab (&optab_table[OTI_pow])
#define atan2_optab (&optab_table[OTI_atan2])
+#define fma_optab (&optab_table[OTI_fma])
#define mov_optab (&optab_table[OTI_mov])
#define movstrict_optab (&optab_table[OTI_movstrict])
===================================================================
@@ -159,6 +159,7 @@ static const char * const optabs[] =
"set_optab_handler (sqrt_optab, $A, CODE_FOR_$(sqrt$a2$))",
"set_optab_handler (floor_optab, $A, CODE_FOR_$(floor$a2$))",
"set_convert_optab_handler (lfloor_optab, $B, $A, CODE_FOR_$(lfloor$F$a$I$b2$))",
+ "set_optab_handler (fma_optab, $A, CODE_FOR_$(fma$a4$))",
"set_optab_handler (ceil_optab, $A, CODE_FOR_$(ceil$a2$))",
"set_convert_optab_handler (lceil_optab, $B, $A, CODE_FOR_$(lceil$F$a$I$b2$))",
"set_optab_handler (round_optab, $A, CODE_FOR_$(round$a2$))",
===================================================================
@@ -106,6 +106,7 @@ static void expand_errno_check (tree, rt
static rtx expand_builtin_mathfn (tree, rtx, rtx);
static rtx expand_builtin_mathfn_2 (tree, rtx, rtx);
static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
+static rtx expand_builtin_mathfn_ternary (tree, rtx, rtx);
static rtx expand_builtin_interclass_mathfn (tree, rtx);
static rtx expand_builtin_sincos (tree);
static rtx expand_builtin_cexpi (tree, rtx);
@@ -2185,6 +2186,79 @@ expand_builtin_mathfn_2 (tree exp, rtx t
return target;
}
+/* Expand a call to the builtin ternary math functions (fma).
+ Return NULL_RTX if a normal call should be emitted rather than expanding the
+ function in-line. EXP is the expression that is a call to the builtin
+ function; if convenient, the result should be placed in TARGET.
+ SUBTARGET may be used as the target for computing one of EXP's
+ operands. */
+
+static rtx
+expand_builtin_mathfn_ternary (tree exp, rtx target, rtx subtarget)
+{
+ optab builtin_optab;
+ rtx op0, op1, op2, insns;
+ tree fndecl = get_callee_fndecl (exp);
+ tree arg0, arg1, arg2;
+ enum machine_mode mode;
+
+ if (!validate_arglist (exp, REAL_TYPE, REAL_TYPE, REAL_TYPE, VOID_TYPE))
+ return NULL_RTX;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ CASE_FLT_FN (BUILT_IN_FMA):
+ builtin_optab = fma_optab; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Make a suitable register to place result in. */
+ mode = TYPE_MODE (TREE_TYPE (exp));
+
+ /* Before working hard, check whether the instruction is available. */
+ if (optab_handler (builtin_optab, mode) == CODE_FOR_nothing)
+ return NULL_RTX;
+
+ target = gen_reg_rtx (mode);
+
+ /* Always stabilize the argument list. */
+ CALL_EXPR_ARG (exp, 0) = arg0 = builtin_save_expr (arg0);
+ CALL_EXPR_ARG (exp, 1) = arg1 = builtin_save_expr (arg1);
+ CALL_EXPR_ARG (exp, 2) = arg2 = builtin_save_expr (arg2);
+
+ op0 = expand_expr (arg0, subtarget, VOIDmode, EXPAND_NORMAL);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ start_sequence ();
+
+ /* Compute into TARGET.
+ Set TARGET to wherever the result comes back. */
+ target = expand_ternary_op (mode, builtin_optab, op0, op1, op2,
+ target, 0);
+
+ /* If we were unable to expand via the builtin, stop the sequence
+ (without outputting the insns) and call to the library function
+ with the stabilized argument list. */
+ if (target == 0)
+ {
+ end_sequence ();
+ return expand_call (exp, target, target == const0_rtx);
+ }
+
+ /* Output the entire sequence. */
+ insns = get_insns ();
+ end_sequence ();
+ emit_insn (insns);
+
+ return target;
+}
+
/* Expand a call to the builtin sin and cos math functions.
Return NULL_RTX if a normal call should be emitted rather than expanding the
function in-line. EXP is the expression that is a call to the builtin
@@ -5829,6 +5903,12 @@ expand_builtin (tree exp, rtx target, rt
return target;
break;
+ CASE_FLT_FN (BUILT_IN_FMA):
+ target = expand_builtin_mathfn_ternary (exp, target, subtarget);
+ if (target)
+ return target;
+ break;
+
CASE_FLT_FN (BUILT_IN_ILOGB):
if (! flag_unsafe_math_optimizations)
break;
===================================================================
@@ -1357,10 +1357,10 @@ vectorizable_call (gimple stmt, gimple_s
vectype_in = NULL_TREE;
nargs = gimple_call_num_args (stmt);
- /* Bail out if the function has more than two arguments, we
- do not have interesting builtin functions to vectorize with
- more than two arguments. No arguments is also not good. */
- if (nargs == 0 || nargs > 2)
+ /* Bail out if the function has more than three arguments; except for fma,
+    we do not have interesting builtin functions to vectorize with more than
+    two arguments.  No arguments is also not good.  */
+ if (nargs == 0 || nargs > 3)
return false;
for (i = 0; i < nargs; i++)