@@ -372,11 +372,11 @@ powerpc*-*-*)
cpu_is_64bit=yes
;;
esac
- extra_options="${extra_options} g.opt"
+ extra_options="${extra_options} g.opt fused-madd.opt"
;;
rs6000*-*-*)
need_64bit_hwint=yes
- extra_options="${extra_options} g.opt"
+ extra_options="${extra_options} g.opt fused-madd.opt"
;;
score*-*-*)
cpu_type=score
@@ -512,35 +512,9 @@
"vsel %0,%3,%2,%1"
[(set_attr "type" "vecperm")])
-;; Fused multiply add. By default expand the FMA into (plus (mult)) to help
-;; loop unrolling. Don't do negate multiply ops, because of complications with
-;; honoring signed zero and fused-madd.
+;; Fused multiply add.
-(define_expand "altivec_vmaddfp"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "")
- (match_operand:V4SF 2 "register_operand" ""))
- (match_operand:V4SF 3 "register_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
-{
- if (!TARGET_FUSED_MADD)
- {
- emit_insn (gen_altivec_vmaddfp_2 (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
- }
-})
-
-(define_insn "*altivec_vmaddfp_1"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v")
- (match_operand:V4SF 2 "register_operand" "v"))
- (match_operand:V4SF 3 "register_operand" "v")))]
- "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD"
- "vmaddfp %0,%1,%2,%3"
- [(set_attr "type" "vecfloat")])
-
-(define_insn "altivec_vmaddfp_2"
+(define_insn "*altivec_fmav4sf4"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
(match_operand:V4SF 2 "register_operand" "v")
@@ -552,24 +526,19 @@
;; We do multiply as a fused multiply-add with an add of a -0.0 vector.
(define_expand "altivec_mulv4sf3"
- [(use (match_operand:V4SF 0 "register_operand" ""))
- (use (match_operand:V4SF 1 "register_operand" ""))
- (use (match_operand:V4SF 2 "register_operand" ""))]
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (fma:V4SF (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "register_operand" "")
+ (match_dup 3)))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
- "
{
rtx neg0;
/* Generate [-0.0, -0.0, -0.0, -0.0]. */
neg0 = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx));
emit_insn (gen_vashlv4si3 (neg0, neg0, neg0));
-
- /* Use the multiply-add. */
- emit_insn (gen_altivec_vmaddfp (operands[0], operands[1], operands[2],
- gen_lowpart (V4SFmode, neg0)));
- DONE;
-}")
+
+ /* The -0.0 vector is built with integer ops in V4SImode, but it is used
+ as the addend (match_dup 3) of the fma:V4SF above, so it must be viewed
+ as V4SFmode, exactly as the replaced code did with gen_lowpart. */
+ operands[3] = gen_lowpart (V4SFmode, neg0);
+})
;; 32-bit integer multiplication
;; A_high = Operand_0 & 0xFFFF0000 >> 16
@@ -653,7 +622,7 @@
}")
;; Fused multiply subtract
-(define_insn "altivec_vnmsubfp"
+(define_insn "*altivec_vnmsubfp"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(neg:V4SF
(fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
@@ -664,31 +633,6 @@
"vnmsubfp %0,%1,%2,%3"
[(set_attr "type" "vecfloat")])
-(define_insn "*altivec_vnmsubfp_1"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (neg:V4SF
- (minus:V4SF
- (mult:V4SF
- (match_operand:V4SF 1 "register_operand" "v")
- (match_operand:V4SF 2 "register_operand" "v"))
- (match_operand:V4SF 3 "register_operand" "v"))))]
- "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD
- && HONOR_SIGNED_ZEROS (SFmode)"
- "vnmsubfp %0,%1,%2,%3"
- [(set_attr "type" "vecfloat")])
-
-(define_insn "*altivec_vnmsubfp_2"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (minus:V4SF
- (match_operand:V4SF 3 "register_operand" "v")
- (mult:V4SF
- (match_operand:V4SF 1 "register_operand" "v")
- (match_operand:V4SF 2 "register_operand" "v"))))]
- "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD
- && !HONOR_SIGNED_ZEROS (SFmode)"
- "vnmsubfp %0,%1,%2,%3"
- [(set_attr "type" "vecfloat")])
-
(define_insn "altivec_vmsumu<VI_char>m"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
@@ -96,77 +96,85 @@
(define_insn "paired_madds0"
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
- (vec_concat:V2SF
- (plus:SF (mult:SF (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
- (parallel [(const_int 0)]))
- (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
- (parallel [(const_int 0)])))
- (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f")
- (parallel [(const_int 0)])))
- (plus:SF (mult:SF (vec_select:SF (match_dup 1)
- (parallel [(const_int 1)]))
- (vec_select:SF (match_dup 2)
- (parallel [(const_int 0)])))
- (vec_select:SF (match_dup 3)
- (parallel [(const_int 1)])))))]
- "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD"
+ (vec_concat:V2SF
+ (fma:SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)])))
+ (fma:SF
+ (vec_select:SF (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_dup 2)
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 3)
+ (parallel [(const_int 1)])))))]
+ "TARGET_PAIRED_FLOAT"
"ps_madds0 %0,%1,%2,%3"
[(set_attr "type" "fp")])
(define_insn "paired_madds1"
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
- (vec_concat:V2SF
- (plus:SF (mult:SF (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
- (parallel [(const_int 0)]))
- (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
- (parallel [(const_int 1)])))
- (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f")
- (parallel [(const_int 0)])))
- (plus:SF (mult:SF (vec_select:SF (match_dup 1)
- (parallel [(const_int 1)]))
- (vec_select:SF (match_dup 2)
- (parallel [(const_int 1)])))
- (vec_select:SF (match_dup 3)
- (parallel [(const_int 1)])))))]
- "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD"
+ (vec_concat:V2SF
+ (fma:SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)])))
+ (fma:SF
+ (vec_select:SF (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_dup 2)
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_dup 3)
+ (parallel [(const_int 1)])))))]
+ "TARGET_PAIRED_FLOAT"
"ps_madds1 %0,%1,%2,%3"
[(set_attr "type" "fp")])
-(define_insn "paired_madd"
+(define_insn "*paired_madd"
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
- (plus:V2SF (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f")
- (match_operand:V2SF 2 "gpc_reg_operand" "f"))
- (match_operand:V2SF 3 "gpc_reg_operand" "f")))]
- "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD"
+ (fma:V2SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (match_operand:V2SF 3 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
"ps_madd %0,%1,%2,%3"
[(set_attr "type" "fp")])
-(define_insn "paired_msub"
+(define_insn "*paired_msub"
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
- (minus:V2SF (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f")
- (match_operand:V2SF 2 "gpc_reg_operand" "f"))
- (match_operand:V2SF 3 "gpc_reg_operand" "f")))]
- "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD"
+ (fma:V2SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (neg:V2SF (match_operand:V2SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_PAIRED_FLOAT"
"ps_msub %0,%1,%2,%3"
[(set_attr "type" "fp")])
-(define_insn "paired_nmadd"
+(define_insn "*paired_nmadd"
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
- (neg:V2SF (plus:V2SF (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f")
- (match_operand:V2SF 2 "gpc_reg_operand" "f"))
- (match_operand:V2SF 3 "gpc_reg_operand" "f"))))]
- "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD
- && HONOR_SIGNED_ZEROS (SFmode)"
+ (neg:V2SF
+ (fma:V2SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (match_operand:V2SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_PAIRED_FLOAT"
"ps_nmadd %0,%1,%2,%3"
[(set_attr "type" "fp")])
-(define_insn "paired_nmsub"
+(define_insn "*paired_nmsub"
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
- (neg:V2SF (minus:V2SF (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f")
- (match_operand:V2SF 2 "gpc_reg_operand" "f"))
- (match_operand:V2SF 3 "gpc_reg_operand" "f"))))]
- "TARGET_PAIRED_FLOAT && TARGET_FUSED_MADD
- && HONOR_SIGNED_ZEROS (DFmode)"
+ (neg:V2SF
+ (fma:V2SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (neg:V2SF (match_operand:V2SF 3 "gpc_reg_operand" "f")))))]
+ "TARGET_PAIRED_FLOAT"
"ps_nmsub %0,%1,%2,%3"
[(set_attr "type" "dmul")])
@@ -2284,16 +2284,13 @@ rs6000_init_hard_regno_mode_ok (void)
if (rs6000_recip_control)
{
- if (!TARGET_FUSED_MADD)
- warning (0, "-mrecip requires -mfused-madd");
if (!flag_finite_math_only)
warning (0, "-mrecip requires -ffinite-math or -ffast-math");
if (flag_trapping_math)
warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
if (!flag_reciprocal_math)
warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
- if (TARGET_FUSED_MADD && flag_finite_math_only && !flag_trapping_math
- && flag_reciprocal_math)
+ if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
{
if (RS6000_RECIP_HAVE_RE_P (SFmode)
&& (rs6000_recip_control & RECIP_SF_DIV) != 0)
@@ -9684,7 +9681,7 @@ def_builtin (int mask, const char *name, tree type, int code)
static const struct builtin_description bdesc_3arg[] =
{
- { MASK_ALTIVEC, CODE_FOR_altivec_vmaddfp, "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP },
+ { MASK_ALTIVEC, CODE_FOR_fmav4sf4, "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP },
{ MASK_ALTIVEC, CODE_FOR_altivec_vmhaddshs, "__builtin_altivec_vmhaddshs", ALTIVEC_BUILTIN_VMHADDSHS },
{ MASK_ALTIVEC, CODE_FOR_altivec_vmhraddshs, "__builtin_altivec_vmhraddshs", ALTIVEC_BUILTIN_VMHRADDSHS },
{ MASK_ALTIVEC, CODE_FOR_altivec_vmladduhm, "__builtin_altivec_vmladduhm", ALTIVEC_BUILTIN_VMLADDUHM},
@@ -9694,7 +9691,7 @@ static const struct builtin_description bdesc_3arg[] =
{ MASK_ALTIVEC, CODE_FOR_altivec_vmsumshm, "__builtin_altivec_vmsumshm", ALTIVEC_BUILTIN_VMSUMSHM },
{ MASK_ALTIVEC, CODE_FOR_altivec_vmsumuhs, "__builtin_altivec_vmsumuhs", ALTIVEC_BUILTIN_VMSUMUHS },
{ MASK_ALTIVEC, CODE_FOR_altivec_vmsumshs, "__builtin_altivec_vmsumshs", ALTIVEC_BUILTIN_VMSUMSHS },
- { MASK_ALTIVEC, CODE_FOR_altivec_vnmsubfp, "__builtin_altivec_vnmsubfp", ALTIVEC_BUILTIN_VNMSUBFP },
+ { MASK_ALTIVEC, CODE_FOR_nfmsv4sf4, "__builtin_altivec_vnmsubfp", ALTIVEC_BUILTIN_VNMSUBFP },
{ MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2df, "__builtin_altivec_vperm_2df", ALTIVEC_BUILTIN_VPERM_2DF },
{ MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2di, "__builtin_altivec_vperm_2di", ALTIVEC_BUILTIN_VPERM_2DI },
{ MASK_ALTIVEC, CODE_FOR_altivec_vperm_v4sf, "__builtin_altivec_vperm_4sf", ALTIVEC_BUILTIN_VPERM_4SF },
@@ -9736,15 +9733,15 @@ static const struct builtin_description bdesc_3arg[] =
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_perm", ALTIVEC_BUILTIN_VEC_PERM },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sel", ALTIVEC_BUILTIN_VEC_SEL },
- { MASK_VSX, CODE_FOR_vsx_fmaddv2df4, "__builtin_vsx_xvmadddp", VSX_BUILTIN_XVMADDDP },
- { MASK_VSX, CODE_FOR_vsx_fmsubv2df4, "__builtin_vsx_xvmsubdp", VSX_BUILTIN_XVMSUBDP },
- { MASK_VSX, CODE_FOR_vsx_fnmaddv2df4, "__builtin_vsx_xvnmadddp", VSX_BUILTIN_XVNMADDDP },
- { MASK_VSX, CODE_FOR_vsx_fnmsubv2df4, "__builtin_vsx_xvnmsubdp", VSX_BUILTIN_XVNMSUBDP },
+ { MASK_VSX, CODE_FOR_fmav2df4, "__builtin_vsx_xvmadddp", VSX_BUILTIN_XVMADDDP },
+ { MASK_VSX, CODE_FOR_fmsv2df4, "__builtin_vsx_xvmsubdp", VSX_BUILTIN_XVMSUBDP },
+ { MASK_VSX, CODE_FOR_nfmav2df4, "__builtin_vsx_xvnmadddp", VSX_BUILTIN_XVNMADDDP },
+ { MASK_VSX, CODE_FOR_nfmsv2df4, "__builtin_vsx_xvnmsubdp", VSX_BUILTIN_XVNMSUBDP },
- { MASK_VSX, CODE_FOR_vsx_fmaddv4sf4, "__builtin_vsx_xvmaddsp", VSX_BUILTIN_XVMADDSP },
- { MASK_VSX, CODE_FOR_vsx_fmsubv4sf4, "__builtin_vsx_xvmsubsp", VSX_BUILTIN_XVMSUBSP },
- { MASK_VSX, CODE_FOR_vsx_fnmaddv4sf4, "__builtin_vsx_xvnmaddsp", VSX_BUILTIN_XVNMADDSP },
- { MASK_VSX, CODE_FOR_vsx_fnmsubv4sf4, "__builtin_vsx_xvnmsubsp", VSX_BUILTIN_XVNMSUBSP },
+ { MASK_VSX, CODE_FOR_fmav4sf4, "__builtin_vsx_xvmaddsp", VSX_BUILTIN_XVMADDSP },
+ { MASK_VSX, CODE_FOR_fmsv4sf4, "__builtin_vsx_xvmsubsp", VSX_BUILTIN_XVMSUBSP },
+ { MASK_VSX, CODE_FOR_nfmav4sf4, "__builtin_vsx_xvnmaddsp", VSX_BUILTIN_XVNMADDSP },
+ { MASK_VSX, CODE_FOR_nfmsv4sf4, "__builtin_vsx_xvnmsubsp", VSX_BUILTIN_XVNMSUBSP },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_msub", VSX_BUILTIN_VEC_MSUB },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_nmadd", VSX_BUILTIN_VEC_NMADD },
@@ -9789,12 +9786,12 @@ static const struct builtin_description bdesc_3arg[] =
{ MASK_VSX, CODE_FOR_vsx_xxsldwi_v16qi, "__builtin_vsx_xxsldwi_16qi", VSX_BUILTIN_XXSLDWI_16QI },
{ MASK_VSX, CODE_FOR_nothing, "__builtin_vsx_xxsldwi", VSX_BUILTIN_VEC_XXSLDWI },
- { 0, CODE_FOR_paired_msub, "__builtin_paired_msub", PAIRED_BUILTIN_MSUB },
- { 0, CODE_FOR_paired_madd, "__builtin_paired_madd", PAIRED_BUILTIN_MADD },
+ { 0, CODE_FOR_fmsv2sf4, "__builtin_paired_msub", PAIRED_BUILTIN_MSUB },
+ { 0, CODE_FOR_fmav2sf4, "__builtin_paired_madd", PAIRED_BUILTIN_MADD },
{ 0, CODE_FOR_paired_madds0, "__builtin_paired_madds0", PAIRED_BUILTIN_MADDS0 },
{ 0, CODE_FOR_paired_madds1, "__builtin_paired_madds1", PAIRED_BUILTIN_MADDS1 },
- { 0, CODE_FOR_paired_nmsub, "__builtin_paired_nmsub", PAIRED_BUILTIN_NMSUB },
- { 0, CODE_FOR_paired_nmadd, "__builtin_paired_nmadd", PAIRED_BUILTIN_NMADD },
+ { 0, CODE_FOR_nfmsv2sf4, "__builtin_paired_nmsub", PAIRED_BUILTIN_NMSUB },
+ { 0, CODE_FOR_nfmav2sf4, "__builtin_paired_nmadd", PAIRED_BUILTIN_NMADD },
{ 0, CODE_FOR_paired_sum0, "__builtin_paired_sum0", PAIRED_BUILTIN_SUM0 },
{ 0, CODE_FOR_paired_sum1, "__builtin_paired_sum1", PAIRED_BUILTIN_SUM1 },
{ 0, CODE_FOR_selv2sf4, "__builtin_paired_selv2sf4", PAIRED_BUILTIN_SELV2SF4 },
@@ -26390,112 +26387,65 @@ rs6000_load_constant_and_splat (enum machine_mode mode, REAL_VALUE_TYPE dconst)
return reg;
}
-/* Generate a FMADD instruction:
- dst = (m1 * m2) + a
-
- generating different RTL based on the fused multiply/add switch. */
+/* Generate an FMA instruction: target = fma (m1, m2, a). */
static void
-rs6000_emit_madd (rtx dst, rtx m1, rtx m2, rtx a)
+rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
{
- enum machine_mode mode = GET_MODE (dst);
-
- if (!TARGET_FUSED_MADD)
- {
- /* For the simple ops, use the generator function, rather than assuming
- that the RTL is standard. */
- enum insn_code mcode = optab_handler (smul_optab, mode);
- enum insn_code acode = optab_handler (add_optab, mode);
- gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (mcode);
- gen_2arg_fn_t gen_add = (gen_2arg_fn_t) GEN_FCN (acode);
- rtx mreg = gen_reg_rtx (mode);
+ enum machine_mode mode = GET_MODE (target);
+ rtx dst;
- gcc_assert (mcode != CODE_FOR_nothing && acode != CODE_FOR_nothing);
- emit_insn (gen_mul (mreg, m1, m2));
- emit_insn (gen_add (dst, mreg, a));
- }
+ dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
+ gcc_assert (dst != NULL);
- else
- emit_insn (gen_rtx_SET (VOIDmode, dst,
- gen_rtx_PLUS (mode,
- gen_rtx_MULT (mode, m1, m2),
- a)));
+ if (dst != target)
+ emit_move_insn (target, dst);
}
-/* Generate a FMSUB instruction:
- dst = (m1 * m2) - a
-
- generating different RTL based on the fused multiply/add switch. */
+/* Generate a FMSUB instruction: target = fma(m1, m2, -a). */
static void
-rs6000_emit_msub (rtx dst, rtx m1, rtx m2, rtx a)
+rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
{
- enum machine_mode mode = GET_MODE (dst);
+ enum machine_mode mode = GET_MODE (target);
+ rtx dst;
- if (!TARGET_FUSED_MADD
- || (mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (V4SFmode)))
+ /* Altivec does not support fms directly;
+ generate in terms of fma in that case. */
+ if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
+ dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
+ else
{
- /* For the simple ops, use the generator function, rather than assuming
- that the RTL is standard. */
- enum insn_code mcode = optab_handler (smul_optab, mode);
- enum insn_code scode = optab_handler (add_optab, mode);
- gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (mcode);
- gen_2arg_fn_t gen_sub = (gen_2arg_fn_t) GEN_FCN (scode);
- rtx mreg = gen_reg_rtx (mode);
-
- gcc_assert (mcode != CODE_FOR_nothing && scode != CODE_FOR_nothing);
- emit_insn (gen_mul (mreg, m1, m2));
- emit_insn (gen_sub (dst, mreg, a));
+ a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
+ dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
}
+ gcc_assert (dst != NULL);
- else
- emit_insn (gen_rtx_SET (VOIDmode, dst,
- gen_rtx_MINUS (mode,
- gen_rtx_MULT (mode, m1, m2),
- a)));
+ if (dst != target)
+ emit_move_insn (target, dst);
}
-
-/* Generate a FNMSUB instruction:
- dst = - ((m1 * m2) - a)
-
- Which is equivalent to (except in the prescence of -0.0):
- dst = a - (m1 * m2)
-
- generating different RTL based on the fast-math and fused multiply/add
- switches. */
+
+/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
static void
rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
{
enum machine_mode mode = GET_MODE (dst);
+ rtx r;
- if (!TARGET_FUSED_MADD)
- {
- /* For the simple ops, use the generator function, rather than assuming
- that the RTL is standard. */
- enum insn_code mcode = optab_handler (smul_optab, mode);
- enum insn_code scode = optab_handler (sub_optab, mode);
- gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (mcode);
- gen_2arg_fn_t gen_sub = (gen_2arg_fn_t) GEN_FCN (scode);
- rtx mreg = gen_reg_rtx (mode);
-
- gcc_assert (mcode != CODE_FOR_nothing && scode != CODE_FOR_nothing);
- emit_insn (gen_mul (mreg, m1, m2));
- emit_insn (gen_sub (dst, a, mreg));
- }
+ /* This is a tad more complicated, since the fnma_optab is for
+ a different expression: fma(-m1, m2, a), which is the same
+ thing except in the case of signed zeros.
- else
- {
- rtx m = gen_rtx_MULT (mode, m1, m2);
+ Fortunately we know that if FMA is supported, then FNMSUB is
+ also supported in the ISA. Just expand it directly. */
- if (!HONOR_SIGNED_ZEROS (mode))
- emit_insn (gen_rtx_SET (VOIDmode, dst, gen_rtx_MINUS (mode, a, m)));
+ gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
- else
- emit_insn (gen_rtx_SET (VOIDmode, dst,
- gen_rtx_NEG (mode,
- gen_rtx_MINUS (mode, m, a))));
- }
+ r = gen_rtx_NEG (mode, a);
+ r = gen_rtx_FMA (mode, m1, m2, r);
+ r = gen_rtx_NEG (mode, r);
+ emit_insn (gen_rtx_SET (VOIDmode, dst, r));
}
/* Newton-Raphson approximation of floating point divide with just 2 passes
@@ -226,6 +226,16 @@
(DD "TARGET_DFP")
(TD "TARGET_DFP")])
+; Any fma capable floating-point mode.
+(define_mode_iterator FMA_F [
+ (SF "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT")
+ (DF "(TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+ || VECTOR_UNIT_VSX_P (DFmode)")
+ (V2SF "TARGET_PAIRED_FLOAT")
+ (V4SF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)")
+ (V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)")
+ ])
+
; These modes do not fit in integer registers in 32-bit mode.
; but on e500v2, the gpr are 64 bit registers
(define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
@@ -5845,28 +5855,17 @@
[(set_attr "type" "fp")])
; builtin fmaf support
-; If the user explicitly uses the fma builtin, don't convert this to
-; (plus (mult op1 op2) op3)
-(define_expand "fmasf4"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (fma:SF (match_operand:SF 1 "gpc_reg_operand" "")
- (match_operand:SF 2 "gpc_reg_operand" "")
- (match_operand:SF 3 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "")
-
-(define_insn "fmasf4_fpr"
+(define_insn "*fmasf4_fpr"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
(match_operand:SF 2 "gpc_reg_operand" "f")
(match_operand:SF 3 "gpc_reg_operand" "f")))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "*
{
- return ((TARGET_POWERPC)
- ? \"fmadds %0,%1,%2,%3\"
- : \"{fma|fmadd} %0,%1,%2,%3\");
-}"
+ return (TARGET_POWERPC
+ ? "fmadds %0,%1,%2,%3"
+ : "{fma|fmadd} %0,%1,%2,%3");
+}
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_maddsub_s")])
@@ -5876,168 +5875,42 @@
(match_operand:SF 2 "gpc_reg_operand" "f")
(neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "*
{
- return ((TARGET_POWERPC)
- ? \"fmsubs %0,%1,%2,%3\"
- : \"{fms|fmsub} %0,%1,%2,%3\");
-}"
+ return (TARGET_POWERPC
+ ? "fmsubs %0,%1,%2,%3"
+ : "{fms|fmsub} %0,%1,%2,%3");
+}
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_maddsub_s")])
-(define_insn "*fnmasf4_fpr"
+(define_insn "*nfmasf4_fpr"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
(match_operand:SF 2 "gpc_reg_operand" "f")
(match_operand:SF 3 "gpc_reg_operand" "f"))))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "*
{
- return ((TARGET_POWERPC)
- ? \"fnmadds %0,%1,%2,%3\"
- : \"{fnma|fnmadd} %0,%1,%2,%3\");
-}"
+ return (TARGET_POWERPC
+ ? "fnmadds %0,%1,%2,%3"
+ : "{fnma|fnmadd} %0,%1,%2,%3");
+}
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_maddsub_s")])
-(define_insn "*fnmssf4_fpr"
+(define_insn "*nfmssf4_fpr"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
(match_operand:SF 2 "gpc_reg_operand" "f")
(neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "*
{
- return ((TARGET_POWERPC)
- ? \"fnmsubs %0,%1,%2,%3\"
- : \"{fnms|fnmsub} %0,%1,%2,%3\");
-}"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
-
-; Fused multiply/add ops created by the combiner
-(define_insn "*fmaddsf4_powerpc"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f")))]
- "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_SINGLE_FLOAT && TARGET_FUSED_MADD"
- "fmadds %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
-
-(define_insn "*fmaddsf4_power"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f")))]
- "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
- "{fma|fmadd} %0,%1,%2,%3"
- [(set_attr "type" "dmul")])
-
-(define_insn "*fmsubsf4_powerpc"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f")))]
- "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_SINGLE_FLOAT && TARGET_FUSED_MADD"
- "fmsubs %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
-
-(define_insn "*fmsubsf4_power"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f")))]
- "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
- "{fms|fmsub} %0,%1,%2,%3"
- [(set_attr "type" "dmul")])
-
-(define_insn "*fnmaddsf4_powerpc_1"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (neg:SF (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f"))))]
- "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
- && TARGET_SINGLE_FLOAT"
- "fnmadds %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
-
-(define_insn "*fnmaddsf4_powerpc_2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f"))
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f")))]
- "TARGET_POWERPC && TARGET_SINGLE_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
- && ! HONOR_SIGNED_ZEROS (SFmode)"
- "fnmadds %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
-
-(define_insn "*fnmaddsf4_power_1"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (neg:SF (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f"))))]
- "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
- "{fnma|fnmadd} %0,%1,%2,%3"
- [(set_attr "type" "dmul")])
-
-(define_insn "*fnmaddsf4_power_2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f"))
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f")))]
- "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
- && ! HONOR_SIGNED_ZEROS (SFmode)"
- "{fnma|fnmadd} %0,%1,%2,%3"
- [(set_attr "type" "dmul")])
-
-(define_insn "*fnmsubsf4_powerpc_1"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (neg:SF (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f"))))]
- "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
- && TARGET_SINGLE_FLOAT"
- "fnmsubs %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
-
-(define_insn "*fnmsubsf4_powerpc_2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
- (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))))]
- "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
- && TARGET_SINGLE_FLOAT && ! HONOR_SIGNED_ZEROS (SFmode)"
- "fnmsubs %0,%1,%2,%3"
+ return (TARGET_POWERPC
+ ? "fnmsubs %0,%1,%2,%3"
+ : "{fnms|fnmsub} %0,%1,%2,%3");
+}
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_maddsub_s")])
-(define_insn "*fnmsubsf4_power_1"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (neg:SF (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))
- (match_operand:SF 3 "gpc_reg_operand" "f"))))]
- "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
- "{fnms|fnmsub} %0,%1,%2,%3"
- [(set_attr "type" "dmul")])
-
-(define_insn "*fnmsubsf4_power_2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
- (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f"))))]
- "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
- && ! HONOR_SIGNED_ZEROS (SFmode)"
- "{fnms|fnmsub} %0,%1,%2,%3"
- [(set_attr "type" "dmul")])
-
(define_expand "sqrtsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
(sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
@@ -6385,17 +6258,7 @@
[(set_attr "type" "fp")])
; builtin fma support
-; If the user explicitly uses the fma builtin, don't convert this to
-; (plus (mult op1 op2) op3)
-(define_expand "fmadf4"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (fma:DF (match_operand:DF 1 "gpc_reg_operand" "")
- (match_operand:DF 2 "gpc_reg_operand" "")
- (match_operand:DF 3 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
- "")
-
-(define_insn "fmadf4_fpr"
+(define_insn "*fmadf4_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
(match_operand:DF 2 "gpc_reg_operand" "f")
@@ -6417,7 +6280,7 @@
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_maddsub_s")])
-(define_insn "*fnmadf4_fpr"
+(define_insn "*nfmadf4_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
(match_operand:DF 2 "gpc_reg_operand" "f")
@@ -6428,7 +6291,7 @@
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_maddsub_s")])
-(define_insn "*fnmsdf4_fpr"
+(define_insn "*nfmsdf4_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
(match_operand:DF 2 "gpc_reg_operand" "f")
@@ -6439,73 +6302,6 @@
[(set_attr "type" "fp")
(set_attr "fp_type" "fp_maddsub_s")])
-; Fused multiply/add ops created by the combiner
-(define_insn "*fmadddf4_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
- (match_operand:DF 2 "gpc_reg_operand" "d"))
- (match_operand:DF 3 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_NONE_P (DFmode)"
- "{fma|fmadd} %0,%1,%2,%3"
- [(set_attr "type" "dmul")
- (set_attr "fp_type" "fp_maddsub_d")])
-
-(define_insn "*fmsubdf4_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
- (match_operand:DF 2 "gpc_reg_operand" "d"))
- (match_operand:DF 3 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_NONE_P (DFmode)"
- "{fms|fmsub} %0,%1,%2,%3"
- [(set_attr "type" "dmul")
- (set_attr "fp_type" "fp_maddsub_d")])
-
-(define_insn "*fnmadddf4_fpr_1"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (neg:DF (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
- (match_operand:DF 2 "gpc_reg_operand" "d"))
- (match_operand:DF 3 "gpc_reg_operand" "d"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_NONE_P (DFmode)"
- "{fnma|fnmadd} %0,%1,%2,%3"
- [(set_attr "type" "dmul")
- (set_attr "fp_type" "fp_maddsub_d")])
-
-(define_insn "*fnmadddf4_fpr_2"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (minus:DF (mult:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d"))
- (match_operand:DF 2 "gpc_reg_operand" "d"))
- (match_operand:DF 3 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && ! HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)"
- "{fnma|fnmadd} %0,%1,%2,%3"
- [(set_attr "type" "dmul")
- (set_attr "fp_type" "fp_maddsub_d")])
-
-(define_insn "*fnmsubdf4_fpr_1"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (neg:DF (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
- (match_operand:DF 2 "gpc_reg_operand" "d"))
- (match_operand:DF 3 "gpc_reg_operand" "d"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_NONE_P (DFmode)"
- "{fnms|fnmsub} %0,%1,%2,%3"
- [(set_attr "type" "dmul")
- (set_attr "fp_type" "fp_maddsub_d")])
-
-(define_insn "*fnmsubdf4_fpr_2"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (minus:DF (match_operand:DF 3 "gpc_reg_operand" "d")
- (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
- (match_operand:DF 2 "gpc_reg_operand" "d"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD && TARGET_DOUBLE_FLOAT
- && ! HONOR_SIGNED_ZEROS (DFmode) && VECTOR_UNIT_NONE_P (DFmode)"
- "{fnms|fnmsub} %0,%1,%2,%3"
- [(set_attr "type" "dmul")
- (set_attr "fp_type" "fp_maddsub_d")])
-
(define_expand "sqrtdf2"
[(set (match_operand:DF 0 "gpc_reg_operand" "")
(sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
@@ -16234,6 +16030,73 @@
[(set_attr "type" "integer")])
+;; Builtin fma support. Handle the fma, fms, fnma, fnms, nfma and nfms expanders.
+;; Note that the conditions for expansion are in the FMA_F iterator.
+
+(define_expand "fma<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (match_operand:FMA_F 3 "register_operand" "")))]
+ ""
+ "")
+
+; Altivec only has fma and nfms, so fms is not expanded for Altivec modes.
+(define_expand "fms<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (neg:FMA_F (match_operand:FMA_F 3 "register_operand" ""))))]
+ "!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "")
+
+;; fnma is -a * b + c; if signed zeros are ignored, emit it as -(a * b - c).
+(define_expand "fnma<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (neg:FMA_F
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (neg:FMA_F (match_operand:FMA_F 3 "register_operand" "")))))]
+ "!HONOR_SIGNED_ZEROS (<MODE>mode)"
+ "")
+
+;; fnms is -a * b - c; if signed zeros are ignored, emit it as -(a * b + c).
+(define_expand "fnms<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (neg:FMA_F
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (match_operand:FMA_F 3 "register_operand" ""))))]
+ "!HONOR_SIGNED_ZEROS (<MODE>mode) && !VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "")
+
+; Not an official optab name, but used from builtins. Computes -(a * b + c).
+(define_expand "nfma<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (neg:FMA_F
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (match_operand:FMA_F 3 "register_operand" ""))))]
+ "!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "")
+
+; Not an official optab name, but used from builtins. Computes -(a * b - c).
+(define_expand "nfms<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (neg:FMA_F
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (neg:FMA_F (match_operand:FMA_F 3 "register_operand" "")))))]
+ ""
+ "")
+
+
(include "sync.md")
(include "vector.md")
@@ -176,10 +176,6 @@ mavoid-indexed-addresses
Target Report Var(TARGET_AVOID_XFORM) Init(-1)
Avoid generation of indexed load/store instructions when possible
-mfused-madd
-Target Report Var(TARGET_FUSED_MADD) Init(1)
-Generate fused multiply/add instructions
-
mtls-markers
Target Report Var(tls_markers) Init(1)
Mark __tls_get_addr calls with argument info
@@ -202,16 +202,14 @@
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
(match_operand:VEC_F 2 "vfloat_operand" "")))]
- "(VECTOR_UNIT_VSX_P (<MODE>mode)
- || (VECTOR_UNIT_ALTIVEC_P (<MODE>mode) && TARGET_FUSED_MADD))"
- "
+ "VECTOR_UNIT_VSX_P (<MODE>mode) || VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
{
if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
{
emit_insn (gen_altivec_mulv4sf3 (operands[0], operands[1], operands[2]));
DONE;
}
-}")
+})
(define_expand "div<mode>3"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
@@ -513,51 +513,12 @@
;; Fused vector multiply/add instructions
-;; Note we have a pattern for the multiply/add operations that uses unspec and
-;; does not check -mfused-madd to allow users to use these ops when they know
-;; they want the fused multiply/add.
-
-;; Fused multiply add. By default expand the FMA into (plus (mult)) to help
-;; loop unrolling. Don't do negate multiply ops, because of complications with
-;; honoring signed zero and fused-madd.
-
-(define_expand "vsx_fmadd<mode>4"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "")
- (plus:VSX_B
- (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "")
- (match_operand:VSX_B 2 "vsx_register_operand" ""))
- (match_operand:VSX_B 3 "vsx_register_operand" "")))]
- "VECTOR_UNIT_VSX_P (<MODE>mode)"
-{
- if (!TARGET_FUSED_MADD)
- {
- emit_insn (gen_vsx_fmadd<mode>4_2 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
-})
-
-(define_insn "*vsx_fmadd<mode>4_1"
+(define_insn "*vsx_fma<mode>4"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (plus:VSX_B
- (mult:VSX_B
+ (fma:VSX_B
(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
- "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD"
- "@
- x<VSv>madda<VSs> %x0,%x1,%x2
- x<VSv>maddm<VSs> %x0,%x1,%x3
- x<VSv>madda<VSs> %x0,%x1,%x2
- x<VSv>maddm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
-
-(define_insn "vsx_fmadd<mode>4_2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
x<VSv>madda<VSs> %x0,%x1,%x2
@@ -567,44 +528,13 @@
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_expand "vsx_fmsub<mode>4"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "")
- (minus:VSX_B
- (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "")
- (match_operand:VSX_B 2 "vsx_register_operand" ""))
- (match_operand:VSX_B 3 "vsx_register_operand" "")))]
- "VECTOR_UNIT_VSX_P (<MODE>mode)"
-{
- if (!TARGET_FUSED_MADD)
- {
- emit_insn (gen_vsx_fmsub<mode>4_2 (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
- }
-})
-
-(define_insn "*vsx_fmsub<mode>4_1"
+(define_insn "*vsx_fms<mode>4"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (minus:VSX_B
- (mult:VSX_B
+ (fma:VSX_B
(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
- "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD"
- "@
- x<VSv>msuba<VSs> %x0,%x1,%x2
- x<VSv>msubm<VSs> %x0,%x1,%x3
- x<VSv>msuba<VSs> %x0,%x1,%x2
- x<VSv>msubm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
-
-(define_insn "vsx_fmsub<mode>4_2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (neg:VSX_B
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (neg:VSX_B
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
x<VSv>msuba<VSs> %x0,%x1,%x2
@@ -614,7 +544,7 @@
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_insn "vsx_fnmadd<mode>4"
+(define_insn "*vsx_nfma<mode>4"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
(neg:VSX_B
(fma:VSX_B
@@ -630,85 +560,15 @@
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_insn "vsx_fnmadd<mode>4_1"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (neg:VSX_B
- (plus:VSX_B
- (mult:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
- "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD
- && HONOR_SIGNED_ZEROS (DFmode)"
- "@
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
-
-(define_insn "vsx_fnmadd<mode>4_2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (minus:VSX_B
- (mult:VSX_B
- (neg:VSX_B
- (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,<VSr>,wa,wa"))
- (match_operand:VSX_B 2 "gpc_reg_operand" "<VSr>,0,wa,0"))
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
- "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD
- && !HONOR_SIGNED_ZEROS (DFmode)"
- "@
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
-
-(define_insn "vsx_fnmsub<mode>4"
+(define_insn "*vsx_nfms<mode>4"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
(neg:VSX_B
- (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
- (neg:VSX_B
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))))]
- "VECTOR_UNIT_VSX_P (<MODE>mode)"
- "@
- x<VSv>nmsuba<VSs> %x0,%x1,%x2
- x<VSv>nmsubm<VSs> %x0,%x1,%x3
- x<VSv>nmsuba<VSs> %x0,%x1,%x2
- x<VSv>nmsubm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
-
-(define_insn "vsx_fnmsub<mode>4_1"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (neg:VSX_B
- (minus:VSX_B
- (mult:VSX_B
+ (fma:VSX_B
(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
- "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD
- && HONOR_SIGNED_ZEROS (DFmode)"
- "@
- x<VSv>nmsuba<VSs> %x0,%x1,%x2
- x<VSv>nmsubm<VSs> %x0,%x1,%x3
- x<VSv>nmsuba<VSs> %x0,%x1,%x2
- x<VSv>nmsubm<VSs> %x0,%x1,%x3"
- [(set_attr "type" "<VStype_mul>")
- (set_attr "fp_type" "<VSfptype_mul>")])
-
-(define_insn "vsx_fnmsub<mode>4_2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
- (minus:VSX_B
- (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")
- (mult:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0"))))]
- "VECTOR_UNIT_VSX_P (<MODE>mode) && TARGET_FUSED_MADD
- && !HONOR_SIGNED_ZEROS (DFmode)"
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (neg:VSX_B
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
x<VSv>nmsuba<VSs> %x0,%x1,%x2
x<VSv>nmsubm<VSs> %x0,%x1,%x3
This patch continues the elimination of -mfused-madd in favor of -ffp-contract, converting the rs6000 backend. This port was a whole lot trickier: First, it kinda-sorta attempted to use FMA in its reciprocal generation functions. I made the assumption that these algorithms actually rely on the use of FMA, and the testing of !TARGET_FUSED_MADD is actually a red herring and should *not* be converted to test fp-contract. So I just converted that code to always emit the FMA. Second, this target has -fma(a,b,c) (call this NFMA) and -fma(a,b,-c) (call this NFMS) instead of fma(-a,b,c) (generically named FNMA) and fma(-a,b,-c) (generically named FNMS). I represent the NFM[AS] instructions correctly, always. But if signed zeros are allowed to be ignored, I expand the FNM[AS] with the NFM[AS] patterns. I make no attempt to create NFM[AS] optabs; I'm not convinced that it's really necessary. I think combine will be able to handle these just fine. I'm happy to be proven wrong, however. This patch is UNTESTED beyond a cross-compile and running simple tests by hand. I'm hoping that Meissner can test this faster than me pushing this through the gcc compile farm -- it took 2 days to get a full test cycle done last time... r~ From 633fd70ac48a464cada140adf48b9867ff8a3ac7 Mon Sep 17 00:00:00 2001 From: Richard Henderson <rth@twiddle.net> Date: Wed, 10 Nov 2010 16:15:31 -0800 Subject: [PATCH] ppc: move -mfused-madd to -ffp-contract. Delete all patterns dependent on TARGET_FUSED_MADD; instead rely on gimple optimizations producing FMA_EXPR. Use macros to expand all of the named FMA patterns. Change the implementation insns to be unnamed. 
--- gcc/config.gcc | 4 +- gcc/config/rs6000/altivec.md | 74 +-------- gcc/config/rs6000/paired.md | 108 +++++++------- gcc/config/rs6000/rs6000.c | 150 +++++++------------- gcc/config/rs6000/rs6000.md | 335 +++++++++++++----------------------------- gcc/config/rs6000/rs6000.opt | 4 - gcc/config/rs6000/vector.md | 6 +- gcc/config/rs6000/vsx.md | 172 ++-------------------- 8 files changed, 236 insertions(+), 617 deletions(-)