Message ID: 4CBC9481.808@redhat.com
State: New
Headers: show
> +;; This insn is officially "-(a * b) + c" which is "-((a * b) - c)", > +;; except for the sign of a zero result. E.g. > +;; -(1 * 0) + 0 => -(0) + 0 => -0 + 0 => +0 > +;; -((1 * 0) - 0) => -(0 - 0) => -(0) => -0 > +(define_insn "*nfmssf4" > + [(set (match_operand:SF 0 "fr_register_operand" "=f") > + (neg:SF > + (fma:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") > + (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG") > + (neg:SF > + (match_operand:SF 3 "fr_reg_or_signed_fp01_operand" "fZ")))))] > + "!flag_signed_zeros" > + "fnma.s %0 = %F1, %F2, %F3" > + [(set_attr "itanium_class" "fmac")]) I'm not sure why I didn't see it before but -(a * b) + c simplifies better to (-a) * b + c i.e. [(set (match_operand 0) (fma (neg (match_operand 1)) (match_operand 2) (match_operand 3)))] I could either replace the pattern above (assuming that some simplification routine could be induced to handle it), or leave it and add the new pattern. Thoughts? r~
On Mon, 18 Oct 2010, Richard Henderson wrote: > > +;; This insn is officially "-(a * b) + c" which is "-((a * b) - c)", > > +;; except for the sign of a zero result. E.g. > > +;; -(1 * 0) + 0 => -(0) + 0 => -0 + 0 => +0 > > +;; -((1 * 0) - 0) => -(0 - 0) => -(0) => -0 > > +(define_insn "*nfmssf4" > > + [(set (match_operand:SF 0 "fr_register_operand" "=f") > > + (neg:SF > > + (fma:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") > > + (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG") > > + (neg:SF > > + (match_operand:SF 3 "fr_reg_or_signed_fp01_operand" "fZ")))))] > > + "!flag_signed_zeros" > > + "fnma.s %0 = %F1, %F2, %F3" > > + [(set_attr "itanium_class" "fmac")]) > > I'm not sure why I didn't see it before but > > -(a * b) + c > > simplifies better to > > (-a) * b + c > > i.e. > > [(set (match_operand 0) > (fma (neg (match_operand 1)) > (match_operand 2) > (match_operand 3)))] > > I could either replace the pattern above (assuming that some simplification > routine could be induced to handle it), or leave it and add the new pattern. > > Thoughts? On trees I also will end up with (-a) * b + c and expect the expander to grok it. So that indeed sounds better. Though I can't get expand / combine convince to generate vfmsubps from float r[256]; float x[256]; float y[256]; float z[256]; void foo (void) { int i; for (i = 0; i < 256; ++i) { r[i] = x[i] * y[i] - z[i]; } } it seems that expand already generates (insn 31 30 32 4 (set (reg:V8SF 99) (xor:V8SF (reg:V8SF 97) (reg:V8SF 100))) t.c:10 -1 (expr_list:REG_EQUAL (neg:V8SF (reg:V8SF 97)) (nil))) (insn 32 31 33 4 (set (reg:V8SF 98) (fma:V8SF (reg:V8SF 89 [ vect_var_.15 ]) (reg:V8SF 88 [ vect_var_.10 ]) (reg:V8SF 99))) t.c:10 -1 (nil)) from my gen_rtx_NEG. Well, preliminary patch below. Richard. 
Index: gcc/tree.def =================================================================== *** gcc/tree.def.orig 2010-10-14 14:00:03.000000000 +0200 --- gcc/tree.def 2010-10-19 14:36:28.000000000 +0200 *************** DEFTREECODE (WIDEN_MULT_PLUS_EXPR, "wide *** 1092,1097 **** --- 1092,1103 ---- is subtracted from t3. */ DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3) + /* Fused multiply-add. + All operands and the result are of the same type. No intermediate + rounding is performed after multiplying operand one with operand two + before adding operand three. */ + DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3) + /* Whole vector left/right shift in bits. Operand 0 is a vector to be shifted. Operand 1 is an integer shift amount in bits. */ Index: gcc/expr.c =================================================================== *** gcc/expr.c.orig 2010-10-14 10:33:08.000000000 +0200 --- gcc/expr.c 2010-10-19 16:12:55.000000000 +0200 *************** expand_expr_real_2 (sepops ops, rtx targ *** 7254,7260 **** int ignore; bool reduce_bit_field; location_t loc = ops->location; ! tree treeop0, treeop1; #define REDUCE_BIT_FIELD(expr) (reduce_bit_field \ ? reduce_to_bit_field_precision ((expr), \ target, \ --- 7254,7260 ---- int ignore; bool reduce_bit_field; location_t loc = ops->location; ! tree treeop0, treeop1, treeop2; #define REDUCE_BIT_FIELD(expr) (reduce_bit_field \ ? reduce_to_bit_field_precision ((expr), \ target, \ *************** expand_expr_real_2 (sepops ops, rtx targ *** 7267,7272 **** --- 7267,7273 ---- treeop0 = ops->op0; treeop1 = ops->op1; + treeop2 = ops->op2; /* We should be called only on simple (binary or unary) expressions, exactly those that are valid in gimple expressions that aren't *************** expand_expr_real_2 (sepops ops, rtx targ *** 7624,7630 **** case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); ! 
op2 = expand_normal (ops->op2); target = expand_widen_pattern_expr (ops, op0, op1, op2, target, unsignedp); return target; --- 7625,7631 ---- case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); ! op2 = expand_normal (treeop2); target = expand_widen_pattern_expr (ops, op0, op1, op2, target, unsignedp); return target; *************** expand_expr_real_2 (sepops ops, rtx targ *** 7711,7716 **** --- 7712,7743 ---- expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL); return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp)); + case FMA_EXPR: + { + gimple def; + def = get_def_for_expr (treeop0, NEGATE_EXPR); + if (def) + { + op0 = expand_normal (gimple_assign_rhs1 (def)); + op0 = force_reg (mode, op0); + op0 = gen_rtx_NEG (mode, op0); + } + else + op0 = expand_expr (treeop0, subtarget, VOIDmode, EXPAND_NORMAL); + op1 = expand_normal (treeop1); + def = get_def_for_expr (treeop2, NEGATE_EXPR); + if (def) + { + op2 = expand_normal (gimple_assign_rhs1 (def)); + op2 = force_reg (mode, op2); + op2 = gen_rtx_NEG (mode, op2); + } + else + op2 = expand_normal (treeop2); + return expand_ternary_op (TYPE_MODE (type), fma_optab, + op0, op1, op2, target, 0); + } + case MULT_EXPR: /* If this is a fixed-point operation, then we cannot use the code below because "expand_mult" doesn't support sat/no-sat fixed-point Index: gcc/gimple.c =================================================================== *** gcc/gimple.c.orig 2010-10-14 14:00:03.000000000 +0200 --- gcc/gimple.c 2010-10-19 14:36:28.000000000 +0200 *************** get_gimple_rhs_num_ops (enum tree_code c *** 2528,2534 **** || (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS \ : (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS \ : ((SYM) == WIDEN_MULT_PLUS_EXPR \ ! || (SYM) == WIDEN_MULT_MINUS_EXPR) ? 
GIMPLE_TERNARY_RHS \ : ((SYM) == COND_EXPR \ || (SYM) == CONSTRUCTOR \ || (SYM) == OBJ_TYPE_REF \ --- 2528,2535 ---- || (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS \ : (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS \ : ((SYM) == WIDEN_MULT_PLUS_EXPR \ ! || (SYM) == WIDEN_MULT_MINUS_EXPR \ ! || (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \ : ((SYM) == COND_EXPR \ || (SYM) == CONSTRUCTOR \ || (SYM) == OBJ_TYPE_REF \ Index: gcc/tree-cfg.c =================================================================== *** gcc/tree-cfg.c.orig 2010-10-19 12:24:32.000000000 +0200 --- gcc/tree-cfg.c 2010-10-19 14:36:28.000000000 +0200 *************** verify_gimple_assign_ternary (gimple stm *** 3749,3754 **** --- 3749,3768 ---- } break; + case FMA_EXPR: + if (!useless_type_conversion_p (lhs_type, rhs1_type) + || !useless_type_conversion_p (lhs_type, rhs2_type) + || !useless_type_conversion_p (lhs_type, rhs3_type)) + { + error ("type mismatch in fused multiply-add expression"); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + debug_generic_expr (rhs2_type); + debug_generic_expr (rhs3_type); + return true; + } + break; + default: gcc_unreachable (); } Index: gcc/tree-inline.c =================================================================== *** gcc/tree-inline.c.orig 2010-10-19 12:24:32.000000000 +0200 --- gcc/tree-inline.c 2010-10-19 14:36:28.000000000 +0200 *************** estimate_operator_cost (enum tree_code c *** 3284,3289 **** --- 3284,3290 ---- case POINTER_PLUS_EXPR: case MINUS_EXPR: case MULT_EXPR: + case FMA_EXPR: case ADDR_SPACE_CONVERT_EXPR: case FIXED_CONVERT_EXPR: Index: gcc/gimple-pretty-print.c =================================================================== *** gcc/gimple-pretty-print.c.orig 2010-08-30 15:45:07.000000000 +0200 --- gcc/gimple-pretty-print.c 2010-10-19 15:02:41.000000000 +0200 *************** dump_ternary_rhs (pretty_printer *buffer *** 400,405 **** --- 400,413 ---- pp_character (buffer, '>'); break; + case FMA_EXPR: + dump_generic_node 
(buffer, gimple_assign_rhs1 (gs), spc, flags, false); + pp_string (buffer, " * "); + dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false); + pp_string (buffer, " + "); + dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false); + break; + default: gcc_unreachable (); } Index: gcc/tree-ssa-math-opts.c =================================================================== *** gcc/tree-ssa-math-opts.c.orig 2010-09-24 13:58:07.000000000 +0200 --- gcc/tree-ssa-math-opts.c 2010-10-19 16:15:07.000000000 +0200 *************** convert_plusminus_to_widen (gimple_stmt_ *** 1494,1499 **** --- 1494,1601 ---- return true; } + /* Combine the multiplication at MUL_STMT with uses in additions and + subtractions to form fused multiply-add operations. Returns true + if successful. */ + + static bool + convert_mult_to_fma (gimple mul_stmt) + { + tree mul_result = gimple_assign_lhs (mul_stmt); + tree type = TREE_TYPE (mul_result); + gimple use_stmt, fma_stmt; + use_operand_p use_p; + imm_use_iterator imm_iter; + + /* If the target doesn't support it, don't generate it. */ + if (optab_handler (fma_optab, TYPE_MODE (type)) + == CODE_FOR_nothing) + return false; + + /* We don't want to do bitfield reduction ops. */ + if (INTEGRAL_TYPE_P (type) + && (TYPE_PRECISION (type) + != GET_MODE_PRECISION (TYPE_MODE (type)))) + return false; + + /* Make sure that the multiplication statement becomes dead after + the transformation, thus that all uses are transformed to FMAs. + This means we assume that an FMA operation has the same cost + as an addition. */ + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) + { + enum tree_code use_code; + + use_stmt = USE_STMT (use_p); + + if (!is_gimple_assign (use_stmt)) + return false; + use_code = gimple_assign_rhs_code (use_stmt); + /* ??? Handle MINUS_EXPR and NEGATE_EXPR. We have no way to + query HW support for this though. 
*/ + if (use_code != PLUS_EXPR + && use_code != MINUS_EXPR) + return false; + + /* We can't handle a * b + a * b. */ + if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt)) + return false; + + /* For now restrict this operations to single basic blocks. In theory + we would want to support sinking the multiplication in + m = a*b; + if () + ma = m + c; + else + d = m; + to form a fma in the then block and sink the multiplication to the + else block. */ + if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) + return false; + } + + FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) + { + tree addop, mulop1; + gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); + + mulop1 = gimple_assign_rhs1 (mul_stmt); + if (gimple_assign_rhs1 (use_stmt) == mul_result) + { + addop = gimple_assign_rhs2 (use_stmt); + /* a * b - c -> a * b + (-c) */ + if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) + addop = force_gimple_operand_gsi (&gsi, + build1 (NEGATE_EXPR, + type, addop), + true, NULL_TREE, true, + GSI_SAME_STMT); + } + else + { + addop = gimple_assign_rhs1 (use_stmt); + /* a - b * c -> (-b) * c + a */ + if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) + mulop1 = force_gimple_operand_gsi (&gsi, + build1 (NEGATE_EXPR, + type, mulop1), + true, NULL_TREE, true, + GSI_SAME_STMT); + } + + fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR, + gimple_assign_lhs (use_stmt), + mulop1, + gimple_assign_rhs2 (mul_stmt), + addop); + gsi_replace (&gsi, fma_stmt, true); + } + + /* ??? Remove the current statement if it is unused. */ + + return true; + } + /* Find integer multiplications where the operands are extended from smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR where appropriate. 
*/ *************** convert_plusminus_to_widen (gimple_stmt_ *** 1501,1507 **** static unsigned int execute_optimize_widening_mul (void) { - bool changed = false; basic_block bb; FOR_EACH_BB (bb) --- 1603,1608 ---- *************** execute_optimize_widening_mul (void) *** 1518,1531 **** code = gimple_assign_rhs_code (stmt); if (code == MULT_EXPR) ! changed |= convert_mult_to_widen (stmt); else if (code == PLUS_EXPR || code == MINUS_EXPR) ! changed |= convert_plusminus_to_widen (&gsi, stmt, code); } } ! return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa ! | TODO_verify_stmts : 0); } static bool --- 1619,1634 ---- code = gimple_assign_rhs_code (stmt); if (code == MULT_EXPR) ! { ! if (!convert_mult_to_widen (stmt)) ! convert_mult_to_fma (stmt); ! } else if (code == PLUS_EXPR || code == MINUS_EXPR) ! convert_plusminus_to_widen (&gsi, stmt, code); } } ! return 0; } static bool *************** struct gimple_opt_pass pass_optimize_wid *** 1549,1554 **** 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ ! 0 /* todo_flags_finish */ } }; --- 1652,1660 ---- 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ ! TODO_verify_ssa ! | TODO_verify_stmts ! | TODO_dump_func ! | TODO_update_ssa /* todo_flags_finish */ } };
On Mon, 2010-10-18 at 11:40 -0700, Richard Henderson wrote: > As with powerpc, as distinguished from that achievable with -mfused-madd. > > Tested on ia64-linux. Steve, can you make sure I got the dejaspoo right > for hpux? Given that we don't have a __builtin_fma variant that operates > on __float80 as opposed to long double, I can't see that it's possible to > test that case there. > > > r~ Yes, the HP-UX stuff looks good. I applied the patch to my local tree and ran a bootstrap and test last night and everything passed. Steve Ellcey sje@cup.hp.com
Index: testsuite/gcc.target/ia64/builtin-fma-1.c =================================================================== --- testsuite/gcc.target/ia64/builtin-fma-1.c (revision 0) +++ testsuite/gcc.target/ia64/builtin-fma-1.c (revision 0) @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +/* Don't confuse the fma insn with the fma in the filename. */ +/* { dg-final { scan-assembler-times "fma\\." 2 } } */ +/* { dg-final { scan-assembler-times "fms" 4 } } */ +/* { dg-final { scan-assembler-times "fnma." 0 } } */ + +#ifndef __FP_FAST_FMAF +# error "__FP_FAST_FMAF should be defined" +#endif +#ifndef __FP_FAST_FMA +# error "__FP_FAST_FMA should be defined" +#endif + +float f0(float x, float y, float z) { return __builtin_fmaf(x,y,z); } +float f1(float x, float y, float z) { return __builtin_fmaf(x,y,-z); } +float f2(float x, float y, float z) { return -__builtin_fmaf(x,y,-z); } + +double d0(double x, double y, double z) { return __builtin_fma(x,y,z); } +double d1(double x, double y, double z) { return __builtin_fma(x,y,-z); } +double d2(double x, double y, double z) { return -__builtin_fma(x,y,-z); } Index: testsuite/gcc.target/ia64/builtin-fma-2.c =================================================================== --- testsuite/gcc.target/ia64/builtin-fma-2.c (revision 0) +++ testsuite/gcc.target/ia64/builtin-fma-2.c (revision 0) @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fno-signed-zeros" } */ +/* Don't confuse the fma insn with the fma in the filename. */ +/* { dg-final { scan-assembler-times "fma\\." 
2 } } */ +/* { dg-final { scan-assembler-times "fms" 2 } } */ +/* { dg-final { scan-assembler-times "fnma" 2 } } */ + +float f0(float x, float y, float z) { return __builtin_fmaf(x,y,z); } +float f1(float x, float y, float z) { return __builtin_fmaf(x,y,-z); } +float f2(float x, float y, float z) { return -__builtin_fmaf(x,y,-z); } + +double d0(double x, double y, double z) { return __builtin_fma(x,y,z); } +double d1(double x, double y, double z) { return __builtin_fma(x,y,-z); } +double d2(double x, double y, double z) { return -__builtin_fma(x,y,-z); } Index: testsuite/gcc.target/ia64/builtin-fma-3.c =================================================================== --- testsuite/gcc.target/ia64/builtin-fma-3.c (revision 0) +++ testsuite/gcc.target/ia64/builtin-fma-3.c (revision 0) @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-skip-if "128-bit long double" { *-*-hpux* } { "*" } { "" } } */ +/* { dg-options "-O" } */ +/* Don't confuse the fma insn with the fma in the filename. */ +/* { dg-final { scan-assembler-times "fma\[ \]" 1 } } */ +/* { dg-final { scan-assembler-times "fms" 2 } } */ +/* { dg-final { scan-assembler-times "fnma" 0 } } */ + +#ifndef __FP_FAST_FMAL +# error "__FP_FAST_FMAL should be defined" +#endif + +typedef long double LD; + +LD f0(LD x, LD y, LD z) { return __builtin_fmal(x,y,z); } +LD f1(LD x, LD y, LD z) { return __builtin_fmal(x,y,-z); } +LD f2(LD x, LD y, LD z) { return -__builtin_fmal(x,y,-z); } Index: testsuite/gcc.target/ia64/builtin-fma-4.c =================================================================== --- testsuite/gcc.target/ia64/builtin-fma-4.c (revision 0) +++ testsuite/gcc.target/ia64/builtin-fma-4.c (revision 0) @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-skip-if "128-bit long double" { *-*-hpux* } { "*" } { "" } } */ +/* { dg-options "-O -fno-signed-zeros" } */ +/* Don't confuse the fma insn with the fma in the filename. 
*/ +/* { dg-final { scan-assembler-times "fma\[ \]" 1 } } */ +/* { dg-final { scan-assembler-times "fms" 1 } } */ +/* { dg-final { scan-assembler-times "fnma" 1 } } */ + +#ifndef __FP_FAST_FMAL +# error "__FP_FAST_FMAL should be defined" +#endif + +typedef long double LD; + +LD f0(LD x, LD y, LD z) { return __builtin_fmal(x,y,z); } +LD f1(LD x, LD y, LD z) { return __builtin_fmal(x,y,-z); } +LD f2(LD x, LD y, LD z) { return -__builtin_fmal(x,y,-z); } Index: config/ia64/ia64.md =================================================================== --- config/ia64/ia64.md (revision 165650) +++ config/ia64/ia64.md (working copy) @@ -2791,6 +2791,41 @@ "TARGET_FUSED_MADD" "fnma.s %0 = %F1, %F2, %F3" [(set_attr "itanium_class" "fmac")]) + +;; Official C99 versions of the fmaf family of operations. +(define_insn "fmasf4" + [(set (match_operand:SF 0 "fr_register_operand" "=f") + (fma:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG") + (match_operand:SF 3 "fr_reg_or_signed_fp01_operand" "fZ")))] + "" + "fma.s %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*fmssf4" + [(set (match_operand:SF 0 "fr_register_operand" "=f") + (fma:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG") + (neg:SF + (match_operand:SF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))] + "" + "fms.s %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +;; This insn is officially "-(a * b) + c" which is "-((a * b) - c)", +;; except for the sign of a zero result. E.g. 
+;; -(1 * 0) + 0 => -(0) + 0 => -0 + 0 => +0 +;; -((1 * 0) - 0) => -(0 - 0) => -(0) => -0 +(define_insn "*nfmssf4" + [(set (match_operand:SF 0 "fr_register_operand" "=f") + (neg:SF + (fma:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG") + (neg:SF + (match_operand:SF 3 "fr_reg_or_signed_fp01_operand" "fZ")))))] + "!flag_signed_zeros" + "fnma.s %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) ;; :::::::::::::::::::: ;; :: @@ -2977,6 +3012,38 @@ "TARGET_FUSED_MADD" "fnma.s %0 = %F1, %F2, %F3" [(set_attr "itanium_class" "fmac")]) + +;; Official C99 versions of the fma family of operations. +(define_insn "fmadf4" + [(set (match_operand:DF 0 "fr_register_operand" "=f") + (fma:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG") + (match_operand:DF 3 "fr_reg_or_signed_fp01_operand" "fZ")))] + "" + "fma.d %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*fmsdf4" + [(set (match_operand:DF 0 "fr_register_operand" "=f") + (fma:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG") + (neg:DF + (match_operand:DF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))] + "" + "fms.d %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +;; See comment for nfmssf4. +(define_insn "*nfmsdf4" + [(set (match_operand:DF 0 "fr_register_operand" "=f") + (neg:DF + (fma:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG") + (neg:DF + (match_operand:DF 3 "fr_reg_or_signed_fp01_operand" "fZ")))))] + "!flag_signed_zeros" + "fnma.d %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) ;; :::::::::::::::::::: ;; :: @@ -3234,6 +3301,38 @@ "TARGET_FUSED_MADD" "fnma.d %0 = %F1, %F2, %F3" [(set_attr "itanium_class" "fmac")]) + +;; Official C99 versions of the fmal family of operations. 
+(define_insn "fmaxf4" + [(set (match_operand:XF 0 "fr_register_operand" "=f") + (fma:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG") + (match_operand:XF 3 "fr_reg_or_signed_fp01_operand" "fZ")))] + "" + "fma %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*fmsxf4" + [(set (match_operand:XF 0 "fr_register_operand" "=f") + (fma:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG") + (neg:XF + (match_operand:XF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))] + "" + "fms %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) + +;; See comment for nfmssf4. +(define_insn "*nfmsxf4" + [(set (match_operand:XF 0 "fr_register_operand" "=f") + (neg:XF + (fma:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG") + (neg:XF + (match_operand:XF 3 "fr_reg_or_signed_fp01_operand" "fZ")))))] + "!flag_signed_zeros" + "fnma %0 = %F1, %F2, %F3" + [(set_attr "itanium_class" "fmac")]) ;; :::::::::::::::::::: ;; ::