Message ID | 1508942330.26707.184.camel@brimstone.rchland.ibm.com |
---|---|
State | New |
Headers | show |
Series | [rs6000] Gimple folding for vec_madd() | expand |
On Wed, Oct 25, 2017 at 4:38 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > Hi, > > Add support for gimple folding of the vec_madd() (vector multiply-add) > intrinsics. > Testcase coverage is provided by the existing tests > gcc.target/powerpc/fold-vec-madd-*.c > > Sniff-tests appear clean. A full regtest is currently running across assorted Power systems. (P6-P9). > OK for trunk (pending clean run results)? You can use FMA_EXPR on integer operands as well. Otherwise you risk the FMA be not matched by combine later when part of the operation is CSEd. Richard. > Thanks, > -Will > > [gcc] > > 2017-10-25 Will Schmidt <will_schmidt@vnet.ibm.com> > > * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add support for > gimple folding of vec_madd() intrinsics. > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 4837e14..04c2b15 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -16606,10 +16606,43 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > build_int_cst (arg2_type, 0)), arg0); > gimple_set_location (g, loc); > gsi_replace (gsi, g, true); > return true; > } > + > + /* vec_madd (Float) */ > + case ALTIVEC_BUILTIN_VMADDFP: > + case VSX_BUILTIN_XVMADDDP: > + { > + arg0 = gimple_call_arg (stmt, 0); > + arg1 = gimple_call_arg (stmt, 1); > + tree arg2 = gimple_call_arg (stmt, 2); > + lhs = gimple_call_lhs (stmt); > + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, arg2); > + gimple_set_location (g, gimple_location (stmt)); > + gsi_replace (gsi, g, true); > + return true; > + } > + /* vec_madd (Integral) */ > + case ALTIVEC_BUILTIN_VMLADDUHM: > + { > + arg0 = gimple_call_arg (stmt, 0); > + arg1 = gimple_call_arg (stmt, 1); > + tree arg2 = gimple_call_arg (stmt, 2); > + lhs = gimple_call_lhs (stmt); > + tree lhs_type = TREE_TYPE (lhs); > + location_t loc = gimple_location (stmt); > + gimple_seq stmts = NULL; > + tree mult_result = gimple_build (&stmts, loc, MULT_EXPR, > + lhs_type, arg0, arg1); > + tree plus_result = gimple_build (&stmts, loc, PLUS_EXPR, > + lhs_type, mult_result, arg2); > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > + update_call_from_tree (gsi, plus_result); > + return true; > + } > + > default: > if (TARGET_DEBUG_BUILTIN) > fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", > fn_code, fn_name1, fn_name2); > break; > >
On Thu, 2017-10-26 at 11:05 +0200, Richard Biener wrote: > On Wed, Oct 25, 2017 at 4:38 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > > Hi, > > > > Add support for gimple folding of the vec_madd() (vector multiply-add) > > intrinsics. > > Testcase coverage is provided by the existing tests > > gcc.target/powerpc/fold-vec-madd-*.c > > > > Sniff-tests appear clean. A full regtest is currently running across assorted Power systems. (P6-P9). > > OK for trunk (pending clean run results)? > > You can use FMA_EXPR on integer operands as well. Otherwise you risk > the FMA be not matched by combine later when part of the operation is > CSEd. I had tried that initially, without success,.. I'll probably need another hint. :-) Looking a bit closer, I think I see why the assert fired, but I'm not sure what the proper fix would be. So attempting to the FMA_EXPR on the integer operands. (vector shorts in this case), I end up triggering this error: /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/testsuite/gcc.target/powerpc/fold-vec-madd-short.c:14:10: internal compiler error: in expand_expr_real_2, at expr.c:8712 0x10813303 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier) /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8712 0x1081822f expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:9787 0x1080f7bb expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8084 ... which when followed back, I tripped an assert here: (gcc/expr.c: expand_expr_real_2() ~ line 8710) case FMA_EXPR: { optab opt = fma_optab; gimple *def0, *def2; if (optab_handler (fma_optab, mode) == CODE_FOR_nothing) { tree fn = mathfn_built_in (TREE_TYPE (treeop0), BUILT_IN_FMA); tree call_expr; gcc_assert (fn != NULL_TREE); where gcc/builtins.c mathfn_built_in()->mathfn_built_in_1->mathfn_built_in_2 looks to have returned END_BUILTINS/NULL_TREE, due to falling through the if/else tree: if (TYPE_MAIN_VARIANT (type) == double_type_node) return fcode; else if (TYPE_MAIN_VARIANT (type) == float_type_node) return fcodef; else if (TYPE_MAIN_VARIANT (type) == long_double_type_node) return fcodel; else return END_BUILTINS; Looks like that is all double/float/long double contents. First blush attempt would be to add V8HI_type_node/integer_type_node to that if/else tree, but that doesn't look like it would be near enough. Thanks -Will > > Richard. > > > Thanks, > > -Will > > > > [gcc] > > > > 2017-10-25 Will Schmidt <will_schmidt@vnet.ibm.com> > > > > * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add support for > > gimple folding of vec_madd() intrinsics. > > > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > > index 4837e14..04c2b15 100644 > > --- a/gcc/config/rs6000/rs6000.c > > +++ b/gcc/config/rs6000/rs6000.c > > @@ -16606,10 +16606,43 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > > build_int_cst (arg2_type, 0)), arg0); > > gimple_set_location (g, loc); > > gsi_replace (gsi, g, true); > > return true; > > } > > + > > + /* vec_madd (Float) */ > > + case ALTIVEC_BUILTIN_VMADDFP: > > + case VSX_BUILTIN_XVMADDDP: > > + { > > + arg0 = gimple_call_arg (stmt, 0); > > + arg1 = gimple_call_arg (stmt, 1); > > + tree arg2 = gimple_call_arg (stmt, 2); > > + lhs = gimple_call_lhs (stmt); > > + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, arg2); > > + gimple_set_location (g, gimple_location (stmt)); > > + gsi_replace (gsi, g, true); > > + return true; > > + } > > + /* vec_madd (Integral) */ > > + case ALTIVEC_BUILTIN_VMLADDUHM: > > + { > > + arg0 = gimple_call_arg (stmt, 0); > > + arg1 = gimple_call_arg (stmt, 1); > > + tree arg2 = gimple_call_arg (stmt, 2); > > + lhs = gimple_call_lhs (stmt); > > + tree lhs_type = TREE_TYPE (lhs); > > + location_t loc = gimple_location (stmt); > > + gimple_seq stmts = NULL; > > + tree mult_result = gimple_build (&stmts, loc, MULT_EXPR, > > + lhs_type, arg0, arg1); > > + tree plus_result = gimple_build (&stmts, loc, PLUS_EXPR, > > + lhs_type, mult_result, arg2); > > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > > + update_call_from_tree (gsi, plus_result); > > + return true; > > + } > > + > > default: > > if (TARGET_DEBUG_BUILTIN) > > fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", > > fn_code, fn_name1, fn_name2); > > break; > > > > >
On Thu, Oct 26, 2017 at 4:30 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > On Thu, 2017-10-26 at 11:05 +0200, Richard Biener wrote: >> On Wed, Oct 25, 2017 at 4:38 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: >> > Hi, >> > >> > Add support for gimple folding of the vec_madd() (vector multiply-add) >> > intrinsics. >> > Testcase coverage is provided by the existing tests >> > gcc.target/powerpc/fold-vec-madd-*.c >> > >> > Sniff-tests appear clean. A full regtest is currently running across assorted Power systems. (P6-P9). >> > OK for trunk (pending clean run results)? >> >> You can use FMA_EXPR on integer operands as well. Otherwise you risk >> the FMA be not matched by combine later when part of the operation is >> CSEd. > > I had tried that initially, without success,.. I'll probably need > another hint. :-) > Looking a bit closer, I think I see why the assert fired, but I'm not > sure what the proper fix would be. > > So attempting to the FMA_EXPR on the integer operands. (vector shorts in > this case), I end up triggering this error: > > /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/testsuite/gcc.target/powerpc/fold-vec-madd-short.c:14:10: internal compiler error: in expand_expr_real_2, at expr.c:8712 > 0x10813303 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier) > /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8712 > 0x1081822f expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) > /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:9787 > 0x1080f7bb expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) > /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8084 > ... > > > which when followed back, I tripped an assert here: (gcc/expr.c: > expand_expr_real_2() ~ line 8710) > > case FMA_EXPR: > { > optab opt = fma_optab; > gimple *def0, *def2; > if (optab_handler (fma_optab, mode) == CODE_FOR_nothing) > { > tree fn = mathfn_built_in (TREE_TYPE (treeop0), BUILT_IN_FMA); > tree call_expr; > > gcc_assert (fn != NULL_TREE); > > where gcc/builtins.c > mathfn_built_in()->mathfn_built_in_1->mathfn_built_in_2 looks to have > returned END_BUILTINS/NULL_TREE, due to falling through the if/else > tree: > > if (TYPE_MAIN_VARIANT (type) == double_type_node) > return fcode; > else if (TYPE_MAIN_VARIANT (type) == float_type_node) > return fcodef; > else if (TYPE_MAIN_VARIANT (type) == long_double_type_node) > return fcodel; > else > return END_BUILTINS; > > Looks like that is all double/float/long double contents. First blush > attempt would be to add V8HI_type_node/integer_type_node to that if/else > tree, but that doesn't look like it would be near enough. Well - we of course expect to have an optab for the fma with vector short. I thought you had one given you have the intrinsic. If you don't have an optab you of course have to open-code it. Just thought you expected an actual machine instruction doing the integer FMA. Richard. > Thanks > -Will > >> >> Richard. >> >> > Thanks, >> > -Will >> > >> > [gcc] >> > >> > 2017-10-25 Will Schmidt <will_schmidt@vnet.ibm.com> >> > >> > * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add support for >> > gimple folding of vec_madd() intrinsics. >> > >> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c >> > index 4837e14..04c2b15 100644 >> > --- a/gcc/config/rs6000/rs6000.c >> > +++ b/gcc/config/rs6000/rs6000.c >> > @@ -16606,10 +16606,43 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) >> > build_int_cst (arg2_type, 0)), arg0); >> > gimple_set_location (g, loc); >> > gsi_replace (gsi, g, true); >> > return true; >> > } >> > + >> > + /* vec_madd (Float) */ >> > + case ALTIVEC_BUILTIN_VMADDFP: >> > + case VSX_BUILTIN_XVMADDDP: >> > + { >> > + arg0 = gimple_call_arg (stmt, 0); >> > + arg1 = gimple_call_arg (stmt, 1); >> > + tree arg2 = gimple_call_arg (stmt, 2); >> > + lhs = gimple_call_lhs (stmt); >> > + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, arg2); >> > + gimple_set_location (g, gimple_location (stmt)); >> > + gsi_replace (gsi, g, true); >> > + return true; >> > + } >> > + /* vec_madd (Integral) */ >> > + case ALTIVEC_BUILTIN_VMLADDUHM: >> > + { >> > + arg0 = gimple_call_arg (stmt, 0); >> > + arg1 = gimple_call_arg (stmt, 1); >> > + tree arg2 = gimple_call_arg (stmt, 2); >> > + lhs = gimple_call_lhs (stmt); >> > + tree lhs_type = TREE_TYPE (lhs); >> > + location_t loc = gimple_location (stmt); >> > + gimple_seq stmts = NULL; >> > + tree mult_result = gimple_build (&stmts, loc, MULT_EXPR, >> > + lhs_type, arg0, arg1); >> > + tree plus_result = gimple_build (&stmts, loc, PLUS_EXPR, >> > + lhs_type, mult_result, arg2); >> > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); >> > + update_call_from_tree (gsi, plus_result); >> > + return true; >> > + } >> > + >> > default: >> > if (TARGET_DEBUG_BUILTIN) >> > fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", >> > fn_code, fn_name1, fn_name2); >> > break; >> > >> > >> > >
On Thu, Oct 26, 2017 at 5:13 PM, Richard Biener <richard.guenther@gmail.com> wrote: > On Thu, Oct 26, 2017 at 4:30 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: >> On Thu, 2017-10-26 at 11:05 +0200, Richard Biener wrote: >>> On Wed, Oct 25, 2017 at 4:38 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: >>> > Hi, >>> > >>> > Add support for gimple folding of the vec_madd() (vector multiply-add) >>> > intrinsics. >>> > Testcase coverage is provided by the existing tests >>> > gcc.target/powerpc/fold-vec-madd-*.c >>> > >>> > Sniff-tests appear clean. A full regtest is currently running across assorted Power systems. (P6-P9). >>> > OK for trunk (pending clean run results)? >>> >>> You can use FMA_EXPR on integer operands as well. Otherwise you risk >>> the FMA be not matched by combine later when part of the operation is >>> CSEd. >> >> I had tried that initially, without success,.. I'll probably need >> another hint. :-) >> Looking a bit closer, I think I see why the assert fired, but I'm not >> sure what the proper fix would be. >> >> So attempting to the FMA_EXPR on the integer operands. (vector shorts in >> this case), I end up triggering this error: >> >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/testsuite/gcc.target/powerpc/fold-vec-madd-short.c:14:10: internal compiler error: in expand_expr_real_2, at expr.c:8712 >> 0x10813303 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier) >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8712 >> 0x1081822f expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:9787 >> 0x1080f7bb expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8084 >> ... >> >> >> which when followed back, I tripped an assert here: (gcc/expr.c: >> expand_expr_real_2() ~ line 8710) >> >> case FMA_EXPR: >> { >> optab opt = fma_optab; >> gimple *def0, *def2; >> if (optab_handler (fma_optab, mode) == CODE_FOR_nothing) >> { >> tree fn = mathfn_built_in (TREE_TYPE (treeop0), BUILT_IN_FMA); >> tree call_expr; >> >> gcc_assert (fn != NULL_TREE); >> >> where gcc/builtins.c >> mathfn_built_in()->mathfn_built_in_1->mathfn_built_in_2 looks to have >> returned END_BUILTINS/NULL_TREE, due to falling through the if/else >> tree: >> >> if (TYPE_MAIN_VARIANT (type) == double_type_node) >> return fcode; >> else if (TYPE_MAIN_VARIANT (type) == float_type_node) >> return fcodef; >> else if (TYPE_MAIN_VARIANT (type) == long_double_type_node) >> return fcodel; >> else >> return END_BUILTINS; >> >> Looks like that is all double/float/long double contents. First blush >> attempt would be to add V8HI_type_node/integer_type_node to that if/else >> tree, but that doesn't look like it would be near enough. > > Well - we of course expect to have an optab for the fma with vector > short. I thought > you had one given you have the intrinsic. If you don't have an optab > you of course > have to open-code it. > > Just thought you expected an actual machine instruction doing the integer FMA. So you have (define_insn "altivec_vmladduhm" [(set (match_operand:V8HI 0 "register_operand" "=v") (plus:V8HI (mult:V8HI (match_operand:V8HI 1 "register_operand" "v") (match_operand:V8HI 2 "register_operand" "v")) (match_operand:V8HI 3 "register_operand" "v")))] "TARGET_ALTIVEC" "vmladduhm %0,%1,%2,%3" [(set_attr "type" "veccomplex")]) but not (define_expand "fmav8hi4" ... or define_insn in case that's also a way to register an optab. Richard. > Richard. > >> Thanks >> -Will >> >>> >>> Richard. >>> >>> > Thanks, >>> > -Will >>> > >>> > [gcc] >>> > >>> > 2017-10-25 Will Schmidt <will_schmidt@vnet.ibm.com> >>> > >>> > * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add support for >>> > gimple folding of vec_madd() intrinsics. >>> > >>> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c >>> > index 4837e14..04c2b15 100644 >>> > --- a/gcc/config/rs6000/rs6000.c >>> > +++ b/gcc/config/rs6000/rs6000.c >>> > @@ -16606,10 +16606,43 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) >>> > build_int_cst (arg2_type, 0)), arg0); >>> > gimple_set_location (g, loc); >>> > gsi_replace (gsi, g, true); >>> > return true; >>> > } >>> > + >>> > + /* vec_madd (Float) */ >>> > + case ALTIVEC_BUILTIN_VMADDFP: >>> > + case VSX_BUILTIN_XVMADDDP: >>> > + { >>> > + arg0 = gimple_call_arg (stmt, 0); >>> > + arg1 = gimple_call_arg (stmt, 1); >>> > + tree arg2 = gimple_call_arg (stmt, 2); >>> > + lhs = gimple_call_lhs (stmt); >>> > + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, arg2); >>> > + gimple_set_location (g, gimple_location (stmt)); >>> > + gsi_replace (gsi, g, true); >>> > + return true; >>> > + } >>> > + /* vec_madd (Integral) */ >>> > + case ALTIVEC_BUILTIN_VMLADDUHM: >>> > + { >>> > + arg0 = gimple_call_arg (stmt, 0); >>> > + arg1 = gimple_call_arg (stmt, 1); >>> > + tree arg2 = gimple_call_arg (stmt, 2); >>> > + lhs = gimple_call_lhs (stmt); >>> > + tree lhs_type = TREE_TYPE (lhs); >>> > + location_t loc = gimple_location (stmt); >>> > + gimple_seq stmts = NULL; >>> > + tree mult_result = gimple_build (&stmts, loc, MULT_EXPR, >>> > + lhs_type, arg0, arg1); >>> > + tree plus_result = gimple_build (&stmts, loc, PLUS_EXPR, >>> > + lhs_type, mult_result, arg2); >>> > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); >>> > + update_call_from_tree (gsi, plus_result); >>> > + return true; >>> > + } >>> > + >>> > default: >>> > if (TARGET_DEBUG_BUILTIN) >>> > fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", >>> > fn_code, fn_name1, fn_name2); >>> > break; >>> > >>> > >>> >> >>
On Thu, 2017-10-26 at 17:18 +0200, Richard Biener wrote: > On Thu, Oct 26, 2017 at 5:13 PM, Richard Biener > <richard.guenther@gmail.com> wrote: > > On Thu, Oct 26, 2017 at 4:30 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > >> On Thu, 2017-10-26 at 11:05 +0200, Richard Biener wrote: > >>> On Wed, Oct 25, 2017 at 4:38 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > >>> > Hi, > >>> > > >>> > Add support for gimple folding of the vec_madd() (vector multiply-add) > >>> > intrinsics. > >>> > Testcase coverage is provided by the existing tests > >>> > gcc.target/powerpc/fold-vec-madd-*.c > >>> > > >>> > Sniff-tests appear clean. A full regtest is currently running across assorted Power systems. (P6-P9). > >>> > OK for trunk (pending clean run results)? > >>> > >>> You can use FMA_EXPR on integer operands as well. Otherwise you risk > >>> the FMA be not matched by combine later when part of the operation is > >>> CSEd. > >> > >> I had tried that initially, without success,.. I'll probably need > >> another hint. :-) > >> Looking a bit closer, I think I see why the assert fired, but I'm not > >> sure what the proper fix would be. > >> > >> So attempting to the FMA_EXPR on the integer operands. (vector shorts in > >> this case), I end up triggering this error: > >> > >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/testsuite/gcc.target/powerpc/fold-vec-madd-short.c:14:10: internal compiler error: in expand_expr_real_2, at expr.c:8712 > >> 0x10813303 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier) > >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8712 > >> 0x1081822f expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) > >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:9787 > >> 0x1080f7bb expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) > >> /home/willschm/gcc/gcc-mainline-vec_fold_misc/gcc/expr.c:8084 > >> ... > >> > >> > >> which when followed back, I tripped an assert here: (gcc/expr.c: > >> expand_expr_real_2() ~ line 8710) > >> > >> case FMA_EXPR: > >> { > >> optab opt = fma_optab; > >> gimple *def0, *def2; > >> if (optab_handler (fma_optab, mode) == CODE_FOR_nothing) > >> { > >> tree fn = mathfn_built_in (TREE_TYPE (treeop0), BUILT_IN_FMA); > >> tree call_expr; > >> > >> gcc_assert (fn != NULL_TREE); > >> > >> where gcc/builtins.c > >> mathfn_built_in()->mathfn_built_in_1->mathfn_built_in_2 looks to have > >> returned END_BUILTINS/NULL_TREE, due to falling through the if/else > >> tree: > >> > >> if (TYPE_MAIN_VARIANT (type) == double_type_node) > >> return fcode; > >> else if (TYPE_MAIN_VARIANT (type) == float_type_node) > >> return fcodef; > >> else if (TYPE_MAIN_VARIANT (type) == long_double_type_node) > >> return fcodel; > >> else > >> return END_BUILTINS; > >> > >> Looks like that is all double/float/long double contents. First blush > >> attempt would be to add V8HI_type_node/integer_type_node to that if/else > >> tree, but that doesn't look like it would be near enough. > > > > Well - we of course expect to have an optab for the fma with vector > > short. I thought > > you had one given you have the intrinsic. If you don't have an optab > > you of course > > have to open-code it. > > > > Just thought you expected an actual machine instruction doing the integer FMA. > > So you have > > (define_insn "altivec_vmladduhm" > [(set (match_operand:V8HI 0 "register_operand" "=v") > (plus:V8HI (mult:V8HI (match_operand:V8HI 1 "register_operand" "v") > (match_operand:V8HI 2 "register_operand" "v")) > (match_operand:V8HI 3 "register_operand" "v")))] > "TARGET_ALTIVEC" > "vmladduhm %0,%1,%2,%3" > [(set_attr "type" "veccomplex")]) > > but not > > (define_expand "fmav8hi4" > ... > > or define_insn in case that's also a way to register an optab. > > Richard. Ok. Thanks for the guidance. :-) -Will > > > > > Richard. > > > >> Thanks > >> -Will > >> > >>> > >>> Richard. > >>> > >>> > Thanks, > >>> > -Will > >>> > > >>> > [gcc] > >>> > > >>> > 2017-10-25 Will Schmidt <will_schmidt@vnet.ibm.com> > >>> > > >>> > * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add support for > >>> > gimple folding of vec_madd() intrinsics. > >>> > > >>> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > >>> > index 4837e14..04c2b15 100644 > >>> > --- a/gcc/config/rs6000/rs6000.c > >>> > +++ b/gcc/config/rs6000/rs6000.c > >>> > @@ -16606,10 +16606,43 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > >>> > build_int_cst (arg2_type, 0)), arg0); > >>> > gimple_set_location (g, loc); > >>> > gsi_replace (gsi, g, true); > >>> > return true; > >>> > } > >>> > + > >>> > + /* vec_madd (Float) */ > >>> > + case ALTIVEC_BUILTIN_VMADDFP: > >>> > + case VSX_BUILTIN_XVMADDDP: > >>> > + { > >>> > + arg0 = gimple_call_arg (stmt, 0); > >>> > + arg1 = gimple_call_arg (stmt, 1); > >>> > + tree arg2 = gimple_call_arg (stmt, 2); > >>> > + lhs = gimple_call_lhs (stmt); > >>> > + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, arg2); > >>> > + gimple_set_location (g, gimple_location (stmt)); > >>> > + gsi_replace (gsi, g, true); > >>> > + return true; > >>> > + } > >>> > + /* vec_madd (Integral) */ > >>> > + case ALTIVEC_BUILTIN_VMLADDUHM: > >>> > + { > >>> > + arg0 = gimple_call_arg (stmt, 0); > >>> > + arg1 = gimple_call_arg (stmt, 1); > >>> > + tree arg2 = gimple_call_arg (stmt, 2); > >>> > + lhs = gimple_call_lhs (stmt); > >>> > + tree lhs_type = TREE_TYPE (lhs); > >>> > + location_t loc = gimple_location (stmt); > >>> > + gimple_seq stmts = NULL; > >>> > + tree mult_result = gimple_build (&stmts, loc, MULT_EXPR, > >>> > + lhs_type, arg0, arg1); > >>> > + tree plus_result = gimple_build (&stmts, loc, PLUS_EXPR, > >>> > + lhs_type, mult_result, arg2); > >>> > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > >>> > + update_call_from_tree (gsi, plus_result); > >>> > + return true; > >>> > + } > >>> > + > >>> > default: > >>> > if (TARGET_DEBUG_BUILTIN) > >>> > fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", > >>> > fn_code, fn_name1, fn_name2); > >>> > break; > >>> > > >>> > > >>> > >> > >> >
Hi, [PATCH, rs6000] (v2) Gimple folding for vec_madd() Add support for gimple folding of the vec_madd() (vector multiply-add) intrinsics. Per earlier feedback and education, this now includes the addition of a "define_expand fmav8hi4" in altivec.md. Testcase coverage is provided by the existing tests as gcc.target/powerpc/fold-vec-madd-*.c Sniff-tests passed. Regtests will be kicked off shortly. OK for trunk? (pending successful test results, of course:-) ) Thanks, -Will [gcc] 2017-10-26 Will Schmidt <will_schmidt@vnet.ibm.com> * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add support for gimple folding of vec_madd() intrinsics. * config/rs6000/altivec.md: Add define_expand fmav8hi4 diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 6ea529b..36e6ddd 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -943,10 +943,22 @@ (match_operand:V8HI 3 "register_operand" "v")))] "TARGET_ALTIVEC" "vmladduhm %0,%1,%2,%3" [(set_attr "type" "veccomplex")]) +(define_expand "fmav8hi4" + [(use (match_operand:V8HI 0 "register_operand" "")) + (use (match_operand:V8HI 1 "register_operand" "")) + (use (match_operand:V8HI 2 "register_operand" "")) + (use (match_operand:V8HI 3 "register_operand" ""))] + "TARGET_ALTIVEC" +{ + emit_insn (gen_altivec_vmladduhm (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + (define_expand "altivec_vmrghb" [(use (match_operand:V16QI 0 "register_operand" "")) (use (match_operand:V16QI 1 "register_operand" "")) (use (match_operand:V16QI 2 "register_operand" ""))] "TARGET_ALTIVEC" diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 4837e14..1cd4278 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16606,10 +16606,25 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) build_int_cst (arg2_type, 0)), arg0); gimple_set_location (g, loc); gsi_replace (gsi, g, true); return true; } + /* vec_madd (Float) */ + case ALTIVEC_BUILTIN_VMADDFP: + case VSX_BUILTIN_XVMADDDP: + case ALTIVEC_BUILTIN_VMLADDUHM: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + tree arg2 = gimple_call_arg (stmt, 2); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, arg2); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + default: if (TARGET_DEBUG_BUILTIN) fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", fn_code, fn_name1, fn_name2); break;
Hi Will, On Thu, Oct 26, 2017 at 05:13:38PM -0500, Will Schmidt wrote: > * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add support for > gimple folding of vec_madd() intrinsics. The colon goes after the closing parenthesis, and continuation lines should not have extra indent. I.e. like: * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add support for gimple folding of vec_madd() intrinsics. > * config/rs6000/altivec.md: Add define_expand fmav8hi4 * config/rs6000/altivec.md (fmav8hi4): New define_expand. > +(define_expand "fmav8hi4" > + [(use (match_operand:V8HI 0 "register_operand" "")) > + (use (match_operand:V8HI 1 "register_operand" "")) > + (use (match_operand:V8HI 2 "register_operand" "")) > + (use (match_operand:V8HI 3 "register_operand" ""))] > + "TARGET_ALTIVEC" > +{ > + emit_insn (gen_altivec_vmladduhm (operands[0], operands[1], > + operands[2], operands[3])); > + DONE; > +}) Just leave out the default, empty constraint strings please. Maybe the altivec_vmladduhm pattern should just be renamed? Or this expander moved to there, at least. > + /* vec_madd (Float) */ The "Float" part here is misleading now. > + case ALTIVEC_BUILTIN_VMADDFP: > + case VSX_BUILTIN_XVMADDDP: > + case ALTIVEC_BUILTIN_VMLADDUHM: > + { > + arg0 = gimple_call_arg (stmt, 0); > + arg1 = gimple_call_arg (stmt, 1); > + tree arg2 = gimple_call_arg (stmt, 2); > + lhs = gimple_call_lhs (stmt); > + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, arg2); > + gimple_set_location (g, gimple_location (stmt)); > + gsi_replace (gsi, g, true); > + return true; > + } Okay for trunk with that fixed. Thanks! Segher
Hi, V3. :-) [PATCH, rs6000] (v2) Gimple folding for vec_madd() Add support for gimple folding of the vec_madd() (vector multiply-add) intrinsics. Renamed the define_insn of altivec_vmladduhm to fmav8hi4, Refreshed the caller of gen_altivec_vmladduhm to call gen_fmav8hi, and updated the rs6000-builtin.def entry for VMLADDUHM to point to the new name. With this refresh I am no longer adding a define_expand. Plus a few cosmetic tweaks per feedback. Testcase coverage is provided by the existing tests as gcc.target/powerpc/fold-vec-madd-*.c Sniff-tests passed. Regtests will be kicked off shortly. OK for trunk? Thanks, -Will [gcc] 2017-10-27 Will Schmidt <will_schmidt@vnet.ibm.com> * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for gimple folding of vec_madd() intrinsics. * config/rs6000/altivec.md: Rename altivec_vmladduhm to fmav8hi4 * config/rs6000/rs6000-builtin.def: Rename vmladduhm to fmav8hi4 diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 6ea529b..b2f173d 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -793,15 +793,16 @@ "TARGET_ALTIVEC" { rtx zero = gen_reg_rtx (V8HImode); emit_insn (gen_altivec_vspltish (zero, const0_rtx)); - emit_insn (gen_altivec_vmladduhm(operands[0], operands[1], operands[2], zero)); + emit_insn (gen_fmav8hi4 (operands[0], operands[1], operands[2], zero)); DONE; }) + ;; Fused multiply subtract (define_insn "*altivec_vnmsubfp" [(set (match_operand:V4SF 0 "register_operand" "=v") (neg:V4SF (fma:V4SF (match_operand:V4SF 1 "register_operand" "v") @@ -934,11 +935,11 @@ (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] "TARGET_ALTIVEC" "vmhraddshs %0,%1,%2,%3" [(set_attr "type" "veccomplex")]) -(define_insn "altivec_vmladduhm" +(define_insn "fmav8hi4" [(set (match_operand:V8HI 0 "register_operand" "=v") (plus:V8HI (mult:V8HI (match_operand:V8HI 1 "register_operand" "v") (match_operand:V8HI 2 "register_operand" "v")) (match_operand:V8HI 3 "register_operand" "v")))] "TARGET_ALTIVEC" diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index ac9ddae..7834bef 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -959,11 +959,11 @@ BU_SPECIAL_X (RS6000_BUILTIN_NONE, NULL, 0, RS6000_BTC_MISC) /* 3 argument Altivec builtins. */ BU_ALTIVEC_3 (VMADDFP, "vmaddfp", FP, fmav4sf4) BU_ALTIVEC_3 (VMHADDSHS, "vmhaddshs", SAT, altivec_vmhaddshs) BU_ALTIVEC_3 (VMHRADDSHS, "vmhraddshs", SAT, altivec_vmhraddshs) -BU_ALTIVEC_3 (VMLADDUHM, "vmladduhm", CONST, altivec_vmladduhm) +BU_ALTIVEC_3 (VMLADDUHM, "vmladduhm", CONST, fmav8hi4) BU_ALTIVEC_3 (VMSUMUBM, "vmsumubm", CONST, altivec_vmsumubm) BU_ALTIVEC_3 (VMSUMMBM, "vmsummbm", CONST, altivec_vmsummbm) BU_ALTIVEC_3 (VMSUMUHM, "vmsumuhm", CONST, altivec_vmsumuhm) BU_ALTIVEC_3 (VMSUMSHM, "vmsumshm", CONST, altivec_vmsumshm) BU_ALTIVEC_3 (VMSUMUHS, "vmsumuhs", SAT, altivec_vmsumuhs) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 4837e14..aef34b7 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16606,10 +16606,26 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) build_int_cst (arg2_type, 0)), arg0); gimple_set_location (g, loc); gsi_replace (gsi, g, true); return true; } + + /* Vector Fused multiply-add (fma). */ + case ALTIVEC_BUILTIN_VMADDFP: + case VSX_BUILTIN_XVMADDDP: + case ALTIVEC_BUILTIN_VMLADDUHM: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + tree arg2 = gimple_call_arg (stmt, 2); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, arg2); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + default: if (TARGET_DEBUG_BUILTIN) fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", fn_code, fn_name1, fn_name2); break;
On Fri, Oct 27, 2017 at 11:51 AM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote: > Hi, > V3. :-) > > [PATCH, rs6000] (v2) Gimple folding for vec_madd() > > Add support for gimple folding of the vec_madd() (vector multiply-add) > intrinsics. > Renamed the define_insn of altivec_vmladduhm to fmav8hi4, Refreshed the > caller of gen_altivec_vmladduhm to call gen_fmav8hi, and updated the > rs6000-builtin.def entry for VMLADDUHM to point to the new name. > With this refresh I am no longer adding a define_expand. > Plus a few cosmetic tweaks per feedback. > > Testcase coverage is provided by the existing tests as > gcc.target/powerpc/fold-vec-madd-*.c > > Sniff-tests passed. Regtests will be kicked off shortly. OK for trunk? > > Thanks, > -Will > > [gcc] Thanks for spinning the patch again without the define_expand. The altivec.md ChangeLog entry should be more explicit for each change. > > 2017-10-27 Will Schmidt <will_schmidt@vnet.ibm.com> > > * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for > gimple folding of vec_madd() intrinsics. > * config/rs6000/altivec.md: Rename altivec_vmladduhm to fmav8hi4 * config/rs6000/altivec.md (mulv8hi3): Rename altivec_vmladduhm to fmav8hi4. (altivec_vmladduhm): Rename to fmav8hi4. > * config/rs6000/rs6000-builtin.def: Rename vmladduhm to fmav8hi4 Thanks, David
On Fri, Oct 27, 2017 at 12:51:56PM -0400, David Edelsohn wrote: > On Fri, Oct 27, 2017 at 11:51 AM, Will Schmidt > <will_schmidt@vnet.ibm.com> wrote: > > [PATCH, rs6000] (v2) Gimple folding for vec_madd() > > > > Add support for gimple folding of the vec_madd() (vector multiply-add) > > intrinsics. > > Renamed the define_insn of altivec_vmladduhm to fmav8hi4, Refreshed the > > caller of gen_altivec_vmladduhm to call gen_fmav8hi, and updated the > > rs6000-builtin.def entry for VMLADDUHM to point to the new name. > > With this refresh I am no longer adding a define_expand. > > Plus a few cosmetic tweaks per feedback. > > > > Testcase coverage is provided by the existing tests as > > gcc.target/powerpc/fold-vec-madd-*.c > > > > Sniff-tests passed. Regtests will be kicked off shortly. OK for trunk? Okay, with David's comments taken care of. Thanks! Segher
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 4837e14..04c2b15 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16606,10 +16606,43 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) build_int_cst (arg2_type, 0)), arg0); gimple_set_location (g, loc); gsi_replace (gsi, g, true); return true; } + + /* vec_madd (Float) */ + case ALTIVEC_BUILTIN_VMADDFP: + case VSX_BUILTIN_XVMADDDP: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + tree arg2 = gimple_call_arg (stmt, 2); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, FMA_EXPR , arg0, arg1, arg2); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* vec_madd (Integral) */ + case ALTIVEC_BUILTIN_VMLADDUHM: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + tree arg2 = gimple_call_arg (stmt, 2); + lhs = gimple_call_lhs (stmt); + tree lhs_type = TREE_TYPE (lhs); + location_t loc = gimple_location (stmt); + gimple_seq stmts = NULL; + tree mult_result = gimple_build (&stmts, loc, MULT_EXPR, + lhs_type, arg0, arg1); + tree plus_result = gimple_build (&stmts, loc, PLUS_EXPR, + lhs_type, mult_result, arg2); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + update_call_from_tree (gsi, plus_result); + return true; + } + default: if (TARGET_DEBUG_BUILTIN) fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", fn_code, fn_name1, fn_name2); break;