Message ID | 20221124094148.125303-1-guojiufu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [V2] Update block move for struct param or returns | expand |
Based on the discussions in previous mails: https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607139.html https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607197.html I will update the patch accordingly, and then submit a new version. BR, Jeff (Jiufu) Jiufu Guo <guojiufu@linux.ibm.com> writes: > Hi, > > When assigning a parameter to a variable, or assigning a variable to > return value with struct type, "block move" are used to expand > the assignment. It would be better to use the register mode according > to the target/ABI to move the blocks. And then this would raise more > opportunities for other optimization passes(cse/dse/xprop). > > As the example code (like code in PR65421): > > typedef struct SA {double a[3];} A; > A ret_arg_pt (A *a){return *a;} // on ppc64le, only 3 lfd(s) > A ret_arg (A a) {return a;} // just empty fun body > void st_arg (A a, A *p) {*p = a;} //only 3 stfd(s) > > This patch is based on the previous version which supports assignments > from parameter: > https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605709.html > This patch also supports returns. > > I also tried to update gimplify/nrv to replace "return D.xxx;" with > "return <retval>;". While there is one issue: "<retval>" with > PARALLEL code can not be accessed through address/component_ref. > This issue blocks a few passes (e.g. sra, expand). > > On ppc64, some dead stores are not eliminated. e.g. for ret_arg: > .cfi_startproc > std 4,56(1)//reductant > std 5,64(1)//reductant > std 6,72(1)//reductant > std 4,0(3) > std 5,8(3) > std 6,16(3) > blr > > Bootstraped and regtested on ppc64le and x86_64. > > I'm wondering if this patch could be committed first. > Thanks for the comments and suggestions. > > > BR, > Jeff (Jiufu) > > PR target/65421 > > gcc/ChangeLog: > > * cfgexpand.cc (expand_used_vars): Add collecting return VARs. > (expand_gimple_stmt_1): Call expand_special_struct_assignment. > (pass_expand::execute): Free collections of return VARs. > * expr.cc (expand_special_struct_assignment): New function. > * expr.h (expand_special_struct_assignment): Declare. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/pr65421-1.c: New test. > * gcc.target/powerpc/pr65421.c: New test. > > --- > gcc/cfgexpand.cc | 37 +++++++++++++++++ > gcc/expr.cc | 43 ++++++++++++++++++++ > gcc/expr.h | 3 ++ > gcc/testsuite/gcc.target/powerpc/pr65421-1.c | 21 ++++++++++ > gcc/testsuite/gcc.target/powerpc/pr65421.c | 19 +++++++++ > 5 files changed, 123 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-1.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c > > diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc > index dd29ffffc03..f185de39341 100644 > --- a/gcc/cfgexpand.cc > +++ b/gcc/cfgexpand.cc > @@ -341,6 +341,9 @@ static hash_map<tree, size_t> *decl_to_stack_part; > all of them in one big sweep. */ > static bitmap_obstack stack_var_bitmap_obstack; > > +/* Those VARs on returns. */ > +static bitmap return_vars; > + > /* An array of indices such that stack_vars[stack_vars_sorted[i]].size > is non-decreasing. */ > static size_t *stack_vars_sorted; > @@ -2158,6 +2161,24 @@ expand_used_vars (bitmap forced_stack_vars) > frame_phase = off ? align - off : 0; > } > > + /* Collect VARs on returns. */ > + return_vars = NULL; > + if (DECL_RESULT (current_function_decl) > + && TYPE_MODE (TREE_TYPE (DECL_RESULT (current_function_decl))) == BLKmode) > + { > + return_vars = BITMAP_ALLOC (NULL); > + > + edge_iterator ei; > + edge e; > + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > + if (greturn *ret = safe_dyn_cast<greturn *> (last_stmt (e->src))) > + { > + tree val = gimple_return_retval (ret); > + if (val && VAR_P (val)) > + bitmap_set_bit (return_vars, DECL_UID (val)); > + } > + } > + > /* Set TREE_USED on all variables in the local_decls. */ > FOR_EACH_LOCAL_DECL (cfun, i, var) > TREE_USED (var) = 1; > @@ -3942,6 +3963,17 @@ expand_gimple_stmt_1 (gimple *stmt) > /* This is a clobber to mark the going out of scope for > this LHS. */ > expand_clobber (lhs); > + else if ((TREE_CODE (rhs) == PARM_DECL && DECL_INCOMING_RTL (rhs) > + && TYPE_MODE (TREE_TYPE (rhs)) == BLKmode > + && (GET_CODE (DECL_INCOMING_RTL (rhs)) == PARALLEL > + || REG_P (DECL_INCOMING_RTL (rhs)))) > + || (VAR_P (lhs) && return_vars > + && DECL_RTL_SET_P (DECL_RESULT (current_function_decl)) > + && GET_CODE ( > + DECL_RTL (DECL_RESULT (current_function_decl))) > + == PARALLEL > + && bitmap_bit_p (return_vars, DECL_UID (lhs)))) > + expand_special_struct_assignment (lhs, rhs); > else > expand_assignment (lhs, rhs, > gimple_assign_nontemporal_move_p ( > @@ -7025,6 +7057,11 @@ pass_expand::execute (function *fun) > /* After expanding, the return labels are no longer needed. */ > return_label = NULL; > naked_return_label = NULL; > + if (return_vars) > + { > + BITMAP_FREE (return_vars); > + return_vars = NULL; > + } > > /* After expanding, the tm_restart map is no longer needed. */ > if (fun->gimple_df->tm_restart) > diff --git a/gcc/expr.cc b/gcc/expr.cc > index d9407432ea5..6ffd9439188 100644 > --- a/gcc/expr.cc > +++ b/gcc/expr.cc > @@ -5559,6 +5559,49 @@ mem_ref_refers_to_non_mem_p (tree ref) > return non_mem_decl_p (base); > } > > +/* Expand the assignment from parameter or to returns if it needs > + "block move" on struct type. */ > + > +void > +expand_special_struct_assignment (tree to, tree from) > +{ > + rtx result; > + > + push_temp_slots (); > + rtx par_ret = TREE_CODE (from) == PARM_DECL > + ? DECL_INCOMING_RTL (from) > + : DECL_RTL (DECL_RESULT (current_function_decl)); > + machine_mode mode = GET_CODE (par_ret) == PARALLEL > + ? GET_MODE (XEXP (XVECEXP (par_ret, 0, 0), 0)) > + : word_mode; > + int mode_size = GET_MODE_SIZE (mode).to_constant (); > + int size = INTVAL (expr_size (from)); > + rtx to_rtx = expand_expr (to, NULL_RTX, VOIDmode, EXPAND_WRITE); > + > + /* Here using a heurisitic number for how many words may pass via gprs. */ > + int hurstc_num = 8; > + if (size < mode_size || (size % mode_size) != 0 > + || (GET_CODE (par_ret) != PARALLEL && size > (mode_size * hurstc_num))) > + result = store_expr (from, to_rtx, 0, false, false); > + else > + { > + rtx from_rtx > + = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL); > + for (int i = 0; i < size / mode_size; i++) > + { > + rtx temp = gen_reg_rtx (mode); > + rtx src = adjust_address (from_rtx, mode, mode_size * i); > + rtx dest = adjust_address (to_rtx, mode, mode_size * i); > + emit_move_insn (temp, src); > + emit_move_insn (dest, temp); > + } > + result = to_rtx; > + } > + > + preserve_temp_slots (result); > + pop_temp_slots (); > +} > + > /* Expand an assignment that stores the value of FROM into TO. If NONTEMPORAL > is true, try generating a nontemporal store. */ > > diff --git a/gcc/expr.h b/gcc/expr.h > index 08b59b8d869..10527f23a56 100644 > --- a/gcc/expr.h > +++ b/gcc/expr.h > @@ -281,6 +281,9 @@ extern void get_bit_range (poly_uint64_pod *, poly_uint64_pod *, tree, > /* Expand an assignment that stores the value of FROM into TO. */ > extern void expand_assignment (tree, tree, bool); > > +/* Expand an assignment from parameters or to returns. */ > +extern void expand_special_struct_assignment (tree, tree); > + > /* Generate code for computing expression EXP, > and storing the value into TARGET. > If SUGGEST_REG is nonzero, copy the value through a register > diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421-1.c b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c > new file mode 100644 > index 00000000000..f55a0fe0002 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c > @@ -0,0 +1,21 @@ > +/* PR target/65421 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -m64" } */ > + > +typedef struct SA > +{ > + double a[3]; > + long l; > +} A; > + > +A ret_arg_pt (A *a){return *a;} > + > +A ret_arg (A a) {return a;} > + > +void st_arg (A a, A *p) {*p = a;} > + > +/* { dg-final { scan-assembler-times {\mlxvd2x\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mstxvd2x\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mstd\M} 8 } } */ > +/* { dg-final { scan-assembler-times {\mblr\M} 3 } } */ > +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 16 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421.c b/gcc/testsuite/gcc.target/powerpc/pr65421.c > new file mode 100644 > index 00000000000..26e85468470 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr65421.c > @@ -0,0 +1,19 @@ > +/* PR target/65421 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -m64" } */ > + > +typedef struct SA > +{ > + double a[3]; > +} A; > + > +A ret_arg_pt (A *a){return *a;} > + > +A ret_arg (A a) {return a;} > + > +void st_arg (A a, A *p) {*p = a;} > + > +/* { dg-final { scan-assembler-times {\mlfd\M} 3 } } */ > +/* { dg-final { scan-assembler-times {\mstfd\M} 3 } } */ > +/* { dg-final { scan-assembler-times {\mblr\M} 3 } } */ > +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 9 } } */
diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index dd29ffffc03..f185de39341 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -341,6 +341,9 @@ static hash_map<tree, size_t> *decl_to_stack_part; all of them in one big sweep. */ static bitmap_obstack stack_var_bitmap_obstack; +/* Those VARs on returns. */ +static bitmap return_vars; + /* An array of indices such that stack_vars[stack_vars_sorted[i]].size is non-decreasing. */ static size_t *stack_vars_sorted; @@ -2158,6 +2161,24 @@ expand_used_vars (bitmap forced_stack_vars) frame_phase = off ? align - off : 0; } + /* Collect VARs on returns. */ + return_vars = NULL; + if (DECL_RESULT (current_function_decl) + && TYPE_MODE (TREE_TYPE (DECL_RESULT (current_function_decl))) == BLKmode) + { + return_vars = BITMAP_ALLOC (NULL); + + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + if (greturn *ret = safe_dyn_cast<greturn *> (last_stmt (e->src))) + { + tree val = gimple_return_retval (ret); + if (val && VAR_P (val)) + bitmap_set_bit (return_vars, DECL_UID (val)); + } + } + /* Set TREE_USED on all variables in the local_decls. */ FOR_EACH_LOCAL_DECL (cfun, i, var) TREE_USED (var) = 1; @@ -3942,6 +3963,17 @@ expand_gimple_stmt_1 (gimple *stmt) /* This is a clobber to mark the going out of scope for this LHS. */ expand_clobber (lhs); + else if ((TREE_CODE (rhs) == PARM_DECL && DECL_INCOMING_RTL (rhs) + && TYPE_MODE (TREE_TYPE (rhs)) == BLKmode + && (GET_CODE (DECL_INCOMING_RTL (rhs)) == PARALLEL + || REG_P (DECL_INCOMING_RTL (rhs)))) + || (VAR_P (lhs) && return_vars + && DECL_RTL_SET_P (DECL_RESULT (current_function_decl)) + && GET_CODE ( + DECL_RTL (DECL_RESULT (current_function_decl))) + == PARALLEL + && bitmap_bit_p (return_vars, DECL_UID (lhs)))) + expand_special_struct_assignment (lhs, rhs); else expand_assignment (lhs, rhs, gimple_assign_nontemporal_move_p ( @@ -7025,6 +7057,11 @@ pass_expand::execute (function *fun) /* After expanding, the return labels are no longer needed. */ return_label = NULL; naked_return_label = NULL; + if (return_vars) + { + BITMAP_FREE (return_vars); + return_vars = NULL; + } /* After expanding, the tm_restart map is no longer needed. */ if (fun->gimple_df->tm_restart) diff --git a/gcc/expr.cc b/gcc/expr.cc index d9407432ea5..6ffd9439188 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -5559,6 +5559,49 @@ mem_ref_refers_to_non_mem_p (tree ref) return non_mem_decl_p (base); } +/* Expand the assignment from parameter or to returns if it needs + "block move" on struct type. */ + +void +expand_special_struct_assignment (tree to, tree from) +{ + rtx result; + + push_temp_slots (); + rtx par_ret = TREE_CODE (from) == PARM_DECL + ? DECL_INCOMING_RTL (from) + : DECL_RTL (DECL_RESULT (current_function_decl)); + machine_mode mode = GET_CODE (par_ret) == PARALLEL + ? GET_MODE (XEXP (XVECEXP (par_ret, 0, 0), 0)) + : word_mode; + int mode_size = GET_MODE_SIZE (mode).to_constant (); + int size = INTVAL (expr_size (from)); + rtx to_rtx = expand_expr (to, NULL_RTX, VOIDmode, EXPAND_WRITE); + + /* Here using a heurisitic number for how many words may pass via gprs. */ + int hurstc_num = 8; + if (size < mode_size || (size % mode_size) != 0 + || (GET_CODE (par_ret) != PARALLEL && size > (mode_size * hurstc_num))) + result = store_expr (from, to_rtx, 0, false, false); + else + { + rtx from_rtx + = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL); + for (int i = 0; i < size / mode_size; i++) + { + rtx temp = gen_reg_rtx (mode); + rtx src = adjust_address (from_rtx, mode, mode_size * i); + rtx dest = adjust_address (to_rtx, mode, mode_size * i); + emit_move_insn (temp, src); + emit_move_insn (dest, temp); + } + result = to_rtx; + } + + preserve_temp_slots (result); + pop_temp_slots (); +} + /* Expand an assignment that stores the value of FROM into TO. If NONTEMPORAL is true, try generating a nontemporal store. */ diff --git a/gcc/expr.h b/gcc/expr.h index 08b59b8d869..10527f23a56 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -281,6 +281,9 @@ extern void get_bit_range (poly_uint64_pod *, poly_uint64_pod *, tree, /* Expand an assignment that stores the value of FROM into TO. */ extern void expand_assignment (tree, tree, bool); +/* Expand an assignment from parameters or to returns. */ +extern void expand_special_struct_assignment (tree, tree); + /* Generate code for computing expression EXP, and storing the value into TARGET. If SUGGEST_REG is nonzero, copy the value through a register diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421-1.c b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c new file mode 100644 index 00000000000..f55a0fe0002 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c @@ -0,0 +1,21 @@ +/* PR target/65421 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -m64" } */ + +typedef struct SA +{ + double a[3]; + long l; +} A; + +A ret_arg_pt (A *a){return *a;} + +A ret_arg (A a) {return a;} + +void st_arg (A a, A *p) {*p = a;} + +/* { dg-final { scan-assembler-times {\mlxvd2x\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstd\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mblr\M} 3 } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 16 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421.c b/gcc/testsuite/gcc.target/powerpc/pr65421.c new file mode 100644 index 00000000000..26e85468470 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr65421.c @@ -0,0 +1,19 @@ +/* PR target/65421 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -m64" } */ + +typedef struct SA +{ + double a[3]; +} A; + +A ret_arg_pt (A *a){return *a;} + +A ret_arg (A a) {return a;} + +void st_arg (A a, A *p) {*p = a;} + +/* { dg-final { scan-assembler-times {\mlfd\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mstfd\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mblr\M} 3 } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 9 } } */