Message ID | 992fc509-51d6-3dbd-ee9d-393378227fc5@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [PATCH-1v4,expand] Enable vector mode for compare_by_pieces [PR111449] | expand |
HAO CHEN GUI <guihaoc@linux.ibm.com> writes: > Hi, > Vector mode instructions are efficient for compare on some targets. > This patch enables vector mode for compare_by_pieces. Two help > functions are added to check if vector mode is available for certain > by pieces operations and if if optabs exists for the mode and certain > by pieces operations. One member is added in class op_by_pieces_d to > record the type of operations. > > The test case is in the second patch which is rs6000 specific. > > Compared to last version, the main change is to add a target hook > check - scalar_mode_supported_p when retrieving the available scalar > modes. The mode which is not supported for a target should be skipped. > (e.g. TImode on ppc). Also some function names and comments are refined > according to reviewer's advice. > > Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no > regressions. > > Thanks > Gui Haochen > > ChangeLog > Expand: Enable vector mode for by pieces compares > > Vector mode compare instructions are efficient for equality compare on > rs6000. This patch refactors the codes of by pieces operation to enable > vector mode for compare. > > gcc/ > PR target/111449 > * expr.cc (can_use_qi_vectors): New function to return true if > we know how to implement OP using vectors of bytes. > (qi_vector_mode_supported_p): New function to check if optabs > exists for the mode and certain by pieces operations. > (widest_fixed_size_mode_for_size): Replace the second argument > with the type of by pieces operations. Call can_use_qi_vectors > and qi_vector_mode_supported_p to do the check. Call > scalar_mode_supported_p to check if the scalar mode is supported. > (by_pieces_ninsns): Pass the type of by pieces operation to > widest_fixed_size_mode_for_size. > (class op_by_pieces_d): Remove m_qi_vector_mode. Add m_op to > record the type of by pieces operations. > (op_by_pieces_d::op_by_pieces_d): Change last argument to the > type of by pieces operations, initialize m_op with it. Pass > m_op to function widest_fixed_size_mode_for_size. > (op_by_pieces_d::get_usable_mode): Pass m_op to function > widest_fixed_size_mode_for_size. > (op_by_pieces_d::smallest_fixed_size_mode_for_size): Call > can_use_qi_vectors and qi_vector_mode_supported_p to do the > check. > (op_by_pieces_d::run): Pass m_op to function > widest_fixed_size_mode_for_size. > (move_by_pieces_d::move_by_pieces_d): Set m_op to MOVE_BY_PIECES. > (store_by_pieces_d::store_by_pieces_d): Set m_op with the op. > (can_store_by_pieces): Pass the type of by pieces operations to > widest_fixed_size_mode_for_size. > (clear_by_pieces): Initialize class store_by_pieces_d with > CLEAR_BY_PIECES. > (compare_by_pieces_d::compare_by_pieces_d): Set m_op to > COMPARE_BY_PIECES. OK, thanks. And thanks for your patience. Richard > patch.diff > diff --git a/gcc/expr.cc b/gcc/expr.cc > index 2c9930ec674..ad5f9dd8ec2 100644 > --- a/gcc/expr.cc > +++ b/gcc/expr.cc > @@ -988,18 +988,44 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align) > return align; > } > > -/* Return the widest QI vector, if QI_MODE is true, or integer mode > - that is narrower than SIZE bytes. */ > +/* Return true if we know how to implement OP using vectors of bytes. */ > +static bool > +can_use_qi_vectors (by_pieces_operation op) > +{ > + return (op == COMPARE_BY_PIECES > + || op == SET_BY_PIECES > + || op == CLEAR_BY_PIECES); > +} > + > +/* Return true if optabs exists for the mode and certain by pieces > + operations. */ > +static bool > +qi_vector_mode_supported_p (fixed_size_mode mode, by_pieces_operation op) > +{ > + if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) > + && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing) > + return true; > + > + if (op == COMPARE_BY_PIECES > + && optab_handler (mov_optab, mode) != CODE_FOR_nothing > + && can_compare_p (EQ, mode, ccp_jump)) > + return true; > > + return false; > +} > + > +/* Return the widest mode that can be used to perform part of an > + operation OP on SIZE bytes. Try to use QI vector modes where > + possible. */ > static fixed_size_mode > -widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) > +widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op) > { > fixed_size_mode result = NARROWEST_INT_MODE; > > gcc_checking_assert (size > 1); > > /* Use QI vector only if size is wider than a WORD. */ > - if (qi_vector && size > UNITS_PER_WORD) > + if (can_use_qi_vectors (op) && size > UNITS_PER_WORD) > { > machine_mode mode; > fixed_size_mode candidate; > @@ -1009,8 +1035,7 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) > { > if (GET_MODE_SIZE (candidate) >= size) > break; > - if (optab_handler (vec_duplicate_optab, candidate) > - != CODE_FOR_nothing) > + if (qi_vector_mode_supported_p (candidate, op)) > result = candidate; > } > > @@ -1019,9 +1044,14 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) > } > > opt_scalar_int_mode tmode; > + scalar_int_mode mode; > FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) > - if (GET_MODE_SIZE (tmode.require ()) < size) > - result = tmode.require (); > + { > + mode = tmode.require (); > + if (GET_MODE_SIZE (mode) < size > + && targetm.scalar_mode_supported_p (mode)) > + result = mode; > + } > > return result; > } > @@ -1061,8 +1091,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align, > { > /* NB: Round up L and ALIGN to the widest integer mode for > MAX_SIZE. */ > - mode = widest_fixed_size_mode_for_size (max_size, > - op == SET_BY_PIECES); > + mode = widest_fixed_size_mode_for_size (max_size, op); > if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) > { > unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode)); > @@ -1076,8 +1105,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align, > > while (max_size > 1 && l > 0) > { > - mode = widest_fixed_size_mode_for_size (max_size, > - op == SET_BY_PIECES); > + mode = widest_fixed_size_mode_for_size (max_size, op); > enum insn_code icode; > > unsigned int modesize = GET_MODE_SIZE (mode); > @@ -1317,8 +1345,8 @@ class op_by_pieces_d > bool m_push; > /* True if targetm.overlap_op_by_pieces_p () returns true. */ > bool m_overlap_op_by_pieces; > - /* True if QI vector mode can be used. */ > - bool m_qi_vector_mode; > + /* The type of operation that we're performing. */ > + by_pieces_operation m_op; > > /* Virtual functions, overriden by derived classes for the specific > operation. */ > @@ -1331,7 +1359,7 @@ class op_by_pieces_d > public: > op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn, > void *, unsigned HOST_WIDE_INT, unsigned int, bool, > - bool = false); > + by_pieces_operation); > void run (); > }; > > @@ -1349,11 +1377,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to, > void *from_cfn_data, > unsigned HOST_WIDE_INT len, > unsigned int align, bool push, > - bool qi_vector_mode) > + by_pieces_operation op) > : m_to (to, to_load, NULL, NULL), > m_from (from, from_load, from_cfn, from_cfn_data), > m_len (len), m_max_size (max_pieces + 1), > - m_push (push), m_qi_vector_mode (qi_vector_mode) > + m_push (push), m_op (op) > { > int toi = m_to.get_addr_inc (); > int fromi = m_from.get_addr_inc (); > @@ -1375,8 +1403,7 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to, > { > /* Find the mode of the largest comparison. */ > fixed_size_mode mode > - = widest_fixed_size_mode_for_size (m_max_size, > - m_qi_vector_mode); > + = widest_fixed_size_mode_for_size (m_max_size, m_op); > > m_from.decide_autoinc (mode, m_reverse, len); > m_to.decide_autoinc (mode, m_reverse, len); > @@ -1401,7 +1428,7 @@ op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len) > if (len >= size && prepare_mode (mode, m_align)) > break; > /* widest_fixed_size_mode_for_size checks SIZE > 1. */ > - mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); > + mode = widest_fixed_size_mode_for_size (size, m_op); > } > while (1); > return mode; > @@ -1414,7 +1441,7 @@ fixed_size_mode > op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > { > /* Use QI vector only for > size of WORD. */ > - if (m_qi_vector_mode && size > UNITS_PER_WORD) > + if (can_use_qi_vectors (m_op) && size > UNITS_PER_WORD) > { > machine_mode mode; > fixed_size_mode candidate; > @@ -1427,8 +1454,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > break; > > if (GET_MODE_SIZE (candidate) >= size > - && (optab_handler (vec_duplicate_optab, candidate) > - != CODE_FOR_nothing)) > + && qi_vector_mode_supported_p (candidate, m_op)) > return candidate; > } > } > @@ -1451,7 +1477,7 @@ op_by_pieces_d::run () > > /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */ > fixed_size_mode mode > - = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode); > + = widest_fixed_size_mode_for_size (m_max_size, m_op); > mode = get_usable_mode (mode, length); > > by_pieces_prev to_prev = { nullptr, mode }; > @@ -1516,8 +1542,7 @@ op_by_pieces_d::run () > else > { > /* widest_fixed_size_mode_for_size checks SIZE > 1. */ > - mode = widest_fixed_size_mode_for_size (size, > - m_qi_vector_mode); > + mode = widest_fixed_size_mode_for_size (size, m_op); > mode = get_usable_mode (mode, length); > } > } > @@ -1543,7 +1568,7 @@ class move_by_pieces_d : public op_by_pieces_d > move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, > unsigned int align) > : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL, > - NULL, len, align, PUSHG_P (to)) > + NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES) > { > } > rtx finish_retmode (memop_ret); > @@ -1632,15 +1657,16 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len, > class store_by_pieces_d : public op_by_pieces_d > { > insn_gen_fn m_gen_fun; > + > void generate (rtx, rtx, machine_mode) final override; > bool prepare_mode (machine_mode, unsigned int) final override; > > public: > store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, > unsigned HOST_WIDE_INT len, unsigned int align, > - bool qi_vector_mode) > + by_pieces_operation op) > : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn, > - cfn_data, len, align, false, qi_vector_mode) > + cfn_data, len, align, false, op) > { > } > rtx finish_retmode (memop_ret); > @@ -1729,8 +1755,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, > max_size = STORE_MAX_PIECES + 1; > while (max_size > 1 && l > 0) > { > - fixed_size_mode mode > - = widest_fixed_size_mode_for_size (max_size, memsetp); > + auto op = memsetp ? SET_BY_PIECES : STORE_BY_PIECES; > + auto mode = widest_fixed_size_mode_for_size (max_size, op); > > icode = optab_handler (mov_optab, mode); > if (icode != CODE_FOR_nothing > @@ -1793,7 +1819,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len, > optimize_insn_for_speed_p ())); > > store_by_pieces_d data (to, constfun, constfundata, len, align, > - memsetp); > + memsetp ? SET_BY_PIECES : STORE_BY_PIECES); > data.run (); > > if (retmode != RETURN_BEGIN) > @@ -1814,7 +1840,7 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align) > /* Use builtin_memset_read_str to support vector mode broadcast. */ > char c = 0; > store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align, > - true); > + CLEAR_BY_PIECES); > data.run (); > } > > @@ -1832,12 +1858,13 @@ class compare_by_pieces_d : public op_by_pieces_d > void generate (rtx, rtx, machine_mode) final override; > bool prepare_mode (machine_mode, unsigned int) final override; > void finish_mode (machine_mode) final override; > + > public: > compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, > void *op1_cfn_data, HOST_WIDE_INT len, int align, > rtx_code_label *fail_label) > : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn, > - op1_cfn_data, len, align, false) > + op1_cfn_data, len, align, false, COMPARE_BY_PIECES) > { > m_fail_label = fail_label; > }
Committed as r14-4835. https://gcc.gnu.org/g:f08ca5903c7a02b450b93143467f70b9fd8e0085 Thanks Gui Haochen 在 2023/10/20 16:49, Richard Sandiford 写道: > HAO CHEN GUI <guihaoc@linux.ibm.com> writes: >> Hi, >> Vector mode instructions are efficient for compare on some targets. >> This patch enables vector mode for compare_by_pieces. Two help >> functions are added to check if vector mode is available for certain >> by pieces operations and if if optabs exists for the mode and certain >> by pieces operations. One member is added in class op_by_pieces_d to >> record the type of operations. >> >> The test case is in the second patch which is rs6000 specific. >> >> Compared to last version, the main change is to add a target hook >> check - scalar_mode_supported_p when retrieving the available scalar >> modes. The mode which is not supported for a target should be skipped. >> (e.g. TImode on ppc). Also some function names and comments are refined >> according to reviewer's advice. >> >> Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no >> regressions. >> >> Thanks >> Gui Haochen >> >> ChangeLog >> Expand: Enable vector mode for by pieces compares >> >> Vector mode compare instructions are efficient for equality compare on >> rs6000. This patch refactors the codes of by pieces operation to enable >> vector mode for compare. >> >> gcc/ >> PR target/111449 >> * expr.cc (can_use_qi_vectors): New function to return true if >> we know how to implement OP using vectors of bytes. >> (qi_vector_mode_supported_p): New function to check if optabs >> exists for the mode and certain by pieces operations. >> (widest_fixed_size_mode_for_size): Replace the second argument >> with the type of by pieces operations. Call can_use_qi_vectors >> and qi_vector_mode_supported_p to do the check. Call >> scalar_mode_supported_p to check if the scalar mode is supported. >> (by_pieces_ninsns): Pass the type of by pieces operation to >> widest_fixed_size_mode_for_size. >> (class op_by_pieces_d): Remove m_qi_vector_mode. Add m_op to >> record the type of by pieces operations. >> (op_by_pieces_d::op_by_pieces_d): Change last argument to the >> type of by pieces operations, initialize m_op with it. Pass >> m_op to function widest_fixed_size_mode_for_size. >> (op_by_pieces_d::get_usable_mode): Pass m_op to function >> widest_fixed_size_mode_for_size. >> (op_by_pieces_d::smallest_fixed_size_mode_for_size): Call >> can_use_qi_vectors and qi_vector_mode_supported_p to do the >> check. >> (op_by_pieces_d::run): Pass m_op to function >> widest_fixed_size_mode_for_size. >> (move_by_pieces_d::move_by_pieces_d): Set m_op to MOVE_BY_PIECES. >> (store_by_pieces_d::store_by_pieces_d): Set m_op with the op. >> (can_store_by_pieces): Pass the type of by pieces operations to >> widest_fixed_size_mode_for_size. >> (clear_by_pieces): Initialize class store_by_pieces_d with >> CLEAR_BY_PIECES. >> (compare_by_pieces_d::compare_by_pieces_d): Set m_op to >> COMPARE_BY_PIECES. > > OK, thanks. And thanks for your patience. > > Richard > >> patch.diff >> diff --git a/gcc/expr.cc b/gcc/expr.cc >> index 2c9930ec674..ad5f9dd8ec2 100644 >> --- a/gcc/expr.cc >> +++ b/gcc/expr.cc >> @@ -988,18 +988,44 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align) >> return align; >> } >> >> -/* Return the widest QI vector, if QI_MODE is true, or integer mode >> - that is narrower than SIZE bytes. */ >> +/* Return true if we know how to implement OP using vectors of bytes. */ >> +static bool >> +can_use_qi_vectors (by_pieces_operation op) >> +{ >> + return (op == COMPARE_BY_PIECES >> + || op == SET_BY_PIECES >> + || op == CLEAR_BY_PIECES); >> +} >> + >> +/* Return true if optabs exists for the mode and certain by pieces >> + operations. */ >> +static bool >> +qi_vector_mode_supported_p (fixed_size_mode mode, by_pieces_operation op) >> +{ >> + if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) >> + && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing) >> + return true; >> + >> + if (op == COMPARE_BY_PIECES >> + && optab_handler (mov_optab, mode) != CODE_FOR_nothing >> + && can_compare_p (EQ, mode, ccp_jump)) >> + return true; >> >> + return false; >> +} >> + >> +/* Return the widest mode that can be used to perform part of an >> + operation OP on SIZE bytes. Try to use QI vector modes where >> + possible. */ >> static fixed_size_mode >> -widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) >> +widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op) >> { >> fixed_size_mode result = NARROWEST_INT_MODE; >> >> gcc_checking_assert (size > 1); >> >> /* Use QI vector only if size is wider than a WORD. */ >> - if (qi_vector && size > UNITS_PER_WORD) >> + if (can_use_qi_vectors (op) && size > UNITS_PER_WORD) >> { >> machine_mode mode; >> fixed_size_mode candidate; >> @@ -1009,8 +1035,7 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) >> { >> if (GET_MODE_SIZE (candidate) >= size) >> break; >> - if (optab_handler (vec_duplicate_optab, candidate) >> - != CODE_FOR_nothing) >> + if (qi_vector_mode_supported_p (candidate, op)) >> result = candidate; >> } >> >> @@ -1019,9 +1044,14 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) >> } >> >> opt_scalar_int_mode tmode; >> + scalar_int_mode mode; >> FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) >> - if (GET_MODE_SIZE (tmode.require ()) < size) >> - result = tmode.require (); >> + { >> + mode = tmode.require (); >> + if (GET_MODE_SIZE (mode) < size >> + && targetm.scalar_mode_supported_p (mode)) >> + result = mode; >> + } >> >> return result; >> } >> @@ -1061,8 +1091,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align, >> { >> /* NB: Round up L and ALIGN to the widest integer mode for >> MAX_SIZE. */ >> - mode = widest_fixed_size_mode_for_size (max_size, >> - op == SET_BY_PIECES); >> + mode = widest_fixed_size_mode_for_size (max_size, op); >> if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) >> { >> unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode)); >> @@ -1076,8 +1105,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align, >> >> while (max_size > 1 && l > 0) >> { >> - mode = widest_fixed_size_mode_for_size (max_size, >> - op == SET_BY_PIECES); >> + mode = widest_fixed_size_mode_for_size (max_size, op); >> enum insn_code icode; >> >> unsigned int modesize = GET_MODE_SIZE (mode); >> @@ -1317,8 +1345,8 @@ class op_by_pieces_d >> bool m_push; >> /* True if targetm.overlap_op_by_pieces_p () returns true. */ >> bool m_overlap_op_by_pieces; >> - /* True if QI vector mode can be used. */ >> - bool m_qi_vector_mode; >> + /* The type of operation that we're performing. */ >> + by_pieces_operation m_op; >> >> /* Virtual functions, overriden by derived classes for the specific >> operation. */ >> @@ -1331,7 +1359,7 @@ class op_by_pieces_d >> public: >> op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn, >> void *, unsigned HOST_WIDE_INT, unsigned int, bool, >> - bool = false); >> + by_pieces_operation); >> void run (); >> }; >> >> @@ -1349,11 +1377,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to, >> void *from_cfn_data, >> unsigned HOST_WIDE_INT len, >> unsigned int align, bool push, >> - bool qi_vector_mode) >> + by_pieces_operation op) >> : m_to (to, to_load, NULL, NULL), >> m_from (from, from_load, from_cfn, from_cfn_data), >> m_len (len), m_max_size (max_pieces + 1), >> - m_push (push), m_qi_vector_mode (qi_vector_mode) >> + m_push (push), m_op (op) >> { >> int toi = m_to.get_addr_inc (); >> int fromi = m_from.get_addr_inc (); >> @@ -1375,8 +1403,7 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to, >> { >> /* Find the mode of the largest comparison. */ >> fixed_size_mode mode >> - = widest_fixed_size_mode_for_size (m_max_size, >> - m_qi_vector_mode); >> + = widest_fixed_size_mode_for_size (m_max_size, m_op); >> >> m_from.decide_autoinc (mode, m_reverse, len); >> m_to.decide_autoinc (mode, m_reverse, len); >> @@ -1401,7 +1428,7 @@ op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len) >> if (len >= size && prepare_mode (mode, m_align)) >> break; >> /* widest_fixed_size_mode_for_size checks SIZE > 1. */ >> - mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); >> + mode = widest_fixed_size_mode_for_size (size, m_op); >> } >> while (1); >> return mode; >> @@ -1414,7 +1441,7 @@ fixed_size_mode >> op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) >> { >> /* Use QI vector only for > size of WORD. */ >> - if (m_qi_vector_mode && size > UNITS_PER_WORD) >> + if (can_use_qi_vectors (m_op) && size > UNITS_PER_WORD) >> { >> machine_mode mode; >> fixed_size_mode candidate; >> @@ -1427,8 +1454,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) >> break; >> >> if (GET_MODE_SIZE (candidate) >= size >> - && (optab_handler (vec_duplicate_optab, candidate) >> - != CODE_FOR_nothing)) >> + && qi_vector_mode_supported_p (candidate, m_op)) >> return candidate; >> } >> } >> @@ -1451,7 +1477,7 @@ op_by_pieces_d::run () >> >> /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */ >> fixed_size_mode mode >> - = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode); >> + = widest_fixed_size_mode_for_size (m_max_size, m_op); >> mode = get_usable_mode (mode, length); >> >> by_pieces_prev to_prev = { nullptr, mode }; >> @@ -1516,8 +1542,7 @@ op_by_pieces_d::run () >> else >> { >> /* widest_fixed_size_mode_for_size checks SIZE > 1. */ >> - mode = widest_fixed_size_mode_for_size (size, >> - m_qi_vector_mode); >> + mode = widest_fixed_size_mode_for_size (size, m_op); >> mode = get_usable_mode (mode, length); >> } >> } >> @@ -1543,7 +1568,7 @@ class move_by_pieces_d : public op_by_pieces_d >> move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, >> unsigned int align) >> : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL, >> - NULL, len, align, PUSHG_P (to)) >> + NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES) >> { >> } >> rtx finish_retmode (memop_ret); >> @@ -1632,15 +1657,16 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len, >> class store_by_pieces_d : public op_by_pieces_d >> { >> insn_gen_fn m_gen_fun; >> + >> void generate (rtx, rtx, machine_mode) final override; >> bool prepare_mode (machine_mode, unsigned int) final override; >> >> public: >> store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, >> unsigned HOST_WIDE_INT len, unsigned int align, >> - bool qi_vector_mode) >> + by_pieces_operation op) >> : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn, >> - cfn_data, len, align, false, qi_vector_mode) >> + cfn_data, len, align, false, op) >> { >> } >> rtx finish_retmode (memop_ret); >> @@ -1729,8 +1755,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, >> max_size = STORE_MAX_PIECES + 1; >> while (max_size > 1 && l > 0) >> { >> - fixed_size_mode mode >> - = widest_fixed_size_mode_for_size (max_size, memsetp); >> + auto op = memsetp ? SET_BY_PIECES : STORE_BY_PIECES; >> + auto mode = widest_fixed_size_mode_for_size (max_size, op); >> >> icode = optab_handler (mov_optab, mode); >> if (icode != CODE_FOR_nothing >> @@ -1793,7 +1819,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len, >> optimize_insn_for_speed_p ())); >> >> store_by_pieces_d data (to, constfun, constfundata, len, align, >> - memsetp); >> + memsetp ? SET_BY_PIECES : STORE_BY_PIECES); >> data.run (); >> >> if (retmode != RETURN_BEGIN) >> @@ -1814,7 +1840,7 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align) >> /* Use builtin_memset_read_str to support vector mode broadcast. */ >> char c = 0; >> store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align, >> - true); >> + CLEAR_BY_PIECES); >> data.run (); >> } >> >> @@ -1832,12 +1858,13 @@ class compare_by_pieces_d : public op_by_pieces_d >> void generate (rtx, rtx, machine_mode) final override; >> bool prepare_mode (machine_mode, unsigned int) final override; >> void finish_mode (machine_mode) final override; >> + >> public: >> compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, >> void *op1_cfn_data, HOST_WIDE_INT len, int align, >> rtx_code_label *fail_label) >> : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn, >> - op1_cfn_data, len, align, false) >> + op1_cfn_data, len, align, false, COMPARE_BY_PIECES) >> { >> m_fail_label = fail_label; >> }
Hi Haochen Gui, It seems that the commit caused lots of test case fail on x86 platforms: https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078379.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078380.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078381.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078382.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078383.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078384.html Please help verify that if we need some testcase change or we get bug here. A simple reproducer under build folder is: make check RUNTESTFLAGS="i386.exp=g++.target/i386/pr80566-2.C --target_board='unix{-m64\ -march=cascadelake,-m32\ -march=cascadelake,-m32,-m64}'" Thx, Haochen > -----Original Message----- > From: HAO CHEN GUI <guihaoc@linux.ibm.com> > Sent: Monday, October 23, 2023 9:30 AM > To: Richard Sandiford <richard.sandiford@arm.com> > Cc: gcc-patches <gcc-patches@gcc.gnu.org> > Subject: Re: [PATCH-1v4, expand] Enable vector mode for compare_by_pieces > [PR111449] > > Committed as r14-4835. > > https://gcc.gnu.org/g:f08ca5903c7a02b450b93143467f70b9fd8e0085 > > Thanks > Gui Haochen > > 在 2023/10/20 16:49, Richard Sandiford 写道: > > HAO CHEN GUI <guihaoc@linux.ibm.com> writes: > >> Hi, > >> Vector mode instructions are efficient for compare on some targets. > >> This patch enables vector mode for compare_by_pieces. Two help > >> functions are added to check if vector mode is available for certain > >> by pieces operations and if if optabs exists for the mode and certain > >> by pieces operations. One member is added in class op_by_pieces_d to > >> record the type of operations. > >> > >> The test case is in the second patch which is rs6000 specific. > >> > >> Compared to last version, the main change is to add a target hook > >> check - scalar_mode_supported_p when retrieving the available scalar > >> modes. The mode which is not supported for a target should be skipped. > >> (e.g. TImode on ppc). Also some function names and comments are refined > >> according to reviewer's advice. > >> > >> Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no > >> regressions. > >> > >> Thanks > >> Gui Haochen > >> > >> ChangeLog > >> Expand: Enable vector mode for by pieces compares > >> > >> Vector mode compare instructions are efficient for equality compare on > >> rs6000. This patch refactors the codes of by pieces operation to enable > >> vector mode for compare. > >> > >> gcc/ > >> PR target/111449 > >> * expr.cc (can_use_qi_vectors): New function to return true if > >> we know how to implement OP using vectors of bytes. > >> (qi_vector_mode_supported_p): New function to check if optabs > >> exists for the mode and certain by pieces operations. > >> (widest_fixed_size_mode_for_size): Replace the second argument > >> with the type of by pieces operations. Call can_use_qi_vectors > >> and qi_vector_mode_supported_p to do the check. Call > >> scalar_mode_supported_p to check if the scalar mode is supported. > >> (by_pieces_ninsns): Pass the type of by pieces operation to > >> widest_fixed_size_mode_for_size. > >> (class op_by_pieces_d): Remove m_qi_vector_mode. Add m_op to > >> record the type of by pieces operations. > >> (op_by_pieces_d::op_by_pieces_d): Change last argument to the > >> type of by pieces operations, initialize m_op with it. Pass > >> m_op to function widest_fixed_size_mode_for_size. > >> (op_by_pieces_d::get_usable_mode): Pass m_op to function > >> widest_fixed_size_mode_for_size. > >> (op_by_pieces_d::smallest_fixed_size_mode_for_size): Call > >> can_use_qi_vectors and qi_vector_mode_supported_p to do the > >> check. > >> (op_by_pieces_d::run): Pass m_op to function > >> widest_fixed_size_mode_for_size. > >> (move_by_pieces_d::move_by_pieces_d): Set m_op to > MOVE_BY_PIECES. > >> (store_by_pieces_d::store_by_pieces_d): Set m_op with the op. > >> (can_store_by_pieces): Pass the type of by pieces operations to > >> widest_fixed_size_mode_for_size. > >> (clear_by_pieces): Initialize class store_by_pieces_d with > >> CLEAR_BY_PIECES. > >> (compare_by_pieces_d::compare_by_pieces_d): Set m_op to > >> COMPARE_BY_PIECES. > > > > OK, thanks. And thanks for your patience. > > > > Richard > > > >> patch.diff > >> diff --git a/gcc/expr.cc b/gcc/expr.cc > >> index 2c9930ec674..ad5f9dd8ec2 100644 > >> --- a/gcc/expr.cc > >> +++ b/gcc/expr.cc > >> @@ -988,18 +988,44 @@ alignment_for_piecewise_move (unsigned int > max_pieces, unsigned int align) > >> return align; > >> } > >> > >> -/* Return the widest QI vector, if QI_MODE is true, or integer mode > >> - that is narrower than SIZE bytes. */ > >> +/* Return true if we know how to implement OP using vectors of bytes. */ > >> +static bool > >> +can_use_qi_vectors (by_pieces_operation op) > >> +{ > >> + return (op == COMPARE_BY_PIECES > >> + || op == SET_BY_PIECES > >> + || op == CLEAR_BY_PIECES); > >> +} > >> + > >> +/* Return true if optabs exists for the mode and certain by pieces > >> + operations. */ > >> +static bool > >> +qi_vector_mode_supported_p (fixed_size_mode mode, > by_pieces_operation op) > >> +{ > >> + if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) > >> + && optab_handler (vec_duplicate_optab, mode) != > CODE_FOR_nothing) > >> + return true; > >> + > >> + if (op == COMPARE_BY_PIECES > >> + && optab_handler (mov_optab, mode) != CODE_FOR_nothing > >> + && can_compare_p (EQ, mode, ccp_jump)) > >> + return true; > >> > >> + return false; > >> +} > >> + > >> +/* Return the widest mode that can be used to perform part of an > >> + operation OP on SIZE bytes. Try to use QI vector modes where > >> + possible. */ > >> static fixed_size_mode > >> -widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) > >> +widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation > op) > >> { > >> fixed_size_mode result = NARROWEST_INT_MODE; > >> > >> gcc_checking_assert (size > 1); > >> > >> /* Use QI vector only if size is wider than a WORD. */ > >> - if (qi_vector && size > UNITS_PER_WORD) > >> + if (can_use_qi_vectors (op) && size > UNITS_PER_WORD) > >> { > >> machine_mode mode; > >> fixed_size_mode candidate; > >> @@ -1009,8 +1035,7 @@ widest_fixed_size_mode_for_size (unsigned int > size, bool qi_vector) > >> { > >> if (GET_MODE_SIZE (candidate) >= size) > >> break; > >> - if (optab_handler (vec_duplicate_optab, candidate) > >> - != CODE_FOR_nothing) > >> + if (qi_vector_mode_supported_p (candidate, op)) > >> result = candidate; > >> } > >> > >> @@ -1019,9 +1044,14 @@ widest_fixed_size_mode_for_size (unsigned > int size, bool qi_vector) > >> } > >> > >> opt_scalar_int_mode tmode; > >> + scalar_int_mode mode; > >> FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) > >> - if (GET_MODE_SIZE (tmode.require ()) < size) > >> - result = tmode.require (); > >> + { > >> + mode = tmode.require (); > >> + if (GET_MODE_SIZE (mode) < size > >> + && targetm.scalar_mode_supported_p (mode)) > >> + result = mode; > >> + } > >> > >> return result; > >> } > >> @@ -1061,8 +1091,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, > unsigned int align, > >> { > >> /* NB: Round up L and ALIGN to the widest integer mode for > >> MAX_SIZE. */ > >> - mode = widest_fixed_size_mode_for_size (max_size, > >> - op == SET_BY_PIECES); > >> + mode = widest_fixed_size_mode_for_size (max_size, op); > >> if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) > >> { > >> unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE > (mode)); > >> @@ -1076,8 +1105,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, > unsigned int align, > >> > >> while (max_size > 1 && l > 0) > >> { > >> - mode = widest_fixed_size_mode_for_size (max_size, > >> - op == SET_BY_PIECES); > >> + mode = widest_fixed_size_mode_for_size (max_size, op); > >> enum insn_code icode; > >> > >> unsigned int modesize = GET_MODE_SIZE (mode); > >> @@ -1317,8 +1345,8 @@ class op_by_pieces_d > >> bool m_push; > >> /* True if targetm.overlap_op_by_pieces_p () returns true. */ > >> bool m_overlap_op_by_pieces; > >> - /* True if QI vector mode can be used. */ > >> - bool m_qi_vector_mode; > >> + /* The type of operation that we're performing. */ > >> + by_pieces_operation m_op; > >> > >> /* Virtual functions, overriden by derived classes for the specific > >> operation. */ > >> @@ -1331,7 +1359,7 @@ class op_by_pieces_d > >> public: > >> op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn, > >> void *, unsigned HOST_WIDE_INT, unsigned int, bool, > >> - bool = false); > >> + by_pieces_operation); > >> void run (); > >> }; > >> > >> @@ -1349,11 +1377,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned > int max_pieces, rtx to, > >> void *from_cfn_data, > >> unsigned HOST_WIDE_INT len, > >> unsigned int align, bool push, > >> - bool qi_vector_mode) > >> + by_pieces_operation op) > >> : m_to (to, to_load, NULL, NULL), > >> m_from (from, from_load, from_cfn, from_cfn_data), > >> m_len (len), m_max_size (max_pieces + 1), > >> - m_push (push), m_qi_vector_mode (qi_vector_mode) > >> + m_push (push), m_op (op) > >> { > >> int toi = m_to.get_addr_inc (); > >> int fromi = m_from.get_addr_inc (); > >> @@ -1375,8 +1403,7 @@ op_by_pieces_d::op_by_pieces_d (unsigned int > max_pieces, rtx to, > >> { > >> /* Find the mode of the largest comparison. */ > >> fixed_size_mode mode > >> - = widest_fixed_size_mode_for_size (m_max_size, > >> - m_qi_vector_mode); > >> + = widest_fixed_size_mode_for_size (m_max_size, m_op); > >> > >> m_from.decide_autoinc (mode, m_reverse, len); > >> m_to.decide_autoinc (mode, m_reverse, len); > >> @@ -1401,7 +1428,7 @@ op_by_pieces_d::get_usable_mode > (fixed_size_mode mode, unsigned int len) > >> if (len >= size && prepare_mode (mode, m_align)) > >> break; > >> /* widest_fixed_size_mode_for_size checks SIZE > 1. */ > >> - mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); > >> + mode = widest_fixed_size_mode_for_size (size, m_op); > >> } > >> while (1); > >> return mode; > >> @@ -1414,7 +1441,7 @@ fixed_size_mode > >> op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > >> { > >> /* Use QI vector only for > size of WORD. */ > >> - if (m_qi_vector_mode && size > UNITS_PER_WORD) > >> + if (can_use_qi_vectors (m_op) && size > UNITS_PER_WORD) > >> { > >> machine_mode mode; > >> fixed_size_mode candidate; > >> @@ -1427,8 +1454,7 @@ > op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > >> break; > >> > >> if (GET_MODE_SIZE (candidate) >= size > >> - && (optab_handler (vec_duplicate_optab, candidate) > >> - != CODE_FOR_nothing)) > >> + && qi_vector_mode_supported_p (candidate, m_op)) > >> return candidate; > >> } > >> } > >> @@ -1451,7 +1477,7 @@ op_by_pieces_d::run () > >> > >> /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */ > >> fixed_size_mode mode > >> - = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode); > >> + = widest_fixed_size_mode_for_size (m_max_size, m_op); > >> mode = get_usable_mode (mode, length); > >> > >> by_pieces_prev to_prev = { nullptr, mode }; > >> @@ -1516,8 +1542,7 @@ op_by_pieces_d::run () > >> else > >> { > >> /* widest_fixed_size_mode_for_size checks SIZE > 1. */ > >> - mode = widest_fixed_size_mode_for_size (size, > >> - m_qi_vector_mode); > >> + mode = widest_fixed_size_mode_for_size (size, m_op); > >> mode = get_usable_mode (mode, length); > >> } > >> } > >> @@ -1543,7 +1568,7 @@ class move_by_pieces_d : public > op_by_pieces_d > >> move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, > >> unsigned int align) > >> : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL, > >> - NULL, len, align, PUSHG_P (to)) > >> + NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES) > >> { > >> } > >> rtx finish_retmode (memop_ret); > >> @@ -1632,15 +1657,16 @@ move_by_pieces (rtx to, rtx from, unsigned > HOST_WIDE_INT len, > >> class store_by_pieces_d : public op_by_pieces_d > >> { > >> insn_gen_fn m_gen_fun; > >> + > >> void generate (rtx, rtx, machine_mode) final override; > >> bool prepare_mode (machine_mode, unsigned int) final override; > >> > >> public: > >> store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, > >> unsigned HOST_WIDE_INT len, unsigned int align, > >> - bool qi_vector_mode) > >> + by_pieces_operation op) > >> : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn, > >> - cfn_data, len, align, false, qi_vector_mode) > >> + cfn_data, len, align, false, op) > >> { > >> } > >> rtx finish_retmode (memop_ret); > >> @@ -1729,8 +1755,8 @@ can_store_by_pieces (unsigned > HOST_WIDE_INT len, > >> max_size = STORE_MAX_PIECES + 1; > >> while (max_size > 1 && l > 0) > >> { > >> - fixed_size_mode mode > >> - = widest_fixed_size_mode_for_size (max_size, memsetp); > >> + auto op = memsetp ? SET_BY_PIECES : STORE_BY_PIECES; > >> + auto mode = widest_fixed_size_mode_for_size (max_size, op); > >> > >> icode = optab_handler (mov_optab, mode); > >> if (icode != CODE_FOR_nothing > >> @@ -1793,7 +1819,7 @@ store_by_pieces (rtx to, unsigned > HOST_WIDE_INT len, > >> optimize_insn_for_speed_p ())); > >> > >> store_by_pieces_d data (to, constfun, constfundata, len, align, > >> - memsetp); > >> + memsetp ? SET_BY_PIECES : STORE_BY_PIECES); > >> data.run (); > >> > >> if (retmode != RETURN_BEGIN) > >> @@ -1814,7 +1840,7 @@ clear_by_pieces (rtx to, unsigned > HOST_WIDE_INT len, unsigned int align) > >> /* Use builtin_memset_read_str to support vector mode broadcast. */ > >> char c = 0; > >> store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align, > >> - true); > >> + CLEAR_BY_PIECES); > >> data.run (); > >> } > >> > >> @@ -1832,12 +1858,13 @@ class compare_by_pieces_d : public > op_by_pieces_d > >> void generate (rtx, rtx, machine_mode) final override; > >> bool prepare_mode (machine_mode, unsigned int) final override; > >> void finish_mode (machine_mode) final override; > >> + > >> public: > >> compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, > >> void *op1_cfn_data, HOST_WIDE_INT len, int align, > >> rtx_code_label *fail_label) > >> : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, > op1_cfn, > >> - op1_cfn_data, len, align, false) > >> + op1_cfn_data, len, align, false, COMPARE_BY_PIECES) > >> { > >> m_fail_label = fail_label; > >> }
It seems that the mail got caught elsewhere and did not send into gcc-patches mailing thread. Resending that. Thx, Haochen -----Original Message----- From: Jiang, Haochen Sent: Tuesday, October 24, 2023 4:43 PM To: HAO CHEN GUI <guihaoc@linux.ibm.com>; Richard Sandiford <richard.sandiford@arm.com> Cc: gcc-patches <gcc-patches@gcc.gnu.org> Subject: RE: [PATCH-1v4, expand] Enable vector mode for compare_by_pieces [PR111449] Hi Haochen Gui, It seems that the commit caused lots of test case fail on x86 platforms: https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078379.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078380.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078381.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078382.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078383.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078384.html Please help verify that if we need some testcase change or we get bug here. A simple reproducer under build folder is: make check RUNTESTFLAGS="i386.exp=g++.target/i386/pr80566-2.C --target_board='unix{-m64\ -march=cascadelake,-m32\ -march=cascadelake,-m32,-m64}'" Thx, Haochen > -----Original Message----- > From: HAO CHEN GUI <guihaoc@linux.ibm.com> > Sent: Monday, October 23, 2023 9:30 AM > To: Richard Sandiford <richard.sandiford@arm.com> > Cc: gcc-patches <gcc-patches@gcc.gnu.org> > Subject: Re: [PATCH-1v4, expand] Enable vector mode for > compare_by_pieces [PR111449] > > Committed as r14-4835. > > https://gcc.gnu.org/g:f08ca5903c7a02b450b93143467f70b9fd8e0085 > > Thanks > Gui Haochen > > 在 2023/10/20 16:49, Richard Sandiford 写道: > > HAO CHEN GUI <guihaoc@linux.ibm.com> writes: > >> Hi, > >> Vector mode instructions are efficient for compare on some targets. > >> This patch enables vector mode for compare_by_pieces. Two help > >> functions are added to check if vector mode is available for > >> certain by pieces operations and if if optabs exists for the mode > >> and certain by pieces operations. One member is added in class > >> op_by_pieces_d to record the type of operations. > >> > >> The test case is in the second patch which is rs6000 specific. > >> > >> Compared to last version, the main change is to add a target hook > >> check - scalar_mode_supported_p when retrieving the available > >> scalar modes. The mode which is not supported for a target should be skipped. > >> (e.g. TImode on ppc). Also some function names and comments are > >> refined according to reviewer's advice. > >> > >> Bootstrapped and tested on x86 and powerpc64-linux BE and LE with > >> no regressions. > >> > >> Thanks > >> Gui Haochen > >> > >> ChangeLog > >> Expand: Enable vector mode for by pieces compares > >> > >> Vector mode compare instructions are efficient for equality compare > >> on rs6000. This patch refactors the codes of by pieces operation to > >> enable vector mode for compare. > >> > >> gcc/ > >> PR target/111449 > >> * expr.cc (can_use_qi_vectors): New function to return true if > >> we know how to implement OP using vectors of bytes. > >> (qi_vector_mode_supported_p): New function to check if optabs > >> exists for the mode and certain by pieces operations. > >> (widest_fixed_size_mode_for_size): Replace the second argument > >> with the type of by pieces operations. Call can_use_qi_vectors > >> and qi_vector_mode_supported_p to do the check. Call > >> scalar_mode_supported_p to check if the scalar mode is supported. > >> (by_pieces_ninsns): Pass the type of by pieces operation to > >> widest_fixed_size_mode_for_size. > >> (class op_by_pieces_d): Remove m_qi_vector_mode. Add m_op to > >> record the type of by pieces operations. > >> (op_by_pieces_d::op_by_pieces_d): Change last argument to the > >> type of by pieces operations, initialize m_op with it. Pass > >> m_op to function widest_fixed_size_mode_for_size. > >> (op_by_pieces_d::get_usable_mode): Pass m_op to function > >> widest_fixed_size_mode_for_size. > >> (op_by_pieces_d::smallest_fixed_size_mode_for_size): Call > >> can_use_qi_vectors and qi_vector_mode_supported_p to do the > >> check. > >> (op_by_pieces_d::run): Pass m_op to function > >> widest_fixed_size_mode_for_size. > >> (move_by_pieces_d::move_by_pieces_d): Set m_op to > MOVE_BY_PIECES. > >> (store_by_pieces_d::store_by_pieces_d): Set m_op with the op. > >> (can_store_by_pieces): Pass the type of by pieces operations to > >> widest_fixed_size_mode_for_size. > >> (clear_by_pieces): Initialize class store_by_pieces_d with > >> CLEAR_BY_PIECES. > >> (compare_by_pieces_d::compare_by_pieces_d): Set m_op to > >> COMPARE_BY_PIECES. > > > > OK, thanks. And thanks for your patience. > > > > Richard > > > >> patch.diff > >> diff --git a/gcc/expr.cc b/gcc/expr.cc index > >> 2c9930ec674..ad5f9dd8ec2 100644 > >> --- a/gcc/expr.cc > >> +++ b/gcc/expr.cc > >> @@ -988,18 +988,44 @@ alignment_for_piecewise_move (unsigned int > max_pieces, unsigned int align) > >> return align; > >> } > >> > >> -/* Return the widest QI vector, if QI_MODE is true, or integer mode > >> - that is narrower than SIZE bytes. */ > >> +/* Return true if we know how to implement OP using vectors of > >> +bytes. */ static bool can_use_qi_vectors (by_pieces_operation op) > >> +{ > >> + return (op == COMPARE_BY_PIECES > >> + || op == SET_BY_PIECES > >> + || op == CLEAR_BY_PIECES); > >> +} > >> + > >> +/* Return true if optabs exists for the mode and certain by pieces > >> + operations. */ > >> +static bool > >> +qi_vector_mode_supported_p (fixed_size_mode mode, > by_pieces_operation op) > >> +{ > >> + if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) > >> + && optab_handler (vec_duplicate_optab, mode) != > CODE_FOR_nothing) > >> + return true; > >> + > >> + if (op == COMPARE_BY_PIECES > >> + && optab_handler (mov_optab, mode) != CODE_FOR_nothing > >> + && can_compare_p (EQ, mode, ccp_jump)) > >> + return true; > >> > >> + return false; > >> +} > >> + > >> +/* Return the widest mode that can be used to perform part of an > >> + operation OP on SIZE bytes. Try to use QI vector modes where > >> + possible. */ > >> static fixed_size_mode > >> -widest_fixed_size_mode_for_size (unsigned int size, bool > >> qi_vector) > >> +widest_fixed_size_mode_for_size (unsigned int size, > >> +by_pieces_operation > op) > >> { > >> fixed_size_mode result = NARROWEST_INT_MODE; > >> > >> gcc_checking_assert (size > 1); > >> > >> /* Use QI vector only if size is wider than a WORD. */ > >> - if (qi_vector && size > UNITS_PER_WORD) > >> + if (can_use_qi_vectors (op) && size > UNITS_PER_WORD) > >> { > >> machine_mode mode; > >> fixed_size_mode candidate; > >> @@ -1009,8 +1035,7 @@ widest_fixed_size_mode_for_size (unsigned int > size, bool qi_vector) > >> { > >> if (GET_MODE_SIZE (candidate) >= size) > >> break; > >> - if (optab_handler (vec_duplicate_optab, candidate) > >> - != CODE_FOR_nothing) > >> + if (qi_vector_mode_supported_p (candidate, op)) > >> result = candidate; > >> } > >> > >> @@ -1019,9 +1044,14 @@ widest_fixed_size_mode_for_size (unsigned > int size, bool qi_vector) > >> } > >> > >> opt_scalar_int_mode tmode; > >> + scalar_int_mode mode; > >> FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) > >> - if (GET_MODE_SIZE (tmode.require ()) < size) > >> - result = tmode.require (); > >> + { > >> + mode = tmode.require (); > >> + if (GET_MODE_SIZE (mode) < size > >> + && targetm.scalar_mode_supported_p (mode)) > >> + result = mode; > >> + } > >> > >> return result; > >> } > >> @@ -1061,8 +1091,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, > unsigned int align, > >> { > >> /* NB: Round up L and ALIGN to the widest integer mode for > >> MAX_SIZE. */ > >> - mode = widest_fixed_size_mode_for_size (max_size, > >> - op == SET_BY_PIECES); > >> + mode = widest_fixed_size_mode_for_size (max_size, op); > >> if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) > >> { > >> unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE > (mode)); > >> @@ -1076,8 +1105,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, > unsigned int align, > >> > >> while (max_size > 1 && l > 0) > >> { > >> - mode = widest_fixed_size_mode_for_size (max_size, > >> - op == SET_BY_PIECES); > >> + mode = widest_fixed_size_mode_for_size (max_size, op); > >> enum insn_code icode; > >> > >> unsigned int modesize = GET_MODE_SIZE (mode); @@ -1317,8 > >> +1345,8 @@ class op_by_pieces_d > >> bool m_push; > >> /* True if targetm.overlap_op_by_pieces_p () returns true. */ > >> bool m_overlap_op_by_pieces; > >> - /* True if QI vector mode can be used. */ > >> - bool m_qi_vector_mode; > >> + /* The type of operation that we're performing. */ > >> + by_pieces_operation m_op; > >> > >> /* Virtual functions, overriden by derived classes for the specific > >> operation. */ > >> @@ -1331,7 +1359,7 @@ class op_by_pieces_d > >> public: > >> op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn, > >> void *, unsigned HOST_WIDE_INT, unsigned int, bool, > >> - bool = false); > >> + by_pieces_operation); > >> void run (); > >> }; > >> > >> @@ -1349,11 +1377,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned > int max_pieces, rtx to, > >> void *from_cfn_data, > >> unsigned HOST_WIDE_INT len, > >> unsigned int align, bool push, > >> - bool qi_vector_mode) > >> + by_pieces_operation op) > >> : m_to (to, to_load, NULL, NULL), > >> m_from (from, from_load, from_cfn, from_cfn_data), > >> m_len (len), m_max_size (max_pieces + 1), > >> - m_push (push), m_qi_vector_mode (qi_vector_mode) > >> + m_push (push), m_op (op) > >> { > >> int toi = m_to.get_addr_inc (); > >> int fromi = m_from.get_addr_inc (); @@ -1375,8 +1403,7 @@ > >> op_by_pieces_d::op_by_pieces_d (unsigned int > max_pieces, rtx to, > >> { > >> /* Find the mode of the largest comparison. */ > >> fixed_size_mode mode > >> - = widest_fixed_size_mode_for_size (m_max_size, > >> - m_qi_vector_mode); > >> + = widest_fixed_size_mode_for_size (m_max_size, m_op); > >> > >> m_from.decide_autoinc (mode, m_reverse, len); > >> m_to.decide_autoinc (mode, m_reverse, len); @@ -1401,7 > >> +1428,7 @@ op_by_pieces_d::get_usable_mode > (fixed_size_mode mode, unsigned int len) > >> if (len >= size && prepare_mode (mode, m_align)) > >> break; > >> /* widest_fixed_size_mode_for_size checks SIZE > 1. */ > >> - mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); > >> + mode = widest_fixed_size_mode_for_size (size, m_op); > >> } > >> while (1); > >> return mode; > >> @@ -1414,7 +1441,7 @@ fixed_size_mode > >> op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int > >> size) { > >> /* Use QI vector only for > size of WORD. */ > >> - if (m_qi_vector_mode && size > UNITS_PER_WORD) > >> + if (can_use_qi_vectors (m_op) && size > UNITS_PER_WORD) > >> { > >> machine_mode mode; > >> fixed_size_mode candidate; > >> @@ -1427,8 +1454,7 @@ > op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > >> break; > >> > >> if (GET_MODE_SIZE (candidate) >= size > >> - && (optab_handler (vec_duplicate_optab, candidate) > >> - != CODE_FOR_nothing)) > >> + && qi_vector_mode_supported_p (candidate, m_op)) > >> return candidate; > >> } > >> } > >> @@ -1451,7 +1477,7 @@ op_by_pieces_d::run () > >> > >> /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */ > >> fixed_size_mode mode > >> - = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode); > >> + = widest_fixed_size_mode_for_size (m_max_size, m_op); > >> mode = get_usable_mode (mode, length); > >> > >> by_pieces_prev to_prev = { nullptr, mode }; @@ -1516,8 +1542,7 > >> @@ op_by_pieces_d::run () > >> else > >> { > >> /* widest_fixed_size_mode_for_size checks SIZE > 1. */ > >> - mode = widest_fixed_size_mode_for_size (size, > >> - m_qi_vector_mode); > >> + mode = widest_fixed_size_mode_for_size (size, m_op); > >> mode = get_usable_mode (mode, length); > >> } > >> } > >> @@ -1543,7 +1568,7 @@ class move_by_pieces_d : public > op_by_pieces_d > >> move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, > >> unsigned int align) > >> : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL, > >> - NULL, len, align, PUSHG_P (to)) > >> + NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES) > >> { > >> } > >> rtx finish_retmode (memop_ret); > >> @@ -1632,15 +1657,16 @@ move_by_pieces (rtx to, rtx from, unsigned > HOST_WIDE_INT len, > >> class store_by_pieces_d : public op_by_pieces_d { > >> insn_gen_fn m_gen_fun; > >> + > >> void generate (rtx, rtx, machine_mode) final override; > >> bool prepare_mode (machine_mode, unsigned int) final override; > >> > >> public: > >> store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, > >> unsigned HOST_WIDE_INT len, unsigned int align, > >> - bool qi_vector_mode) > >> + by_pieces_operation op) > >> : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn, > >> - cfn_data, len, align, false, qi_vector_mode) > >> + cfn_data, len, align, false, op) > >> { > >> } > >> rtx finish_retmode (memop_ret); > >> @@ -1729,8 +1755,8 @@ can_store_by_pieces (unsigned > HOST_WIDE_INT len, > >> max_size = STORE_MAX_PIECES + 1; > >> while (max_size > 1 && l > 0) > >> { > >> - fixed_size_mode mode > >> - = widest_fixed_size_mode_for_size (max_size, memsetp); > >> + auto op = memsetp ? SET_BY_PIECES : STORE_BY_PIECES; > >> + auto mode = widest_fixed_size_mode_for_size (max_size, op); > >> > >> icode = optab_handler (mov_optab, mode); > >> if (icode != CODE_FOR_nothing > >> @@ -1793,7 +1819,7 @@ store_by_pieces (rtx to, unsigned > HOST_WIDE_INT len, > >> optimize_insn_for_speed_p ())); > >> > >> store_by_pieces_d data (to, constfun, constfundata, len, align, > >> - memsetp); > >> + memsetp ? SET_BY_PIECES : STORE_BY_PIECES); > >> data.run (); > >> > >> if (retmode != RETURN_BEGIN) > >> @@ -1814,7 +1840,7 @@ clear_by_pieces (rtx to, unsigned > HOST_WIDE_INT len, unsigned int align) > >> /* Use builtin_memset_read_str to support vector mode broadcast. */ > >> char c = 0; > >> store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align, > >> - true); > >> + CLEAR_BY_PIECES); > >> data.run (); > >> } > >> > >> @@ -1832,12 +1858,13 @@ class compare_by_pieces_d : public > op_by_pieces_d > >> void generate (rtx, rtx, machine_mode) final override; > >> bool prepare_mode (machine_mode, unsigned int) final override; > >> void finish_mode (machine_mode) final override; > >> + > >> public: > >> compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, > >> void *op1_cfn_data, HOST_WIDE_INT len, int align, > >> rtx_code_label *fail_label) > >> : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, > op1_cfn, > >> - op1_cfn_data, len, align, false) > >> + op1_cfn_data, len, align, false, COMPARE_BY_PIECES) > >> { > >> m_fail_label = fail_label; > >> }
OK, I will take it. Thanks Gui Haochen 在 2023/10/24 16:49, Jiang, Haochen 写道: > It seems that the mail got caught elsewhere and did not send into gcc-patches > mailing thread. Resending that. > > Thx, > Haochen > > -----Original Message----- > From: Jiang, Haochen > Sent: Tuesday, October 24, 2023 4:43 PM > To: HAO CHEN GUI <guihaoc@linux.ibm.com>; Richard Sandiford <richard.sandiford@arm.com> > Cc: gcc-patches <gcc-patches@gcc.gnu.org> > Subject: RE: [PATCH-1v4, expand] Enable vector mode for compare_by_pieces [PR111449] > > Hi Haochen Gui, > > It seems that the commit caused lots of test case fail on x86 platforms: > > https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078379.html > https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078380.html > https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078381.html > https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078382.html > https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078383.html > https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078384.html > > Please help verify that if we need some testcase change or we get bug here. > > A simple reproducer under build folder is: > > make check RUNTESTFLAGS="i386.exp=g++.target/i386/pr80566-2.C --target_board='unix{-m64\ -march=cascadelake,-m32\ -march=cascadelake,-m32,-m64}'" > > Thx, > Haochen > >> -----Original Message----- >> From: HAO CHEN GUI <guihaoc@linux.ibm.com> >> Sent: Monday, October 23, 2023 9:30 AM >> To: Richard Sandiford <richard.sandiford@arm.com> >> Cc: gcc-patches <gcc-patches@gcc.gnu.org> >> Subject: Re: [PATCH-1v4, expand] Enable vector mode for >> compare_by_pieces [PR111449] >> >> Committed as r14-4835. >> >> https://gcc.gnu.org/g:f08ca5903c7a02b450b93143467f70b9fd8e0085 >> >> Thanks >> Gui Haochen >> >> 在 2023/10/20 16:49, Richard Sandiford 写道: >>> HAO CHEN GUI <guihaoc@linux.ibm.com> writes: >>>> Hi, >>>> Vector mode instructions are efficient for compare on some targets. >>>> This patch enables vector mode for compare_by_pieces. Two help >>>> functions are added to check if vector mode is available for >>>> certain by pieces operations and if if optabs exists for the mode >>>> and certain by pieces operations. One member is added in class >>>> op_by_pieces_d to record the type of operations. >>>> >>>> The test case is in the second patch which is rs6000 specific. >>>> >>>> Compared to last version, the main change is to add a target hook >>>> check - scalar_mode_supported_p when retrieving the available >>>> scalar modes. The mode which is not supported for a target should be skipped. >>>> (e.g. TImode on ppc). Also some function names and comments are >>>> refined according to reviewer's advice. >>>> >>>> Bootstrapped and tested on x86 and powerpc64-linux BE and LE with >>>> no regressions. >>>> >>>> Thanks >>>> Gui Haochen >>>> >>>> ChangeLog >>>> Expand: Enable vector mode for by pieces compares >>>> >>>> Vector mode compare instructions are efficient for equality compare >>>> on rs6000. This patch refactors the codes of by pieces operation to >>>> enable vector mode for compare. >>>> >>>> gcc/ >>>> PR target/111449 >>>> * expr.cc (can_use_qi_vectors): New function to return true if >>>> we know how to implement OP using vectors of bytes. >>>> (qi_vector_mode_supported_p): New function to check if optabs >>>> exists for the mode and certain by pieces operations. >>>> (widest_fixed_size_mode_for_size): Replace the second argument >>>> with the type of by pieces operations. Call can_use_qi_vectors >>>> and qi_vector_mode_supported_p to do the check. Call >>>> scalar_mode_supported_p to check if the scalar mode is supported. >>>> (by_pieces_ninsns): Pass the type of by pieces operation to >>>> widest_fixed_size_mode_for_size. >>>> (class op_by_pieces_d): Remove m_qi_vector_mode. Add m_op to >>>> record the type of by pieces operations. >>>> (op_by_pieces_d::op_by_pieces_d): Change last argument to the >>>> type of by pieces operations, initialize m_op with it. Pass >>>> m_op to function widest_fixed_size_mode_for_size. >>>> (op_by_pieces_d::get_usable_mode): Pass m_op to function >>>> widest_fixed_size_mode_for_size. >>>> (op_by_pieces_d::smallest_fixed_size_mode_for_size): Call >>>> can_use_qi_vectors and qi_vector_mode_supported_p to do the >>>> check. >>>> (op_by_pieces_d::run): Pass m_op to function >>>> widest_fixed_size_mode_for_size. >>>> (move_by_pieces_d::move_by_pieces_d): Set m_op to >> MOVE_BY_PIECES. >>>> (store_by_pieces_d::store_by_pieces_d): Set m_op with the op. >>>> (can_store_by_pieces): Pass the type of by pieces operations to >>>> widest_fixed_size_mode_for_size. >>>> (clear_by_pieces): Initialize class store_by_pieces_d with >>>> CLEAR_BY_PIECES. >>>> (compare_by_pieces_d::compare_by_pieces_d): Set m_op to >>>> COMPARE_BY_PIECES. >>> >>> OK, thanks. And thanks for your patience. >>> >>> Richard >>> >>>> patch.diff >>>> diff --git a/gcc/expr.cc b/gcc/expr.cc index >>>> 2c9930ec674..ad5f9dd8ec2 100644 >>>> --- a/gcc/expr.cc >>>> +++ b/gcc/expr.cc >>>> @@ -988,18 +988,44 @@ alignment_for_piecewise_move (unsigned int >> max_pieces, unsigned int align) >>>> return align; >>>> } >>>> >>>> -/* Return the widest QI vector, if QI_MODE is true, or integer mode >>>> - that is narrower than SIZE bytes. */ >>>> +/* Return true if we know how to implement OP using vectors of >>>> +bytes. */ static bool can_use_qi_vectors (by_pieces_operation op) >>>> +{ >>>> + return (op == COMPARE_BY_PIECES >>>> + || op == SET_BY_PIECES >>>> + || op == CLEAR_BY_PIECES); >>>> +} >>>> + >>>> +/* Return true if optabs exists for the mode and certain by pieces >>>> + operations. */ >>>> +static bool >>>> +qi_vector_mode_supported_p (fixed_size_mode mode, >> by_pieces_operation op) >>>> +{ >>>> + if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) >>>> + && optab_handler (vec_duplicate_optab, mode) != >> CODE_FOR_nothing) >>>> + return true; >>>> + >>>> + if (op == COMPARE_BY_PIECES >>>> + && optab_handler (mov_optab, mode) != CODE_FOR_nothing >>>> + && can_compare_p (EQ, mode, ccp_jump)) >>>> + return true; >>>> >>>> + return false; >>>> +} >>>> + >>>> +/* Return the widest mode that can be used to perform part of an >>>> + operation OP on SIZE bytes. Try to use QI vector modes where >>>> + possible. */ >>>> static fixed_size_mode >>>> -widest_fixed_size_mode_for_size (unsigned int size, bool >>>> qi_vector) >>>> +widest_fixed_size_mode_for_size (unsigned int size, >>>> +by_pieces_operation >> op) >>>> { >>>> fixed_size_mode result = NARROWEST_INT_MODE; >>>> >>>> gcc_checking_assert (size > 1); >>>> >>>> /* Use QI vector only if size is wider than a WORD. */ >>>> - if (qi_vector && size > UNITS_PER_WORD) >>>> + if (can_use_qi_vectors (op) && size > UNITS_PER_WORD) >>>> { >>>> machine_mode mode; >>>> fixed_size_mode candidate; >>>> @@ -1009,8 +1035,7 @@ widest_fixed_size_mode_for_size (unsigned int >> size, bool qi_vector) >>>> { >>>> if (GET_MODE_SIZE (candidate) >= size) >>>> break; >>>> - if (optab_handler (vec_duplicate_optab, candidate) >>>> - != CODE_FOR_nothing) >>>> + if (qi_vector_mode_supported_p (candidate, op)) >>>> result = candidate; >>>> } >>>> >>>> @@ -1019,9 +1044,14 @@ widest_fixed_size_mode_for_size (unsigned >> int size, bool qi_vector) >>>> } >>>> >>>> opt_scalar_int_mode tmode; >>>> + scalar_int_mode mode; >>>> FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) >>>> - if (GET_MODE_SIZE (tmode.require ()) < size) >>>> - result = tmode.require (); >>>> + { >>>> + mode = tmode.require (); >>>> + if (GET_MODE_SIZE (mode) < size >>>> + && targetm.scalar_mode_supported_p (mode)) >>>> + result = mode; >>>> + } >>>> >>>> return result; >>>> } >>>> @@ -1061,8 +1091,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, >> unsigned int align, >>>> { >>>> /* NB: Round up L and ALIGN to the widest integer mode for >>>> MAX_SIZE. */ >>>> - mode = widest_fixed_size_mode_for_size (max_size, >>>> - op == SET_BY_PIECES); >>>> + mode = widest_fixed_size_mode_for_size (max_size, op); >>>> if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) >>>> { >>>> unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE >> (mode)); >>>> @@ -1076,8 +1105,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, >> unsigned int align, >>>> >>>> while (max_size > 1 && l > 0) >>>> { >>>> - mode = widest_fixed_size_mode_for_size (max_size, >>>> - op == SET_BY_PIECES); >>>> + mode = widest_fixed_size_mode_for_size (max_size, op); >>>> enum insn_code icode; >>>> >>>> unsigned int modesize = GET_MODE_SIZE (mode); @@ -1317,8 >>>> +1345,8 @@ class op_by_pieces_d >>>> bool m_push; >>>> /* True if targetm.overlap_op_by_pieces_p () returns true. */ >>>> bool m_overlap_op_by_pieces; >>>> - /* True if QI vector mode can be used. */ >>>> - bool m_qi_vector_mode; >>>> + /* The type of operation that we're performing. */ >>>> + by_pieces_operation m_op; >>>> >>>> /* Virtual functions, overriden by derived classes for the specific >>>> operation. */ >>>> @@ -1331,7 +1359,7 @@ class op_by_pieces_d >>>> public: >>>> op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn, >>>> void *, unsigned HOST_WIDE_INT, unsigned int, bool, >>>> - bool = false); >>>> + by_pieces_operation); >>>> void run (); >>>> }; >>>> >>>> @@ -1349,11 +1377,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned >> int max_pieces, rtx to, >>>> void *from_cfn_data, >>>> unsigned HOST_WIDE_INT len, >>>> unsigned int align, bool push, >>>> - bool qi_vector_mode) >>>> + by_pieces_operation op) >>>> : m_to (to, to_load, NULL, NULL), >>>> m_from (from, from_load, from_cfn, from_cfn_data), >>>> m_len (len), m_max_size (max_pieces + 1), >>>> - m_push (push), m_qi_vector_mode (qi_vector_mode) >>>> + m_push (push), m_op (op) >>>> { >>>> int toi = m_to.get_addr_inc (); >>>> int fromi = m_from.get_addr_inc (); @@ -1375,8 +1403,7 @@ >>>> op_by_pieces_d::op_by_pieces_d (unsigned int >> max_pieces, rtx to, >>>> { >>>> /* Find the mode of the largest comparison. */ >>>> fixed_size_mode mode >>>> - = widest_fixed_size_mode_for_size (m_max_size, >>>> - m_qi_vector_mode); >>>> + = widest_fixed_size_mode_for_size (m_max_size, m_op); >>>> >>>> m_from.decide_autoinc (mode, m_reverse, len); >>>> m_to.decide_autoinc (mode, m_reverse, len); @@ -1401,7 >>>> +1428,7 @@ op_by_pieces_d::get_usable_mode >> (fixed_size_mode mode, unsigned int len) >>>> if (len >= size && prepare_mode (mode, m_align)) >>>> break; >>>> /* widest_fixed_size_mode_for_size checks SIZE > 1. */ >>>> - mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); >>>> + mode = widest_fixed_size_mode_for_size (size, m_op); >>>> } >>>> while (1); >>>> return mode; >>>> @@ -1414,7 +1441,7 @@ fixed_size_mode >>>> op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int >>>> size) { >>>> /* Use QI vector only for > size of WORD. */ >>>> - if (m_qi_vector_mode && size > UNITS_PER_WORD) >>>> + if (can_use_qi_vectors (m_op) && size > UNITS_PER_WORD) >>>> { >>>> machine_mode mode; >>>> fixed_size_mode candidate; >>>> @@ -1427,8 +1454,7 @@ >> op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) >>>> break; >>>> >>>> if (GET_MODE_SIZE (candidate) >= size >>>> - && (optab_handler (vec_duplicate_optab, candidate) >>>> - != CODE_FOR_nothing)) >>>> + && qi_vector_mode_supported_p (candidate, m_op)) >>>> return candidate; >>>> } >>>> } >>>> @@ -1451,7 +1477,7 @@ op_by_pieces_d::run () >>>> >>>> /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */ >>>> fixed_size_mode mode >>>> - = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode); >>>> + = widest_fixed_size_mode_for_size (m_max_size, m_op); >>>> mode = get_usable_mode (mode, length); >>>> >>>> by_pieces_prev to_prev = { nullptr, mode }; @@ -1516,8 +1542,7 >>>> @@ op_by_pieces_d::run () >>>> else >>>> { >>>> /* widest_fixed_size_mode_for_size checks SIZE > 1. */ >>>> - mode = widest_fixed_size_mode_for_size (size, >>>> - m_qi_vector_mode); >>>> + mode = widest_fixed_size_mode_for_size (size, m_op); >>>> mode = get_usable_mode (mode, length); >>>> } >>>> } >>>> @@ -1543,7 +1568,7 @@ class move_by_pieces_d : public >> op_by_pieces_d >>>> move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, >>>> unsigned int align) >>>> : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL, >>>> - NULL, len, align, PUSHG_P (to)) >>>> + NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES) >>>> { >>>> } >>>> rtx finish_retmode (memop_ret); >>>> @@ -1632,15 +1657,16 @@ move_by_pieces (rtx to, rtx from, unsigned >> HOST_WIDE_INT len, >>>> class store_by_pieces_d : public op_by_pieces_d { >>>> insn_gen_fn m_gen_fun; >>>> + >>>> void generate (rtx, rtx, machine_mode) final override; >>>> bool prepare_mode (machine_mode, unsigned int) final override; >>>> >>>> public: >>>> store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, >>>> unsigned HOST_WIDE_INT len, unsigned int align, >>>> - bool qi_vector_mode) >>>> + by_pieces_operation op) >>>> : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn, >>>> - cfn_data, len, align, false, qi_vector_mode) >>>> + cfn_data, len, align, false, op) >>>> { >>>> } >>>> rtx finish_retmode (memop_ret); >>>> @@ -1729,8 +1755,8 @@ can_store_by_pieces (unsigned >> HOST_WIDE_INT len, >>>> max_size = STORE_MAX_PIECES + 1; >>>> while (max_size > 1 && l > 0) >>>> { >>>> - fixed_size_mode mode >>>> - = widest_fixed_size_mode_for_size (max_size, memsetp); >>>> + auto op = memsetp ? SET_BY_PIECES : STORE_BY_PIECES; >>>> + auto mode = widest_fixed_size_mode_for_size (max_size, op); >>>> >>>> icode = optab_handler (mov_optab, mode); >>>> if (icode != CODE_FOR_nothing >>>> @@ -1793,7 +1819,7 @@ store_by_pieces (rtx to, unsigned >> HOST_WIDE_INT len, >>>> optimize_insn_for_speed_p ())); >>>> >>>> store_by_pieces_d data (to, constfun, constfundata, len, align, >>>> - memsetp); >>>> + memsetp ? SET_BY_PIECES : STORE_BY_PIECES); >>>> data.run (); >>>> >>>> if (retmode != RETURN_BEGIN) >>>> @@ -1814,7 +1840,7 @@ clear_by_pieces (rtx to, unsigned >> HOST_WIDE_INT len, unsigned int align) >>>> /* Use builtin_memset_read_str to support vector mode broadcast. */ >>>> char c = 0; >>>> store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align, >>>> - true); >>>> + CLEAR_BY_PIECES); >>>> data.run (); >>>> } >>>> >>>> @@ -1832,12 +1858,13 @@ class compare_by_pieces_d : public >> op_by_pieces_d >>>> void generate (rtx, rtx, machine_mode) final override; >>>> bool prepare_mode (machine_mode, unsigned int) final override; >>>> void finish_mode (machine_mode) final override; >>>> + >>>> public: >>>> compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, >>>> void *op1_cfn_data, HOST_WIDE_INT len, int align, >>>> rtx_code_label *fail_label) >>>> : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, >> op1_cfn, >>>> - op1_cfn_data, len, align, false) >>>> + op1_cfn_data, len, align, false, COMPARE_BY_PIECES) >>>> { >>>> m_fail_label = fail_label; >>>> }
Hi Haochen, The regression cases are caused by "targetm.scalar_mode_supported_p" I added for scalar mode checking. XImode, OImode and TImode (with -m32) are not enabled in ix86_scalar_mode_supported_p. So they're excluded from by pieces operations on i386. The original code doesn't do a check for scalar modes. I think it might be incorrect as not all scalar modes support move and compare optabs. (e.g. TImode with -m32 on rs6000). I drafted a new patch to manually check optabs for scalar mode. Now both vector and scalar modes are checked for optabs. I did a simple test. All former regression cases are back. Could you help do a full regression test? I am worry about the coverage of my CI system. Thanks Gui Haochen patch.diff diff --git a/gcc/expr.cc b/gcc/expr.cc index 7aac575eff8..2af9fcbed18 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -1000,18 +1000,21 @@ can_use_qi_vectors (by_pieces_operation op) /* Return true if optabs exists for the mode and certain by pieces operations. */ static bool -qi_vector_mode_supported_p (fixed_size_mode mode, by_pieces_operation op) +mode_supported_p (fixed_size_mode mode, by_pieces_operation op) { + if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) + return false; + if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) - && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing) - return true; + && VECTOR_MODE_P (mode) + && optab_handler (vec_duplicate_optab, mode) == CODE_FOR_nothing) + return false; if (op == COMPARE_BY_PIECES - && optab_handler (mov_optab, mode) != CODE_FOR_nothing - && can_compare_p (EQ, mode, ccp_jump)) - return true; + && !can_compare_p (EQ, mode, ccp_jump)) + return false; - return false; + return true; } /* Return the widest mode that can be used to perform part of an @@ -1035,7 +1038,7 @@ widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op) { if (GET_MODE_SIZE (candidate) >= size) break; - if (qi_vector_mode_supported_p (candidate, op)) + if (mode_supported_p (candidate, op)) result = candidate; } @@ -1049,7 +1052,7 @@ widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op) { mode = tmode.require (); if (GET_MODE_SIZE (mode) < size - && targetm.scalar_mode_supported_p (mode)) + && mode_supported_p (mode, op)) result = mode; } @@ -1454,7 +1457,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) break; if (GET_MODE_SIZE (candidate) >= size - && qi_vector_mode_supported_p (candidate, m_op)) + && mode_supported_p (candidate, m_op)) return candidate; } }
HAO CHEN GUI <guihaoc@linux.ibm.com> writes: > Hi Haochen, > The regression cases are caused by "targetm.scalar_mode_supported_p" I added > for scalar mode checking. XImode, OImode and TImode (with -m32) are not > enabled in ix86_scalar_mode_supported_p. So they're excluded from by pieces > operations on i386. > > The original code doesn't do a check for scalar modes. I think it might be > incorrect as not all scalar modes support move and compare optabs. (e.g. > TImode with -m32 on rs6000). > > I drafted a new patch to manually check optabs for scalar mode. Now both > vector and scalar modes are checked for optabs. > > I did a simple test. All former regression cases are back. Could you help do > a full regression test? I am worry about the coverage of my CI system. Thanks for the quick fix. The patch LGTM FWIW. Just a small suggestion for the function name: > > Thanks > Gui Haochen > > patch.diff > diff --git a/gcc/expr.cc b/gcc/expr.cc > index 7aac575eff8..2af9fcbed18 100644 > --- a/gcc/expr.cc > +++ b/gcc/expr.cc > @@ -1000,18 +1000,21 @@ can_use_qi_vectors (by_pieces_operation op) > /* Return true if optabs exists for the mode and certain by pieces > operations. */ > static bool > -qi_vector_mode_supported_p (fixed_size_mode mode, by_pieces_operation op) > +mode_supported_p (fixed_size_mode mode, by_pieces_operation op) Might be worth calling this something more specific, such as by_pieces_mode_supported_p. Otherwise the patch is OK for trunk if it passes the x86 testing. Thanks, Richard > { > + if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) > + return false; > + > if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) > - && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing) > - return true; > + && VECTOR_MODE_P (mode) > + && optab_handler (vec_duplicate_optab, mode) == CODE_FOR_nothing) > + return false; > > if (op == COMPARE_BY_PIECES > - && optab_handler (mov_optab, mode) != CODE_FOR_nothing > - && can_compare_p (EQ, mode, ccp_jump)) > - return true; > + && !can_compare_p (EQ, mode, ccp_jump)) > + return false; > > - return false; > + return true; > } > > /* Return the widest mode that can be used to perform part of an > @@ -1035,7 +1038,7 @@ widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op) > { > if (GET_MODE_SIZE (candidate) >= size) > break; > - if (qi_vector_mode_supported_p (candidate, op)) > + if (mode_supported_p (candidate, op)) > result = candidate; > } > > @@ -1049,7 +1052,7 @@ widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op) > { > mode = tmode.require (); > if (GET_MODE_SIZE (mode) < size > - && targetm.scalar_mode_supported_p (mode)) > + && mode_supported_p (mode, op)) > result = mode; > } > > @@ -1454,7 +1457,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > break; > > if (GET_MODE_SIZE (candidate) >= size > - && qi_vector_mode_supported_p (candidate, m_op)) > + && mode_supported_p (candidate, m_op)) > return candidate; > } > }
> -----Original Message----- > From: Richard Sandiford <richard.sandiford@arm.com> > Sent: Wednesday, October 25, 2023 4:40 PM > To: HAO CHEN GUI <guihaoc@linux.ibm.com> > Cc: Jiang, Haochen <haochen.jiang@intel.com>; gcc-patches <gcc- > patches@gcc.gnu.org> > Subject: Re: [PATCH-1v4, expand] Enable vector mode for compare_by_pieces > [PR111449] > > HAO CHEN GUI <guihaoc@linux.ibm.com> writes: > > Hi Haochen, > > The regression cases are caused by "targetm.scalar_mode_supported_p" > > I added for scalar mode checking. XImode, OImode and TImode (with > > -m32) are not enabled in ix86_scalar_mode_supported_p. So they're > > excluded from by pieces operations on i386. > > > > The original code doesn't do a check for scalar modes. I think it > > might be incorrect as not all scalar modes support move and compare optabs. (e.g. > > TImode with -m32 on rs6000). > > > > I drafted a new patch to manually check optabs for scalar mode. Now > > both vector and scalar modes are checked for optabs. > > > > I did a simple test. All former regression cases are back. Could you > > help do a full regression test? I am worry about the coverage of my CI system. Thanks for that. I am running the regression test now. Thx, Haochen > > Thanks for the quick fix. The patch LGTM FWIW. Just a small suggestion for > the function name: > > > > > Thanks > > Gui Haochen > > > > patch.diff > > diff --git a/gcc/expr.cc b/gcc/expr.cc index 7aac575eff8..2af9fcbed18 > > 100644 > > --- a/gcc/expr.cc > > +++ b/gcc/expr.cc > > @@ -1000,18 +1000,21 @@ can_use_qi_vectors (by_pieces_operation op) > > /* Return true if optabs exists for the mode and certain by pieces > > operations. */ > > static bool > > -qi_vector_mode_supported_p (fixed_size_mode mode, > by_pieces_operation > > op) > > +mode_supported_p (fixed_size_mode mode, by_pieces_operation op) > > Might be worth calling this something more specific, such as > by_pieces_mode_supported_p. > > Otherwise the patch is OK for trunk if it passes the x86 testing. > > Thanks, > Richard > > > { > > + if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) > > + return false; > > + > > if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) > > - && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing) > > - return true; > > + && VECTOR_MODE_P (mode) > > + && optab_handler (vec_duplicate_optab, mode) == > CODE_FOR_nothing) > > + return false; > > > > if (op == COMPARE_BY_PIECES > > - && optab_handler (mov_optab, mode) != CODE_FOR_nothing > > - && can_compare_p (EQ, mode, ccp_jump)) > > - return true; > > + && !can_compare_p (EQ, mode, ccp_jump)) > > + return false; > > > > - return false; > > + return true; > > } > > > > /* Return the widest mode that can be used to perform part of an @@ > > -1035,7 +1038,7 @@ widest_fixed_size_mode_for_size (unsigned int size, > by_pieces_operation op) > > { > > if (GET_MODE_SIZE (candidate) >= size) > > break; > > - if (qi_vector_mode_supported_p (candidate, op)) > > + if (mode_supported_p (candidate, op)) > > result = candidate; > > } > > > > @@ -1049,7 +1052,7 @@ widest_fixed_size_mode_for_size (unsigned int > size, by_pieces_operation op) > > { > > mode = tmode.require (); > > if (GET_MODE_SIZE (mode) < size > > - && targetm.scalar_mode_supported_p (mode)) > > + && mode_supported_p (mode, op)) > > result = mode; > > } > > > > @@ -1454,7 +1457,7 @@ > op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > > break; > > > > if (GET_MODE_SIZE (candidate) >= size > > - && qi_vector_mode_supported_p (candidate, m_op)) > > + && mode_supported_p (candidate, m_op)) > > return candidate; > > } > > }
> -----Original Message----- > From: Jiang, Haochen > Sent: Wednesday, October 25, 2023 4:47 PM > To: Richard Sandiford <richard.sandiford@arm.com>; HAO CHEN GUI > <guihaoc@linux.ibm.com> > Cc: gcc-patches <gcc-patches@gcc.gnu.org> > Subject: RE: [PATCH-1v4, expand] Enable vector mode for compare_by_pieces > [PR111449] > > > -----Original Message----- > > From: Richard Sandiford <richard.sandiford@arm.com> > > Sent: Wednesday, October 25, 2023 4:40 PM > > To: HAO CHEN GUI <guihaoc@linux.ibm.com> > > Cc: Jiang, Haochen <haochen.jiang@intel.com>; gcc-patches <gcc- > > patches@gcc.gnu.org> > > Subject: Re: [PATCH-1v4, expand] Enable vector mode for > > compare_by_pieces [PR111449] > > > > HAO CHEN GUI <guihaoc@linux.ibm.com> writes: > > > Hi Haochen, > > > The regression cases are caused by "targetm.scalar_mode_supported_p" > > > I added for scalar mode checking. XImode, OImode and TImode (with > > > -m32) are not enabled in ix86_scalar_mode_supported_p. So they're > > > excluded from by pieces operations on i386. > > > > > > The original code doesn't do a check for scalar modes. I think it > > > might be incorrect as not all scalar modes support move and compare > optabs. (e.g. > > > TImode with -m32 on rs6000). > > > > > > I drafted a new patch to manually check optabs for scalar mode. > > > Now both vector and scalar modes are checked for optabs. > > > > > > I did a simple test. All former regression cases are back. Could > > > you help do a full regression test? I am worry about the coverage of my CI > system. > > Thanks for that. I am running the regression test now. The patch works. Thanks a lot! Sorry for the delay since my CI accidentally crashed. Thx, Haochen > > Thx, > Haochen > > > > > Thanks for the quick fix. The patch LGTM FWIW. Just a small > > suggestion for the function name: > > > > > > > > Thanks > > > Gui Haochen > > > > > > patch.diff > > > diff --git a/gcc/expr.cc b/gcc/expr.cc index > > > 7aac575eff8..2af9fcbed18 > > > 100644 > > > --- a/gcc/expr.cc > > > +++ b/gcc/expr.cc > > > @@ -1000,18 +1000,21 @@ can_use_qi_vectors (by_pieces_operation > op) > > > /* Return true if optabs exists for the mode and certain by pieces > > > operations. */ > > > static bool > > > -qi_vector_mode_supported_p (fixed_size_mode mode, > > by_pieces_operation > > > op) > > > +mode_supported_p (fixed_size_mode mode, by_pieces_operation op) > > > > Might be worth calling this something more specific, such as > > by_pieces_mode_supported_p. > > > > Otherwise the patch is OK for trunk if it passes the x86 testing. > > > > Thanks, > > Richard > > > > > { > > > + if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) > > > + return false; > > > + > > > if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) > > > - && optab_handler (vec_duplicate_optab, mode) != > CODE_FOR_nothing) > > > - return true; > > > + && VECTOR_MODE_P (mode) > > > + && optab_handler (vec_duplicate_optab, mode) == > > CODE_FOR_nothing) > > > + return false; > > > > > > if (op == COMPARE_BY_PIECES > > > - && optab_handler (mov_optab, mode) != CODE_FOR_nothing > > > - && can_compare_p (EQ, mode, ccp_jump)) > > > - return true; > > > + && !can_compare_p (EQ, mode, ccp_jump)) > > > + return false; > > > > > > - return false; > > > + return true; > > > } > > > > > > /* Return the widest mode that can be used to perform part of an @@ > > > -1035,7 +1038,7 @@ widest_fixed_size_mode_for_size (unsigned int > > > size, > > by_pieces_operation op) > > > { > > > if (GET_MODE_SIZE (candidate) >= size) > > > break; > > > - if (qi_vector_mode_supported_p (candidate, op)) > > > + if (mode_supported_p (candidate, op)) > > > result = candidate; > > > } > > > > > > @@ -1049,7 +1052,7 @@ widest_fixed_size_mode_for_size (unsigned int > > size, by_pieces_operation op) > > > { > > > mode = tmode.require (); > > > if (GET_MODE_SIZE (mode) < size > > > - && targetm.scalar_mode_supported_p (mode)) > > > + && mode_supported_p (mode, op)) > > > result = mode; > > > } > > > > > > @@ -1454,7 +1457,7 @@ > > op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > > > break; > > > > > > if (GET_MODE_SIZE (candidate) >= size > > > - && qi_vector_mode_supported_p (candidate, m_op)) > > > + && mode_supported_p (candidate, m_op)) > > > return candidate; > > > } > > > }
diff --git a/gcc/expr.cc b/gcc/expr.cc index 2c9930ec674..ad5f9dd8ec2 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -988,18 +988,44 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align) return align; } -/* Return the widest QI vector, if QI_MODE is true, or integer mode - that is narrower than SIZE bytes. */ +/* Return true if we know how to implement OP using vectors of bytes. */ +static bool +can_use_qi_vectors (by_pieces_operation op) +{ + return (op == COMPARE_BY_PIECES + || op == SET_BY_PIECES + || op == CLEAR_BY_PIECES); +} + +/* Return true if optabs exists for the mode and certain by pieces + operations. */ +static bool +qi_vector_mode_supported_p (fixed_size_mode mode, by_pieces_operation op) +{ + if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) + && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing) + return true; + + if (op == COMPARE_BY_PIECES + && optab_handler (mov_optab, mode) != CODE_FOR_nothing + && can_compare_p (EQ, mode, ccp_jump)) + return true; + return false; +} + +/* Return the widest mode that can be used to perform part of an + operation OP on SIZE bytes. Try to use QI vector modes where + possible. */ static fixed_size_mode -widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) +widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op) { fixed_size_mode result = NARROWEST_INT_MODE; gcc_checking_assert (size > 1); /* Use QI vector only if size is wider than a WORD. */ - if (qi_vector && size > UNITS_PER_WORD) + if (can_use_qi_vectors (op) && size > UNITS_PER_WORD) { machine_mode mode; fixed_size_mode candidate; @@ -1009,8 +1035,7 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) { if (GET_MODE_SIZE (candidate) >= size) break; - if (optab_handler (vec_duplicate_optab, candidate) - != CODE_FOR_nothing) + if (qi_vector_mode_supported_p (candidate, op)) result = candidate; } @@ -1019,9 +1044,14 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) } opt_scalar_int_mode tmode; + scalar_int_mode mode; FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) - if (GET_MODE_SIZE (tmode.require ()) < size) - result = tmode.require (); + { + mode = tmode.require (); + if (GET_MODE_SIZE (mode) < size + && targetm.scalar_mode_supported_p (mode)) + result = mode; + } return result; } @@ -1061,8 +1091,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align, { /* NB: Round up L and ALIGN to the widest integer mode for MAX_SIZE. */ - mode = widest_fixed_size_mode_for_size (max_size, - op == SET_BY_PIECES); + mode = widest_fixed_size_mode_for_size (max_size, op); if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) { unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode)); @@ -1076,8 +1105,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align, while (max_size > 1 && l > 0) { - mode = widest_fixed_size_mode_for_size (max_size, - op == SET_BY_PIECES); + mode = widest_fixed_size_mode_for_size (max_size, op); enum insn_code icode; unsigned int modesize = GET_MODE_SIZE (mode); @@ -1317,8 +1345,8 @@ class op_by_pieces_d bool m_push; /* True if targetm.overlap_op_by_pieces_p () returns true. */ bool m_overlap_op_by_pieces; - /* True if QI vector mode can be used. */ - bool m_qi_vector_mode; + /* The type of operation that we're performing. */ + by_pieces_operation m_op; /* Virtual functions, overriden by derived classes for the specific operation. */ @@ -1331,7 +1359,7 @@ class op_by_pieces_d public: op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn, void *, unsigned HOST_WIDE_INT, unsigned int, bool, - bool = false); + by_pieces_operation); void run (); }; @@ -1349,11 +1377,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to, void *from_cfn_data, unsigned HOST_WIDE_INT len, unsigned int align, bool push, - bool qi_vector_mode) + by_pieces_operation op) : m_to (to, to_load, NULL, NULL), m_from (from, from_load, from_cfn, from_cfn_data), m_len (len), m_max_size (max_pieces + 1), - m_push (push), m_qi_vector_mode (qi_vector_mode) + m_push (push), m_op (op) { int toi = m_to.get_addr_inc (); int fromi = m_from.get_addr_inc (); @@ -1375,8 +1403,7 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to, { /* Find the mode of the largest comparison. */ fixed_size_mode mode - = widest_fixed_size_mode_for_size (m_max_size, - m_qi_vector_mode); + = widest_fixed_size_mode_for_size (m_max_size, m_op); m_from.decide_autoinc (mode, m_reverse, len); m_to.decide_autoinc (mode, m_reverse, len); @@ -1401,7 +1428,7 @@ op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len) if (len >= size && prepare_mode (mode, m_align)) break; /* widest_fixed_size_mode_for_size checks SIZE > 1. */ - mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); + mode = widest_fixed_size_mode_for_size (size, m_op); } while (1); return mode; @@ -1414,7 +1441,7 @@ fixed_size_mode op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) { /* Use QI vector only for > size of WORD. */ - if (m_qi_vector_mode && size > UNITS_PER_WORD) + if (can_use_qi_vectors (m_op) && size > UNITS_PER_WORD) { machine_mode mode; fixed_size_mode candidate; @@ -1427,8 +1454,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) break; if (GET_MODE_SIZE (candidate) >= size - && (optab_handler (vec_duplicate_optab, candidate) - != CODE_FOR_nothing)) + && qi_vector_mode_supported_p (candidate, m_op)) return candidate; } } @@ -1451,7 +1477,7 @@ op_by_pieces_d::run () /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */ fixed_size_mode mode - = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode); + = widest_fixed_size_mode_for_size (m_max_size, m_op); mode = get_usable_mode (mode, length); by_pieces_prev to_prev = { nullptr, mode }; @@ -1516,8 +1542,7 @@ op_by_pieces_d::run () else { /* widest_fixed_size_mode_for_size checks SIZE > 1. */ - mode = widest_fixed_size_mode_for_size (size, - m_qi_vector_mode); + mode = widest_fixed_size_mode_for_size (size, m_op); mode = get_usable_mode (mode, length); } } @@ -1543,7 +1568,7 @@ class move_by_pieces_d : public op_by_pieces_d move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, unsigned int align) : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL, - NULL, len, align, PUSHG_P (to)) + NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES) { } rtx finish_retmode (memop_ret); @@ -1632,15 +1657,16 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len, class store_by_pieces_d : public op_by_pieces_d { insn_gen_fn m_gen_fun; + void generate (rtx, rtx, machine_mode) final override; bool prepare_mode (machine_mode, unsigned int) final override; public: store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, unsigned HOST_WIDE_INT len, unsigned int align, - bool qi_vector_mode) + by_pieces_operation op) : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn, - cfn_data, len, align, false, qi_vector_mode) + cfn_data, len, align, false, op) { } rtx finish_retmode (memop_ret); @@ -1729,8 +1755,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, max_size = STORE_MAX_PIECES + 1; while (max_size > 1 && l > 0) { - fixed_size_mode mode - = widest_fixed_size_mode_for_size (max_size, memsetp); + auto op = memsetp ? SET_BY_PIECES : STORE_BY_PIECES; + auto mode = widest_fixed_size_mode_for_size (max_size, op); icode = optab_handler (mov_optab, mode); if (icode != CODE_FOR_nothing @@ -1793,7 +1819,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len, optimize_insn_for_speed_p ())); store_by_pieces_d data (to, constfun, constfundata, len, align, - memsetp); + memsetp ? SET_BY_PIECES : STORE_BY_PIECES); data.run (); if (retmode != RETURN_BEGIN) @@ -1814,7 +1840,7 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align) /* Use builtin_memset_read_str to support vector mode broadcast. */ char c = 0; store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align, - true); + CLEAR_BY_PIECES); data.run (); } @@ -1832,12 +1858,13 @@ class compare_by_pieces_d : public op_by_pieces_d void generate (rtx, rtx, machine_mode) final override; bool prepare_mode (machine_mode, unsigned int) final override; void finish_mode (machine_mode) final override; + public: compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, void *op1_cfn_data, HOST_WIDE_INT len, int align, rtx_code_label *fail_label) : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn, - op1_cfn_data, len, align, false) + op1_cfn_data, len, align, false, COMPARE_BY_PIECES) { m_fail_label = fail_label; }