Message ID | 20231106122643.3639195-1-juzhe.zhong@rivai.ai |
---|---|
State | New |
Headers | show |
Series | RISC-V: Early expand DImode vec_duplicate in RV32 system | expand |
Could you add a testcase? other than that LGTM. On Mon, Nov 6, 2023 at 8:27 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote: > > An ICE was discovered in recent rounding autovec support: > > config/riscv/riscv-v.cc:4314 > 65 | } > | ^ > 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**, > rtx_def*, bool) > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314 > 0x1fb1aa2 pre_vsetvl::remove_avl_operand() > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342 > 0x1fb18c1 pre_vsetvl::cleaup() > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308 > 0x1fb216d pass_vsetvl::lazy_vsetvl() > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480 > 0x1fb2214 pass_vsetvl::execute(function*) > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504 > > The root cause is that the RA reload into (set (reg) vec_duplicate:DI). However, it is not valid in RV32 system > since we don't have a single broadcast instruction DI scalar in RV32 system. > We should expand it early for RV32 system. > > gcc/ChangeLog: > > * config/riscv/predicates.md: Refine predicate. > * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function. > * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto. > * config/riscv/vector.md (vec_duplicate<mode>): New pattern. > (*vec_duplicate<mode>): Adapt pattern. > > --- > gcc/config/riscv/predicates.md | 9 +-------- > gcc/config/riscv/riscv-protos.h | 1 + > gcc/config/riscv/riscv-v.cc | 20 ++++++++++++++++++++ > gcc/config/riscv/vector.md | 20 +++++++++++++++++++- > 4 files changed, 41 insertions(+), 9 deletions(-) > > diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md > index db18054607f..df1c66f3a76 100644 > --- a/gcc/config/riscv/predicates.md > +++ b/gcc/config/riscv/predicates.md > @@ -553,14 +553,7 @@ > > ;; The scalar operand can be directly broadcast by RVV instructions. > (define_predicate "direct_broadcast_operand" > - (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op)) > - && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op) > - || rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))) > - && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE (Pmode)))") > - (ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))") > - (ior (match_code "const_int,const_poly_int") > - (ior (match_operand 0 "register_operand") > - (match_test "satisfies_constraint_Wdm (op)")))))) > + (match_test "riscv_vector::can_be_broadcasted_p (op)")) > > ;; A CONST_INT operand that has exactly two bits cleared. > (define_predicate "const_nottwobits_operand" > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > index 6cbf2130f88..acae00f653f 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *); > enum vlmul_type get_vlmul (rtx_insn *); > int count_regno_occurrences (rtx_insn *, unsigned int); > bool imm_avl_p (machine_mode); > +bool can_be_broadcasted_p (rtx); > } > > /* We classify builtin types into two classes: > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 80d2bb9e289..a64946213c3 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) > return count; > } > > +/* Return true if the OP can be directly broadcasted. */ > +bool > +can_be_broadcasted_p (rtx op) > +{ > + machine_mode mode = GET_MODE (op); > + /* We don't allow RA (register allocation) reload generate > + (vec_duplicate:DI reg) in RV32 system wheras we allow > + (vec_duplicate:DI mem) in RV32 system. */ > + if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode) > + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)) > + && !satisfies_constraint_Wdm (op)) > + return false; > + > + if (satisfies_constraint_K (op) || register_operand (op, mode) > + || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode))) > + return true; > + > + return can_create_pseudo_p () && nonmemory_operand (op, mode); > +} > + > } // namespace riscv_vector > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index 8509c4fe5f2..e23f64938b7 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -1370,11 +1370,29 @@ > ;; ---- Duplicate Operations > ;; ----------------------------------------------------------------- > > +(define_expand "vec_duplicate<mode>" > + [(set (match_operand:V_VLS 0 "register_operand") > + (vec_duplicate:V_VLS > + (match_operand:<VEL> 1 "direct_broadcast_operand")))] > + "TARGET_VECTOR" > + { > + /* Early expand DImode broadcast in RV32 system to avoid RA reload > + generate (set (reg) (vec_duplicate:DI)). */ > + if (maybe_gt (GET_MODE_SIZE (<VEL>mode), GET_MODE_SIZE (Pmode))) > + { > + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), > + riscv_vector::UNARY_OP, operands); > + DONE; > + } > + /* Otherwise, allow it fall into general vec_duplicate pattern > + which allow us to have vv->vx combine optimization in later pass. */ > + }) > + > ;; According to GCC internal: > ;; This pattern only handles duplicates of non-constant inputs. > ;; Constant vectors go through the movm pattern instead. > ;; So "direct_broadcast_operand" can only be mem or reg, no CONSTANT. > -(define_insn_and_split "vec_duplicate<mode>" > +(define_insn_and_split "*vec_duplicate<mode>" > [(set (match_operand:V_VLS 0 "register_operand") > (vec_duplicate:V_VLS > (match_operand:<VEL> 1 "direct_broadcast_operand")))] > -- > 2.36.3 >
Testcase already existed on the trunk, which is added by Li Pan added recently when supporting rounding mode autovec. https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635280.html math-llrintf-run-0.c passed on RV64 but cause ICE on RV32. juzhe.zhong@rivai.ai From: Kito Cheng Date: 2023-11-06 20:38 To: Juzhe-Zhong CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc Subject: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system Could you add a testcase? other than that LGTM. On Mon, Nov 6, 2023 at 8:27 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote: > > An ICE was discovered in recent rounding autovec support: > > config/riscv/riscv-v.cc:4314 > 65 | } > | ^ > 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**, > rtx_def*, bool) > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314 > 0x1fb1aa2 pre_vsetvl::remove_avl_operand() > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342 > 0x1fb18c1 pre_vsetvl::cleaup() > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308 > 0x1fb216d pass_vsetvl::lazy_vsetvl() > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480 > 0x1fb2214 pass_vsetvl::execute(function*) > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504 > > The root cause is that the RA reload into (set (reg) vec_duplicate:DI). However, it is not valid in RV32 system > since we don't have a single broadcast instruction DI scalar in RV32 system. > We should expand it early for RV32 system. > > gcc/ChangeLog: > > * config/riscv/predicates.md: Refine predicate. > * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function. > * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto. > * config/riscv/vector.md (vec_duplicate<mode>): New pattern. > (*vec_duplicate<mode>): Adapt pattern. > > --- > gcc/config/riscv/predicates.md | 9 +-------- > gcc/config/riscv/riscv-protos.h | 1 + > gcc/config/riscv/riscv-v.cc | 20 ++++++++++++++++++++ > gcc/config/riscv/vector.md | 20 +++++++++++++++++++- > 4 files changed, 41 insertions(+), 9 deletions(-) > > diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md > index db18054607f..df1c66f3a76 100644 > --- a/gcc/config/riscv/predicates.md > +++ b/gcc/config/riscv/predicates.md > @@ -553,14 +553,7 @@ > > ;; The scalar operand can be directly broadcast by RVV instructions. > (define_predicate "direct_broadcast_operand" > - (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op)) > - && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op) > - || rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))) > - && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE (Pmode)))") > - (ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))") > - (ior (match_code "const_int,const_poly_int") > - (ior (match_operand 0 "register_operand") > - (match_test "satisfies_constraint_Wdm (op)")))))) > + (match_test "riscv_vector::can_be_broadcasted_p (op)")) > > ;; A CONST_INT operand that has exactly two bits cleared. > (define_predicate "const_nottwobits_operand" > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > index 6cbf2130f88..acae00f653f 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *); > enum vlmul_type get_vlmul (rtx_insn *); > int count_regno_occurrences (rtx_insn *, unsigned int); > bool imm_avl_p (machine_mode); > +bool can_be_broadcasted_p (rtx); > } > > /* We classify builtin types into two classes: > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 80d2bb9e289..a64946213c3 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) > return count; > } > > +/* Return true if the OP can be directly broadcasted. */ > +bool > +can_be_broadcasted_p (rtx op) > +{ > + machine_mode mode = GET_MODE (op); > + /* We don't allow RA (register allocation) reload generate > + (vec_duplicate:DI reg) in RV32 system wheras we allow > + (vec_duplicate:DI mem) in RV32 system. */ > + if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode) > + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)) > + && !satisfies_constraint_Wdm (op)) > + return false; > + > + if (satisfies_constraint_K (op) || register_operand (op, mode) > + || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode))) > + return true; > + > + return can_create_pseudo_p () && nonmemory_operand (op, mode); > +} > + > } // namespace riscv_vector > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index 8509c4fe5f2..e23f64938b7 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -1370,11 +1370,29 @@ > ;; ---- Duplicate Operations > ;; ----------------------------------------------------------------- > > +(define_expand "vec_duplicate<mode>" > + [(set (match_operand:V_VLS 0 "register_operand") > + (vec_duplicate:V_VLS > + (match_operand:<VEL> 1 "direct_broadcast_operand")))] > + "TARGET_VECTOR" > + { > + /* Early expand DImode broadcast in RV32 system to avoid RA reload > + generate (set (reg) (vec_duplicate:DI)). */ > + if (maybe_gt (GET_MODE_SIZE (<VEL>mode), GET_MODE_SIZE (Pmode))) > + { > + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), > + riscv_vector::UNARY_OP, operands); > + DONE; > + } > + /* Otherwise, allow it fall into general vec_duplicate pattern > + which allow us to have vv->vx combine optimization in later pass. */ > + }) > + > ;; According to GCC internal: > ;; This pattern only handles duplicates of non-constant inputs. > ;; Constant vectors go through the movm pattern instead. > ;; So "direct_broadcast_operand" can only be mem or reg, no CONSTANT. > -(define_insn_and_split "vec_duplicate<mode>" > +(define_insn_and_split "*vec_duplicate<mode>" > [(set (match_operand:V_VLS 0 "register_operand") > (vec_duplicate:V_VLS > (match_operand:<VEL> 1 "direct_broadcast_operand")))] > -- > 2.36.3 >
I would prefer to add a dedicated test case to test that, so that we could also cover that even if we didn't enable multi-lib testing for RV32, and I suppose that should only require compile test for part of that test case ? On Mon, Nov 6, 2023 at 8:41 PM juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> wrote: > > Testcase already existed on the trunk, which is added by Li Pan added recently when supporting rounding mode autovec. > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635280.html > > math-llrintf-run-0.c passed on RV64 but cause ICE on RV32. > > > > ________________________________ > juzhe.zhong@rivai.ai > > > From: Kito Cheng > Date: 2023-11-06 20:38 > To: Juzhe-Zhong > CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc > Subject: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system > Could you add a testcase? other than that LGTM. > > On Mon, Nov 6, 2023 at 8:27 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote: > > > > An ICE was discovered in recent rounding autovec support: > > > > config/riscv/riscv-v.cc:4314 > > 65 | } > > | ^ > > 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**, > > rtx_def*, bool) > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314 > > 0x1fb1aa2 pre_vsetvl::remove_avl_operand() > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342 > > 0x1fb18c1 pre_vsetvl::cleaup() > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308 > > 0x1fb216d pass_vsetvl::lazy_vsetvl() > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480 > > 0x1fb2214 pass_vsetvl::execute(function*) > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504 > > > > The root cause is that the RA reload into (set (reg) vec_duplicate:DI). However, it is not valid in RV32 system > > since we don't have a single broadcast instruction DI scalar in RV32 system. > > We should expand it early for RV32 system. > > > > gcc/ChangeLog: > > > > * config/riscv/predicates.md: Refine predicate. > > * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function. > > * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto. > > * config/riscv/vector.md (vec_duplicate<mode>): New pattern. > > (*vec_duplicate<mode>): Adapt pattern. > > > > --- > > gcc/config/riscv/predicates.md | 9 +-------- > > gcc/config/riscv/riscv-protos.h | 1 + > > gcc/config/riscv/riscv-v.cc | 20 ++++++++++++++++++++ > > gcc/config/riscv/vector.md | 20 +++++++++++++++++++- > > 4 files changed, 41 insertions(+), 9 deletions(-) > > > > diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md > > index db18054607f..df1c66f3a76 100644 > > --- a/gcc/config/riscv/predicates.md > > +++ b/gcc/config/riscv/predicates.md > > @@ -553,14 +553,7 @@ > > > > ;; The scalar operand can be directly broadcast by RVV instructions. > > (define_predicate "direct_broadcast_operand" > > - (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op)) > > - && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op) > > - || rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))) > > - && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE (Pmode)))") > > - (ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))") > > - (ior (match_code "const_int,const_poly_int") > > - (ior (match_operand 0 "register_operand") > > - (match_test "satisfies_constraint_Wdm (op)")))))) > > + (match_test "riscv_vector::can_be_broadcasted_p (op)")) > > > > ;; A CONST_INT operand that has exactly two bits cleared. > > (define_predicate "const_nottwobits_operand" > > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > > index 6cbf2130f88..acae00f653f 100644 > > --- a/gcc/config/riscv/riscv-protos.h > > +++ b/gcc/config/riscv/riscv-protos.h > > @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *); > > enum vlmul_type get_vlmul (rtx_insn *); > > int count_regno_occurrences (rtx_insn *, unsigned int); > > bool imm_avl_p (machine_mode); > > +bool can_be_broadcasted_p (rtx); > > } > > > > /* We classify builtin types into two classes: > > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > > index 80d2bb9e289..a64946213c3 100644 > > --- a/gcc/config/riscv/riscv-v.cc > > +++ b/gcc/config/riscv/riscv-v.cc > > @@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) > > return count; > > } > > > > +/* Return true if the OP can be directly broadcasted. */ > > +bool > > +can_be_broadcasted_p (rtx op) > > +{ > > + machine_mode mode = GET_MODE (op); > > + /* We don't allow RA (register allocation) reload generate > > + (vec_duplicate:DI reg) in RV32 system wheras we allow > > + (vec_duplicate:DI mem) in RV32 system. */ > > + if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode) > > + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)) > > + && !satisfies_constraint_Wdm (op)) > > + return false; > > + > > + if (satisfies_constraint_K (op) || register_operand (op, mode) > > + || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode))) > > + return true; > > + > > + return can_create_pseudo_p () && nonmemory_operand (op, mode); > > +} > > + > > } // namespace riscv_vector > > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > > index 8509c4fe5f2..e23f64938b7 100644 > > --- a/gcc/config/riscv/vector.md > > +++ b/gcc/config/riscv/vector.md > > @@ -1370,11 +1370,29 @@ > > ;; ---- Duplicate Operations > > ;; ----------------------------------------------------------------- > > > > +(define_expand "vec_duplicate<mode>" > > + [(set (match_operand:V_VLS 0 "register_operand") > > + (vec_duplicate:V_VLS > > + (match_operand:<VEL> 1 "direct_broadcast_operand")))] > > + "TARGET_VECTOR" > > + { > > + /* Early expand DImode broadcast in RV32 system to avoid RA reload > > + generate (set (reg) (vec_duplicate:DI)). */ > > + if (maybe_gt (GET_MODE_SIZE (<VEL>mode), GET_MODE_SIZE (Pmode))) > > + { > > + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), > > + riscv_vector::UNARY_OP, operands); > > + DONE; > > + } > > + /* Otherwise, allow it fall into general vec_duplicate pattern > > + which allow us to have vv->vx combine optimization in later pass. */ > > + }) > > + > > ;; According to GCC internal: > > ;; This pattern only handles duplicates of non-constant inputs. > > ;; Constant vectors go through the movm pattern instead. > > ;; So "direct_broadcast_operand" can only be mem or reg, no CONSTANT. > > -(define_insn_and_split "vec_duplicate<mode>" > > +(define_insn_and_split "*vec_duplicate<mode>" > > [(set (match_operand:V_VLS 0 "register_operand") > > (vec_duplicate:V_VLS > > (match_operand:<VEL> 1 "direct_broadcast_operand")))] > > -- > > 2.36.3 > > >
Committed with adding testcase as you suggested in V2: [PATCH V2] RISC-V: Early expand DImode vec_duplicate in RV32 system (gnu.org) juzhe.zhong@rivai.ai From: Kito Cheng Date: 2023-11-06 20:46 To: juzhe.zhong@rivai.ai CC: kito.cheng; gcc-patches; jeffreyalaw; Robin Dapp Subject: Re: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system I would prefer to add a dedicated test case to test that, so that we could also cover that even if we didn't enable multi-lib testing for RV32, and I suppose that should only require compile test for part of that test case ? On Mon, Nov 6, 2023 at 8:41 PM juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> wrote: > > Testcase already existed on the trunk, which is added by Li Pan added recently when supporting rounding mode autovec. > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635280.html > > math-llrintf-run-0.c passed on RV64 but cause ICE on RV32. > > > > ________________________________ > juzhe.zhong@rivai.ai > > > From: Kito Cheng > Date: 2023-11-06 20:38 > To: Juzhe-Zhong > CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc > Subject: Re: [PATCH] RISC-V: Early expand DImode vec_duplicate in RV32 system > Could you add a testcase? other than that LGTM. > > On Mon, Nov 6, 2023 at 8:27 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote: > > > > An ICE was discovered in recent rounding autovec support: > > > > config/riscv/riscv-v.cc:4314 > > 65 | } > > | ^ > > 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**, > > rtx_def*, bool) > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314 > > 0x1fb1aa2 pre_vsetvl::remove_avl_operand() > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342 > > 0x1fb18c1 pre_vsetvl::cleaup() > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308 > > 0x1fb216d pass_vsetvl::lazy_vsetvl() > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480 > > 0x1fb2214 pass_vsetvl::execute(function*) > > /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504 > > > > The root cause is that the RA reload into (set (reg) vec_duplicate:DI). However, it is not valid in RV32 system > > since we don't have a single broadcast instruction DI scalar in RV32 system. > > We should expand it early for RV32 system. > > > > gcc/ChangeLog: > > > > * config/riscv/predicates.md: Refine predicate. > > * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function. > > * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto. > > * config/riscv/vector.md (vec_duplicate<mode>): New pattern. > > (*vec_duplicate<mode>): Adapt pattern. > > > > --- > > gcc/config/riscv/predicates.md | 9 +-------- > > gcc/config/riscv/riscv-protos.h | 1 + > > gcc/config/riscv/riscv-v.cc | 20 ++++++++++++++++++++ > > gcc/config/riscv/vector.md | 20 +++++++++++++++++++- > > 4 files changed, 41 insertions(+), 9 deletions(-) > > > > diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md > > index db18054607f..df1c66f3a76 100644 > > --- a/gcc/config/riscv/predicates.md > > +++ b/gcc/config/riscv/predicates.md > > @@ -553,14 +553,7 @@ > > > > ;; The scalar operand can be directly broadcast by RVV instructions. > > (define_predicate "direct_broadcast_operand" > > - (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op)) > > - && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op) > > - || rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))) > > - && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE (Pmode)))") > > - (ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))") > > - (ior (match_code "const_int,const_poly_int") > > - (ior (match_operand 0 "register_operand") > > - (match_test "satisfies_constraint_Wdm (op)")))))) > > + (match_test "riscv_vector::can_be_broadcasted_p (op)")) > > > > ;; A CONST_INT operand that has exactly two bits cleared. > > (define_predicate "const_nottwobits_operand" > > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > > index 6cbf2130f88..acae00f653f 100644 > > --- a/gcc/config/riscv/riscv-protos.h > > +++ b/gcc/config/riscv/riscv-protos.h > > @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *); > > enum vlmul_type get_vlmul (rtx_insn *); > > int count_regno_occurrences (rtx_insn *, unsigned int); > > bool imm_avl_p (machine_mode); > > +bool can_be_broadcasted_p (rtx); > > } > > > > /* We classify builtin types into two classes: > > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > > index 80d2bb9e289..a64946213c3 100644 > > --- a/gcc/config/riscv/riscv-v.cc > > +++ b/gcc/config/riscv/riscv-v.cc > > @@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) > > return count; > > } > > > > +/* Return true if the OP can be directly broadcasted. */ > > +bool > > +can_be_broadcasted_p (rtx op) > > +{ > > + machine_mode mode = GET_MODE (op); > > + /* We don't allow RA (register allocation) reload generate > > + (vec_duplicate:DI reg) in RV32 system wheras we allow > > + (vec_duplicate:DI mem) in RV32 system. */ > > + if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode) > > + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)) > > + && !satisfies_constraint_Wdm (op)) > > + return false; > > + > > + if (satisfies_constraint_K (op) || register_operand (op, mode) > > + || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode))) > > + return true; > > + > > + return can_create_pseudo_p () && nonmemory_operand (op, mode); > > +} > > + > > } // namespace riscv_vector > > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > > index 8509c4fe5f2..e23f64938b7 100644 > > --- a/gcc/config/riscv/vector.md > > +++ b/gcc/config/riscv/vector.md > > @@ -1370,11 +1370,29 @@ > > ;; ---- Duplicate Operations > > ;; ----------------------------------------------------------------- > > > > +(define_expand "vec_duplicate<mode>" > > + [(set (match_operand:V_VLS 0 "register_operand") > > + (vec_duplicate:V_VLS > > + (match_operand:<VEL> 1 "direct_broadcast_operand")))] > > + "TARGET_VECTOR" > > + { > > + /* Early expand DImode broadcast in RV32 system to avoid RA reload > > + generate (set (reg) (vec_duplicate:DI)). */ > > + if (maybe_gt (GET_MODE_SIZE (<VEL>mode), GET_MODE_SIZE (Pmode))) > > + { > > + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), > > + riscv_vector::UNARY_OP, operands); > > + DONE; > > + } > > + /* Otherwise, allow it fall into general vec_duplicate pattern > > + which allow us to have vv->vx combine optimization in later pass. */ > > + }) > > + > > ;; According to GCC internal: > > ;; This pattern only handles duplicates of non-constant inputs. > > ;; Constant vectors go through the movm pattern instead. > > ;; So "direct_broadcast_operand" can only be mem or reg, no CONSTANT. > > -(define_insn_and_split "vec_duplicate<mode>" > > +(define_insn_and_split "*vec_duplicate<mode>" > > [(set (match_operand:V_VLS 0 "register_operand") > > (vec_duplicate:V_VLS > > (match_operand:<VEL> 1 "direct_broadcast_operand")))] > > -- > > 2.36.3 > > >
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index db18054607f..df1c66f3a76 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -553,14 +553,7 @@ ;; The scalar operand can be directly broadcast by RVV instructions. (define_predicate "direct_broadcast_operand" - (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op)) - && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op) - || rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))) - && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE (Pmode)))") - (ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))") - (ior (match_code "const_int,const_poly_int") - (ior (match_operand 0 "register_operand") - (match_test "satisfies_constraint_Wdm (op)")))))) + (match_test "riscv_vector::can_be_broadcasted_p (op)")) ;; A CONST_INT operand that has exactly two bits cleared. (define_predicate "const_nottwobits_operand" diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 6cbf2130f88..acae00f653f 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -595,6 +595,7 @@ uint8_t get_sew (rtx_insn *); enum vlmul_type get_vlmul (rtx_insn *); int count_regno_occurrences (rtx_insn *, unsigned int); bool imm_avl_p (machine_mode); +bool can_be_broadcasted_p (rtx); } /* We classify builtin types into two classes: diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 80d2bb9e289..a64946213c3 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4417,4 +4417,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) return count; } +/* Return true if the OP can be directly broadcasted. */ +bool +can_be_broadcasted_p (rtx op) +{ + machine_mode mode = GET_MODE (op); + /* We don't allow RA (register allocation) reload generate + (vec_duplicate:DI reg) in RV32 system wheras we allow + (vec_duplicate:DI mem) in RV32 system. */ + if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode) + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)) + && !satisfies_constraint_Wdm (op)) + return false; + + if (satisfies_constraint_K (op) || register_operand (op, mode) + || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode))) + return true; + + return can_create_pseudo_p () && nonmemory_operand (op, mode); +} + } // namespace riscv_vector diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 8509c4fe5f2..e23f64938b7 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -1370,11 +1370,29 @@ ;; ---- Duplicate Operations ;; ----------------------------------------------------------------- +(define_expand "vec_duplicate<mode>" + [(set (match_operand:V_VLS 0 "register_operand") + (vec_duplicate:V_VLS + (match_operand:<VEL> 1 "direct_broadcast_operand")))] + "TARGET_VECTOR" + { + /* Early expand DImode broadcast in RV32 system to avoid RA reload + generate (set (reg) (vec_duplicate:DI)). */ + if (maybe_gt (GET_MODE_SIZE (<VEL>mode), GET_MODE_SIZE (Pmode))) + { + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), + riscv_vector::UNARY_OP, operands); + DONE; + } + /* Otherwise, allow it fall into general vec_duplicate pattern + which allow us to have vv->vx combine optimization in later pass. */ + }) + ;; According to GCC internal: ;; This pattern only handles duplicates of non-constant inputs. ;; Constant vectors go through the movm pattern instead. ;; So "direct_broadcast_operand" can only be mem or reg, no CONSTANT. -(define_insn_and_split "vec_duplicate<mode>" +(define_insn_and_split "*vec_duplicate<mode>" [(set (match_operand:V_VLS 0 "register_operand") (vec_duplicate:V_VLS (match_operand:<VEL> 1 "direct_broadcast_operand")))]