Message ID | 20240110050538.2465410-1-juzhe.zhong@rivai.ai |
---|---|
State | New |
Headers | show |
Series | RISC-V: Refine unsigned avg_floor/avg_ceil | expand |
LGTM! On Wed, Jan 10, 2024 at 1:05 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote: > > This patch is inspired by LLVM patches: > https://github.com/llvm/llvm-project/pull/76550 > https://github.com/llvm/llvm-project/pull/77473 > > Use vaaddu for AVG vectorization. > > Before this patch: > > vsetivli zero,8,e8,mf2,ta,ma > vle8.v v3,0(a1) > vle8.v v2,0(a2) > vwaddu.vv v1,v3,v2 > vsetvli zero,zero,e16,m1,ta,ma > vadd.vi v1,v1,1 > vsetvli zero,zero,e8,mf2,ta,ma > vnsrl.wi v1,v1,1 > vse8.v v1,0(a0) > ret > > After this patch: > > vsetivli zero,8,e8,mf2,ta,ma > csrwi vxrm,0 > vle8.v v1,0(a1) > vle8.v v2,0(a2) > vaaddu.vv v1,v1,v2 > vse8.v v1,0(a0) > ret > > Note on signed averaging addition > > Based on the RVV spec, there is also a variant for signed averaging addition called vaadd. > But AFAIU, no matter which rounding mode is used, we cannot achieve the semantics of signed averaging addition through vaadd. > Thus this patch only introduces vaaddu. > > More details in: > https://github.com/riscv/riscv-v-spec/issues/935 > https://github.com/riscv/riscv-v-spec/issues/934 > > Tested on both RV32 and RV64 with no regressions. > > OK for trunk? > > gcc/ChangeLog: > > * config/riscv/autovec.md (<u>avg<v_double_trunc>3_floor): Remove. > (avg<v_double_trunc>3_floor): New pattern. > (<u>avg<v_double_trunc>3_ceil): Remove. > (avg<v_double_trunc>3_ceil): New pattern. > (uavg<mode>3_floor): Ditto. > (uavg<mode>3_ceil): Ditto. > * config/riscv/riscv-protos.h (enum insn_flags): Add for average addition. > (enum insn_type): Ditto. > * config/riscv/riscv-v.cc: Ditto. > * config/riscv/vector-iterators.md (ext_to_rshift): Remove. > (EXT_TO_RSHIFT): Ditto. > * config/riscv/vector.md: Add VLS modes. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/vls/avg-1.c: Adapt test. > * gcc.target/riscv/rvv/autovec/vls/avg-2.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls/avg-3.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls/avg-4.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls/avg-5.c: Ditto. 
> * gcc.target/riscv/rvv/autovec/vls/avg-6.c: Ditto. > * gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c: Ditto. > * gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c: Ditto. > > --- > gcc/config/riscv/autovec.md | 50 ++++++++++++++----- > gcc/config/riscv/riscv-protos.h | 8 +++ > gcc/config/riscv/riscv-v.cc | 11 ++++ > gcc/config/riscv/vector-iterators.md | 5 -- > gcc/config/riscv/vector.md | 12 ++--- > .../gcc.target/riscv/rvv/autovec/vls/avg-1.c | 4 +- > .../gcc.target/riscv/rvv/autovec/vls/avg-2.c | 4 +- > .../gcc.target/riscv/rvv/autovec/vls/avg-3.c | 4 +- > .../gcc.target/riscv/rvv/autovec/vls/avg-4.c | 6 +-- > .../gcc.target/riscv/rvv/autovec/vls/avg-5.c | 6 +-- > .../gcc.target/riscv/rvv/autovec/vls/avg-6.c | 6 +-- > .../riscv/rvv/autovec/widen/vec-avg-rv32gcv.c | 7 +-- > .../riscv/rvv/autovec/widen/vec-avg-rv64gcv.c | 7 +-- > 13 files changed, 86 insertions(+), 44 deletions(-) > > diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md > index 775eaa825b0..706cd9717cb 100644 > --- a/gcc/config/riscv/autovec.md > +++ b/gcc/config/riscv/autovec.md > @@ -2345,39 +2345,39 @@ > ;; op[0] = (narrow) ((wide) op[1] + (wide) op[2] + 1)) >> 1; > ;; ------------------------------------------------------------------------- > > -(define_expand "<u>avg<v_double_trunc>3_floor" > +(define_expand "avg<v_double_trunc>3_floor" > [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") > (truncate:<V_DOUBLE_TRUNC> > - (<ext_to_rshift>:VWEXTI > + (ashiftrt:VWEXTI > (plus:VWEXTI > - (any_extend:VWEXTI > + (sign_extend:VWEXTI > (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) > - (any_extend:VWEXTI > + (sign_extend:VWEXTI > (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))] > "TARGET_VECTOR" > { > /* First emit a widening addition. 
*/ > rtx tmp1 = gen_reg_rtx (<MODE>mode); > rtx ops1[] = {tmp1, operands[1], operands[2]}; > - insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode); > + insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode); > riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); > > /* Then a narrowing shift. */ > rtx ops2[] = {operands[0], tmp1, const1_rtx}; > - icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode); > + icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode); > riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2); > DONE; > }) > > -(define_expand "<u>avg<v_double_trunc>3_ceil" > +(define_expand "avg<v_double_trunc>3_ceil" > [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") > (truncate:<V_DOUBLE_TRUNC> > - (<ext_to_rshift>:VWEXTI > + (ashiftrt:VWEXTI > (plus:VWEXTI > (plus:VWEXTI > - (any_extend:VWEXTI > + (sign_extend:VWEXTI > (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) > - (any_extend:VWEXTI > + (sign_extend:VWEXTI > (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))) > (const_int 1)))))] > "TARGET_VECTOR" > @@ -2385,7 +2385,7 @@ > /* First emit a widening addition. */ > rtx tmp1 = gen_reg_rtx (<MODE>mode); > rtx ops1[] = {tmp1, operands[1], operands[2]}; > - insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode); > + insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode); > riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); > > /* Then add 1. */ > @@ -2396,11 +2396,37 @@ > > /* Finally, a narrowing shift. 
*/ > rtx ops3[] = {operands[0], tmp2, const1_rtx}; > - icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode); > + icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode); > riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3); > DONE; > }) > > +;; csrwi vxrm, 2 > +;; vaaddu.vv vd, vs2, vs1 > +(define_expand "uavg<mode>3_floor" > + [(match_operand:V_VLSI 0 "register_operand") > + (match_operand:V_VLSI 1 "register_operand") > + (match_operand:V_VLSI 2 "register_operand")] > + "TARGET_VECTOR" > +{ > + insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode); > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, operands); > + DONE; > +}) > + > +;; csrwi vxrm, 0 > +;; vaaddu.vv vd, vs2, vs1 > +(define_expand "uavg<mode>3_ceil" > + [(match_operand:V_VLSI 0 "register_operand") > + (match_operand:V_VLSI 1 "register_operand") > + (match_operand:V_VLSI 2 "register_operand")] > + "TARGET_VECTOR" > +{ > + insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode); > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, operands); > + DONE; > +}) > + > ;; ------------------------------------------------------------------------- > ;; ---- [FP] Rounding. > ;; ------------------------------------------------------------------------- > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > index 00a5b645abe..fc0097acde3 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -366,6 +366,12 @@ enum insn_flags : unsigned int > > /* Means INSN has FRM operand and the value is FRM_RNE. */ > FRM_RNE_P = 1 << 19, > + > + /* Means INSN has VXRM operand and the value is VXRM_RNU. */ > + VXRM_RNU_P = 1 << 20, > + > + /* Means INSN has VXRM operand and the value is VXRM_RDN. 
*/ > + VXRM_RDN_P = 1 << 21, > }; > > enum insn_type : unsigned int > @@ -426,6 +432,8 @@ enum insn_type : unsigned int > BINARY_OP_TAMU = __MASK_OP_TAMU | BINARY_OP_P, > BINARY_OP_TUMA = __MASK_OP_TUMA | BINARY_OP_P, > BINARY_OP_FRM_DYN = BINARY_OP | FRM_DYN_P, > + BINARY_OP_VXRM_RNU = BINARY_OP | VXRM_RNU_P, > + BINARY_OP_VXRM_RDN = BINARY_OP | VXRM_RDN_P, > > /* Ternary operator. Always have real merge operand. */ > TERNARY_OP = HAS_DEST_P | HAS_MASK_P | USE_ALL_TRUES_MASK_P | HAS_MERGE_P > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 2491522191a..7ae579ba890 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -207,6 +207,13 @@ public: > add_input_operand (frm_rtx, Pmode); > } > > + void > + add_rounding_mode_operand (enum fixed_point_rounding_mode rounding_mode) > + { > + rtx frm_rtx = gen_int_mode (rounding_mode, Pmode); > + add_input_operand (frm_rtx, Pmode); > + } > + > /* Return the vtype mode based on insn_flags. > vtype mode mean the mode vsetvl insn set. 
*/ > machine_mode > @@ -334,6 +341,10 @@ public: > add_rounding_mode_operand (FRM_RMM); > else if (m_insn_flags & FRM_RNE_P) > add_rounding_mode_operand (FRM_RNE); > + else if (m_insn_flags & VXRM_RNU_P) > + add_rounding_mode_operand (VXRM_RNU); > + else if (m_insn_flags & VXRM_RDN_P) > + add_rounding_mode_operand (VXRM_RDN); > > gcc_assert (insn_data[(int) icode].n_operands == m_opno); > expand (icode, any_mem_p); > diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md > index b4a276dc2c8..c2ea7e8b10a 100644 > --- a/gcc/config/riscv/vector-iterators.md > +++ b/gcc/config/riscv/vector-iterators.md > @@ -3581,11 +3581,6 @@ > (define_code_attr nmsub_nmadd [(plus "nmsub") (minus "nmadd")]) > (define_code_attr nmsac_nmacc [(plus "nmsac") (minus "nmacc")]) > > -(define_code_attr ext_to_rshift [(sign_extend "ashiftrt") > - (zero_extend "lshiftrt")]) > -(define_code_attr EXT_TO_RSHIFT [(sign_extend "ASHIFTRT") > - (zero_extend "LSHIFTRT")]) > - > (define_code_iterator and_ior [and ior]) > > (define_code_iterator any_float_binop [plus mult minus div]) > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index 24b7b4394be..c1a282a27b3 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -4239,8 +4239,8 @@ > (set_attr "mode" "<MODE>")]) > > (define_insn "@pred_<sat_op><mode>" > - [(set (match_operand:VI 0 "register_operand" "=vd, vd, vr, vr") > - (if_then_else:VI > + [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vd, vr, vr") > + (if_then_else:V_VLSI > (unspec:<VM> > [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") > (match_operand 5 "vector_length_operand" " rK, rK, rK, rK") > @@ -4251,10 +4251,10 @@ > (reg:SI VL_REGNUM) > (reg:SI VTYPE_REGNUM) > (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE) > - (unspec:VI > - [(match_operand:VI 3 "register_operand" " vr, vr, vr, vr") > - (match_operand:VI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP) > - (match_operand:VI 2 
"vector_merge_operand" " vu, 0, vu, 0")))] > + (unspec:V_VLSI > + [(match_operand:V_VLSI 3 "register_operand" " vr, vr, vr, vr") > + (match_operand:V_VLSI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP) > + (match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0")))] > "TARGET_VECTOR" > "v<sat_op>.vv\t%0,%3,%4%p1" > [(set_attr "type" "<sat_insn_type>") > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c > index d53bd3a386a..2327a3d018e 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c > @@ -26,9 +26,9 @@ DEF_AVG_FLOOR (uint8_t, uint16_t, 1024) > DEF_AVG_FLOOR (uint8_t, uint16_t, 2048) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 10 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c > index 68d1df73a5f..8030810fdbd 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c > @@ -24,9 +24,9 @@ DEF_AVG_FLOOR (uint16_t, uint32_t, 512) > DEF_AVG_FLOOR (uint16_t, uint32_t, 1024) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 9 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */ > -/* { dg-final { 
scan-assembler-times {vnsrl\.wi} 9 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c > index 07ffab61f67..dce0ffa346e 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c > @@ -22,9 +22,9 @@ DEF_AVG_FLOOR (uint32_t, uint64_t, 256) > DEF_AVG_FLOOR (uint32_t, uint64_t, 512) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 8 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c > index 83e219ca09a..65912fb39f2 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c > @@ -26,10 +26,10 @@ DEF_AVG_CEIL (uint8_t, uint16_t, 1024) > DEF_AVG_CEIL (uint8_t, uint16_t, 2048) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 10 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */ > -/* { dg-final { scan-assembler-times {vadd\.vi} 20 } } */ 
> +/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */ > +/* { dg-final { scan-assembler-times {vadd\.vi} 10 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c > index 325faeaa930..a197b24c234 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c > @@ -24,10 +24,10 @@ DEF_AVG_CEIL (uint16_t, uint32_t, 512) > DEF_AVG_CEIL (uint16_t, uint32_t, 1024) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 9 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */ > -/* { dg-final { scan-assembler-times {vadd\.vi} 18 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */ > +/* { dg-final { scan-assembler-times {vadd\.vi} 9 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c > index d836428c7f4..a53de71a01b 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c > @@ -22,10 +22,10 @@ DEF_AVG_CEIL (uint16_t, uint32_t, 256) > DEF_AVG_CEIL (uint16_t, uint32_t, 512) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 8 } } */ > /* { dg-final { scan-assembler-times 
{vnsra\.wi} 8 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */ > -/* { dg-final { scan-assembler-times {vadd\.vi} 16 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */ > +/* { dg-final { scan-assembler-times {vadd\.vi} 8 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c > index e2754339d94..6874a3dab1b 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c > @@ -4,7 +4,8 @@ > #include "vec-avg-template.h" > > /* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvnsrl.wi} 6 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */ > +/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */ > /* { dg-final { scan-assembler-times {\tvnsra.wi} 6 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c > index 1f0ef29566d..06f35e14812 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c > @@ -4,7 +4,8 @@ > #include "vec-avg-template.h" > > /* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */ > -/* { 
dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */ > +/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */ > /* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */ > -- > 2.36.3 >
Committed, thanks Kito. Pan -----Original Message----- From: Kito Cheng <kito.cheng@gmail.com> Sent: Wednesday, January 10, 2024 3:12 PM To: Juzhe-Zhong <juzhe.zhong@rivai.ai> Cc: gcc-patches@gcc.gnu.org; kito.cheng@sifive.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com Subject: Re: [PATCH] RISC-V: Refine unsigned avg_floor/avg_ceil LGTM! On Wed, Jan 10, 2024 at 1:05 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote: > > This patch is inspired by LLVM patches: > https://github.com/llvm/llvm-project/pull/76550 > https://github.com/llvm/llvm-project/pull/77473 > > Use vaaddu for AVG vectorization. > > Before this patch: > > vsetivli zero,8,e8,mf2,ta,ma > vle8.v v3,0(a1) > vle8.v v2,0(a2) > vwaddu.vv v1,v3,v2 > vsetvli zero,zero,e16,m1,ta,ma > vadd.vi v1,v1,1 > vsetvli zero,zero,e8,mf2,ta,ma > vnsrl.wi v1,v1,1 > vse8.v v1,0(a0) > ret > > After this patch: > > vsetivli zero,8,e8,mf2,ta,ma > csrwi vxrm,0 > vle8.v v1,0(a1) > vle8.v v2,0(a2) > vaaddu.vv v1,v1,v2 > vse8.v v1,0(a0) > ret > > Note on signed averaging addition > > Based on the RVV spec, there is also a variant for signed averaging addition called vaadd. > But AFAIU, no matter which rounding mode is used, we cannot achieve the semantics of signed averaging addition through vaadd. > Thus this patch only introduces vaaddu. > > More details in: > https://github.com/riscv/riscv-v-spec/issues/935 > https://github.com/riscv/riscv-v-spec/issues/934 > > Tested on both RV32 and RV64 with no regressions. > > OK for trunk? > > gcc/ChangeLog: > > * config/riscv/autovec.md (<u>avg<v_double_trunc>3_floor): Remove. > (avg<v_double_trunc>3_floor): New pattern. > (<u>avg<v_double_trunc>3_ceil): Remove. > (avg<v_double_trunc>3_ceil): New pattern. > (uavg<mode>3_floor): Ditto. > (uavg<mode>3_ceil): Ditto. > * config/riscv/riscv-protos.h (enum insn_flags): Add for average addition. > (enum insn_type): Ditto. > * config/riscv/riscv-v.cc: Ditto. > * config/riscv/vector-iterators.md (ext_to_rshift): Remove. > (EXT_TO_RSHIFT): Ditto. 
> * config/riscv/vector.md: Add VLS modes. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/vls/avg-1.c: Adapt test. > * gcc.target/riscv/rvv/autovec/vls/avg-2.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls/avg-3.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls/avg-4.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls/avg-5.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls/avg-6.c: Ditto. > * gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c: Ditto. > * gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c: Ditto. > > --- > gcc/config/riscv/autovec.md | 50 ++++++++++++++----- > gcc/config/riscv/riscv-protos.h | 8 +++ > gcc/config/riscv/riscv-v.cc | 11 ++++ > gcc/config/riscv/vector-iterators.md | 5 -- > gcc/config/riscv/vector.md | 12 ++--- > .../gcc.target/riscv/rvv/autovec/vls/avg-1.c | 4 +- > .../gcc.target/riscv/rvv/autovec/vls/avg-2.c | 4 +- > .../gcc.target/riscv/rvv/autovec/vls/avg-3.c | 4 +- > .../gcc.target/riscv/rvv/autovec/vls/avg-4.c | 6 +-- > .../gcc.target/riscv/rvv/autovec/vls/avg-5.c | 6 +-- > .../gcc.target/riscv/rvv/autovec/vls/avg-6.c | 6 +-- > .../riscv/rvv/autovec/widen/vec-avg-rv32gcv.c | 7 +-- > .../riscv/rvv/autovec/widen/vec-avg-rv64gcv.c | 7 +-- > 13 files changed, 86 insertions(+), 44 deletions(-) > > diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md > index 775eaa825b0..706cd9717cb 100644 > --- a/gcc/config/riscv/autovec.md > +++ b/gcc/config/riscv/autovec.md > @@ -2345,39 +2345,39 @@ > ;; op[0] = (narrow) ((wide) op[1] + (wide) op[2] + 1)) >> 1; > ;; ------------------------------------------------------------------------- > > -(define_expand "<u>avg<v_double_trunc>3_floor" > +(define_expand "avg<v_double_trunc>3_floor" > [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") > (truncate:<V_DOUBLE_TRUNC> > - (<ext_to_rshift>:VWEXTI > + (ashiftrt:VWEXTI > (plus:VWEXTI > - (any_extend:VWEXTI > + (sign_extend:VWEXTI > (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) > - (any_extend:VWEXTI > + 
(sign_extend:VWEXTI > (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))] > "TARGET_VECTOR" > { > /* First emit a widening addition. */ > rtx tmp1 = gen_reg_rtx (<MODE>mode); > rtx ops1[] = {tmp1, operands[1], operands[2]}; > - insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode); > + insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode); > riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); > > /* Then a narrowing shift. */ > rtx ops2[] = {operands[0], tmp1, const1_rtx}; > - icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode); > + icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode); > riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2); > DONE; > }) > > -(define_expand "<u>avg<v_double_trunc>3_ceil" > +(define_expand "avg<v_double_trunc>3_ceil" > [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") > (truncate:<V_DOUBLE_TRUNC> > - (<ext_to_rshift>:VWEXTI > + (ashiftrt:VWEXTI > (plus:VWEXTI > (plus:VWEXTI > - (any_extend:VWEXTI > + (sign_extend:VWEXTI > (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) > - (any_extend:VWEXTI > + (sign_extend:VWEXTI > (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))) > (const_int 1)))))] > "TARGET_VECTOR" > @@ -2385,7 +2385,7 @@ > /* First emit a widening addition. */ > rtx tmp1 = gen_reg_rtx (<MODE>mode); > rtx ops1[] = {tmp1, operands[1], operands[2]}; > - insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode); > + insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode); > riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); > > /* Then add 1. */ > @@ -2396,11 +2396,37 @@ > > /* Finally, a narrowing shift. 
*/ > rtx ops3[] = {operands[0], tmp2, const1_rtx}; > - icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode); > + icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode); > riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3); > DONE; > }) > > +;; csrwi vxrm, 2 > +;; vaaddu.vv vd, vs2, vs1 > +(define_expand "uavg<mode>3_floor" > + [(match_operand:V_VLSI 0 "register_operand") > + (match_operand:V_VLSI 1 "register_operand") > + (match_operand:V_VLSI 2 "register_operand")] > + "TARGET_VECTOR" > +{ > + insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode); > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, operands); > + DONE; > +}) > + > +;; csrwi vxrm, 0 > +;; vaaddu.vv vd, vs2, vs1 > +(define_expand "uavg<mode>3_ceil" > + [(match_operand:V_VLSI 0 "register_operand") > + (match_operand:V_VLSI 1 "register_operand") > + (match_operand:V_VLSI 2 "register_operand")] > + "TARGET_VECTOR" > +{ > + insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode); > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, operands); > + DONE; > +}) > + > ;; ------------------------------------------------------------------------- > ;; ---- [FP] Rounding. > ;; ------------------------------------------------------------------------- > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > index 00a5b645abe..fc0097acde3 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -366,6 +366,12 @@ enum insn_flags : unsigned int > > /* Means INSN has FRM operand and the value is FRM_RNE. */ > FRM_RNE_P = 1 << 19, > + > + /* Means INSN has VXRM operand and the value is VXRM_RNU. */ > + VXRM_RNU_P = 1 << 20, > + > + /* Means INSN has VXRM operand and the value is VXRM_RDN. 
*/ > + VXRM_RDN_P = 1 << 21, > }; > > enum insn_type : unsigned int > @@ -426,6 +432,8 @@ enum insn_type : unsigned int > BINARY_OP_TAMU = __MASK_OP_TAMU | BINARY_OP_P, > BINARY_OP_TUMA = __MASK_OP_TUMA | BINARY_OP_P, > BINARY_OP_FRM_DYN = BINARY_OP | FRM_DYN_P, > + BINARY_OP_VXRM_RNU = BINARY_OP | VXRM_RNU_P, > + BINARY_OP_VXRM_RDN = BINARY_OP | VXRM_RDN_P, > > /* Ternary operator. Always have real merge operand. */ > TERNARY_OP = HAS_DEST_P | HAS_MASK_P | USE_ALL_TRUES_MASK_P | HAS_MERGE_P > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 2491522191a..7ae579ba890 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -207,6 +207,13 @@ public: > add_input_operand (frm_rtx, Pmode); > } > > + void > + add_rounding_mode_operand (enum fixed_point_rounding_mode rounding_mode) > + { > + rtx frm_rtx = gen_int_mode (rounding_mode, Pmode); > + add_input_operand (frm_rtx, Pmode); > + } > + > /* Return the vtype mode based on insn_flags. > vtype mode mean the mode vsetvl insn set. 
*/ > machine_mode > @@ -334,6 +341,10 @@ public: > add_rounding_mode_operand (FRM_RMM); > else if (m_insn_flags & FRM_RNE_P) > add_rounding_mode_operand (FRM_RNE); > + else if (m_insn_flags & VXRM_RNU_P) > + add_rounding_mode_operand (VXRM_RNU); > + else if (m_insn_flags & VXRM_RDN_P) > + add_rounding_mode_operand (VXRM_RDN); > > gcc_assert (insn_data[(int) icode].n_operands == m_opno); > expand (icode, any_mem_p); > diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md > index b4a276dc2c8..c2ea7e8b10a 100644 > --- a/gcc/config/riscv/vector-iterators.md > +++ b/gcc/config/riscv/vector-iterators.md > @@ -3581,11 +3581,6 @@ > (define_code_attr nmsub_nmadd [(plus "nmsub") (minus "nmadd")]) > (define_code_attr nmsac_nmacc [(plus "nmsac") (minus "nmacc")]) > > -(define_code_attr ext_to_rshift [(sign_extend "ashiftrt") > - (zero_extend "lshiftrt")]) > -(define_code_attr EXT_TO_RSHIFT [(sign_extend "ASHIFTRT") > - (zero_extend "LSHIFTRT")]) > - > (define_code_iterator and_ior [and ior]) > > (define_code_iterator any_float_binop [plus mult minus div]) > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index 24b7b4394be..c1a282a27b3 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -4239,8 +4239,8 @@ > (set_attr "mode" "<MODE>")]) > > (define_insn "@pred_<sat_op><mode>" > - [(set (match_operand:VI 0 "register_operand" "=vd, vd, vr, vr") > - (if_then_else:VI > + [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vd, vr, vr") > + (if_then_else:V_VLSI > (unspec:<VM> > [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") > (match_operand 5 "vector_length_operand" " rK, rK, rK, rK") > @@ -4251,10 +4251,10 @@ > (reg:SI VL_REGNUM) > (reg:SI VTYPE_REGNUM) > (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE) > - (unspec:VI > - [(match_operand:VI 3 "register_operand" " vr, vr, vr, vr") > - (match_operand:VI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP) > - (match_operand:VI 2 
"vector_merge_operand" " vu, 0, vu, 0")))] > + (unspec:V_VLSI > + [(match_operand:V_VLSI 3 "register_operand" " vr, vr, vr, vr") > + (match_operand:V_VLSI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP) > + (match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0")))] > "TARGET_VECTOR" > "v<sat_op>.vv\t%0,%3,%4%p1" > [(set_attr "type" "<sat_insn_type>") > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c > index d53bd3a386a..2327a3d018e 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c > @@ -26,9 +26,9 @@ DEF_AVG_FLOOR (uint8_t, uint16_t, 1024) > DEF_AVG_FLOOR (uint8_t, uint16_t, 2048) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 10 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c > index 68d1df73a5f..8030810fdbd 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c > @@ -24,9 +24,9 @@ DEF_AVG_FLOOR (uint16_t, uint32_t, 512) > DEF_AVG_FLOOR (uint16_t, uint32_t, 1024) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 9 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */ > -/* { dg-final { 
scan-assembler-times {vnsrl\.wi} 9 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c > index 07ffab61f67..dce0ffa346e 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c > @@ -22,9 +22,9 @@ DEF_AVG_FLOOR (uint32_t, uint64_t, 256) > DEF_AVG_FLOOR (uint32_t, uint64_t, 512) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 8 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c > index 83e219ca09a..65912fb39f2 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c > @@ -26,10 +26,10 @@ DEF_AVG_CEIL (uint8_t, uint16_t, 1024) > DEF_AVG_CEIL (uint8_t, uint16_t, 2048) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 10 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */ > -/* { dg-final { scan-assembler-times {vadd\.vi} 20 } } */ 
> +/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */ > +/* { dg-final { scan-assembler-times {vadd\.vi} 10 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c > index 325faeaa930..a197b24c234 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c > @@ -24,10 +24,10 @@ DEF_AVG_CEIL (uint16_t, uint32_t, 512) > DEF_AVG_CEIL (uint16_t, uint32_t, 1024) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 9 } } */ > /* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */ > -/* { dg-final { scan-assembler-times {vadd\.vi} 18 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */ > +/* { dg-final { scan-assembler-times {vadd\.vi} 9 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c > index d836428c7f4..a53de71a01b 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c > @@ -22,10 +22,10 @@ DEF_AVG_CEIL (uint16_t, uint32_t, 256) > DEF_AVG_CEIL (uint16_t, uint32_t, 512) > > /* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */ > -/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 8 } } */ > /* { dg-final { scan-assembler-times 
{vnsra\.wi} 8 } } */ > -/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */ > -/* { dg-final { scan-assembler-times {vadd\.vi} 16 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */ > +/* { dg-final { scan-assembler-times {vadd\.vi} 8 } } */ > /* { dg-final { scan-assembler-not {csrr} } } */ > /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ > /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c > index e2754339d94..6874a3dab1b 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c > @@ -4,7 +4,8 @@ > #include "vec-avg-template.h" > > /* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvnsrl.wi} 6 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */ > +/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */ > /* { dg-final { scan-assembler-times {\tvnsra.wi} 6 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c > index 1f0ef29566d..06f35e14812 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c > @@ -4,7 +4,8 @@ > #include "vec-avg-template.h" > > /* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */ > -/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */ > -/* { 
dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */ > +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */ > +/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */ > /* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */ > +/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */ > -- > 2.36.3 >
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 775eaa825b0..706cd9717cb 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2345,39 +2345,39 @@ ;; op[0] = (narrow) ((wide) op[1] + (wide) op[2] + 1)) >> 1; ;; ------------------------------------------------------------------------- -(define_expand "<u>avg<v_double_trunc>3_floor" +(define_expand "avg<v_double_trunc>3_floor" [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") (truncate:<V_DOUBLE_TRUNC> - (<ext_to_rshift>:VWEXTI + (ashiftrt:VWEXTI (plus:VWEXTI - (any_extend:VWEXTI + (sign_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) - (any_extend:VWEXTI + (sign_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))] "TARGET_VECTOR" { /* First emit a widening addition. */ rtx tmp1 = gen_reg_rtx (<MODE>mode); rtx ops1[] = {tmp1, operands[1], operands[2]}; - insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode); + insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode); riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); /* Then a narrowing shift. */ rtx ops2[] = {operands[0], tmp1, const1_rtx}; - icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode); + icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode); riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2); DONE; }) -(define_expand "<u>avg<v_double_trunc>3_ceil" +(define_expand "avg<v_double_trunc>3_ceil" [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") (truncate:<V_DOUBLE_TRUNC> - (<ext_to_rshift>:VWEXTI + (ashiftrt:VWEXTI (plus:VWEXTI (plus:VWEXTI - (any_extend:VWEXTI + (sign_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) - (any_extend:VWEXTI + (sign_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))) (const_int 1)))))] "TARGET_VECTOR" @@ -2385,7 +2385,7 @@ /* First emit a widening addition. 
*/ rtx tmp1 = gen_reg_rtx (<MODE>mode); rtx ops1[] = {tmp1, operands[1], operands[2]}; - insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode); + insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode); riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); /* Then add 1. */ @@ -2396,11 +2396,37 @@ /* Finally, a narrowing shift. */ rtx ops3[] = {operands[0], tmp2, const1_rtx}; - icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode); + icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode); riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3); DONE; }) +;; csrwi vxrm, 2 +;; vaaddu.vv vd, vs2, vs1 +(define_expand "uavg<mode>3_floor" + [(match_operand:V_VLSI 0 "register_operand") + (match_operand:V_VLSI 1 "register_operand") + (match_operand:V_VLSI 2 "register_operand")] + "TARGET_VECTOR" +{ + insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, operands); + DONE; +}) + +;; csrwi vxrm, 0 +;; vaaddu.vv vd, vs2, vs1 +(define_expand "uavg<mode>3_ceil" + [(match_operand:V_VLSI 0 "register_operand") + (match_operand:V_VLSI 1 "register_operand") + (match_operand:V_VLSI 2 "register_operand")] + "TARGET_VECTOR" +{ + insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, operands); + DONE; +}) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Rounding. ;; ------------------------------------------------------------------------- diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 00a5b645abe..fc0097acde3 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -366,6 +366,12 @@ enum insn_flags : unsigned int /* Means INSN has FRM operand and the value is FRM_RNE. 
*/ FRM_RNE_P = 1 << 19, + + /* Means INSN has VXRM operand and the value is VXRM_RNU. */ + VXRM_RNU_P = 1 << 20, + + /* Means INSN has VXRM operand and the value is VXRM_RDN. */ + VXRM_RDN_P = 1 << 21, }; enum insn_type : unsigned int @@ -426,6 +432,8 @@ enum insn_type : unsigned int BINARY_OP_TAMU = __MASK_OP_TAMU | BINARY_OP_P, BINARY_OP_TUMA = __MASK_OP_TUMA | BINARY_OP_P, BINARY_OP_FRM_DYN = BINARY_OP | FRM_DYN_P, + BINARY_OP_VXRM_RNU = BINARY_OP | VXRM_RNU_P, + BINARY_OP_VXRM_RDN = BINARY_OP | VXRM_RDN_P, /* Ternary operator. Always have real merge operand. */ TERNARY_OP = HAS_DEST_P | HAS_MASK_P | USE_ALL_TRUES_MASK_P | HAS_MERGE_P diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 2491522191a..7ae579ba890 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -207,6 +207,13 @@ public: add_input_operand (frm_rtx, Pmode); } + void + add_rounding_mode_operand (enum fixed_point_rounding_mode rounding_mode) + { + rtx vxrm_rtx = gen_int_mode (rounding_mode, Pmode); + add_input_operand (vxrm_rtx, Pmode); + } + /* Return the vtype mode based on insn_flags. vtype mode mean the mode vsetvl insn set. 
*/ machine_mode @@ -334,6 +341,10 @@ public: add_rounding_mode_operand (FRM_RMM); else if (m_insn_flags & FRM_RNE_P) add_rounding_mode_operand (FRM_RNE); + else if (m_insn_flags & VXRM_RNU_P) + add_rounding_mode_operand (VXRM_RNU); + else if (m_insn_flags & VXRM_RDN_P) + add_rounding_mode_operand (VXRM_RDN); gcc_assert (insn_data[(int) icode].n_operands == m_opno); expand (icode, any_mem_p); diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index b4a276dc2c8..c2ea7e8b10a 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -3581,11 +3581,6 @@ (define_code_attr nmsub_nmadd [(plus "nmsub") (minus "nmadd")]) (define_code_attr nmsac_nmacc [(plus "nmsac") (minus "nmacc")]) -(define_code_attr ext_to_rshift [(sign_extend "ashiftrt") - (zero_extend "lshiftrt")]) -(define_code_attr EXT_TO_RSHIFT [(sign_extend "ASHIFTRT") - (zero_extend "LSHIFTRT")]) - (define_code_iterator and_ior [and ior]) (define_code_iterator any_float_binop [plus mult minus div]) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 24b7b4394be..c1a282a27b3 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -4239,8 +4239,8 @@ (set_attr "mode" "<MODE>")]) (define_insn "@pred_<sat_op><mode>" - [(set (match_operand:VI 0 "register_operand" "=vd, vd, vr, vr") - (if_then_else:VI + [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vd, vr, vr") + (if_then_else:V_VLSI (unspec:<VM> [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") (match_operand 5 "vector_length_operand" " rK, rK, rK, rK") @@ -4251,10 +4251,10 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE) - (unspec:VI - [(match_operand:VI 3 "register_operand" " vr, vr, vr, vr") - (match_operand:VI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP) - (match_operand:VI 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (unspec:V_VLSI + [(match_operand:V_VLSI 3 "register_operand" " vr, 
vr, vr, vr") + (match_operand:V_VLSI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP) + (match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "v<sat_op>.vv\t%0,%3,%4%p1" [(set_attr "type" "<sat_insn_type>") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c index d53bd3a386a..2327a3d018e 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c @@ -26,9 +26,9 @@ DEF_AVG_FLOOR (uint8_t, uint16_t, 1024) DEF_AVG_FLOOR (uint8_t, uint16_t, 2048) /* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */ -/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 10 } } */ /* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */ -/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */ +/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c index 68d1df73a5f..8030810fdbd 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c @@ -24,9 +24,9 @@ DEF_AVG_FLOOR (uint16_t, uint32_t, 512) DEF_AVG_FLOOR (uint16_t, uint32_t, 1024) /* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */ -/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 9 } } */ /* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */ -/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */ +/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-not "1,1" 
"optimized" } } */ /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c index 07ffab61f67..dce0ffa346e 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c @@ -22,9 +22,9 @@ DEF_AVG_FLOOR (uint32_t, uint64_t, 256) DEF_AVG_FLOOR (uint32_t, uint64_t, 512) /* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */ -/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 8 } } */ /* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */ -/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */ +/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c index 83e219ca09a..65912fb39f2 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-4.c @@ -26,10 +26,10 @@ DEF_AVG_CEIL (uint8_t, uint16_t, 1024) DEF_AVG_CEIL (uint8_t, uint16_t, 2048) /* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */ -/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 10 } } */ /* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */ -/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */ -/* { dg-final { scan-assembler-times {vadd\.vi} 20 } } */ +/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */ +/* { dg-final { scan-assembler-times {vadd\.vi} 10 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ /* { dg-final { 
scan-tree-dump-not "2,2" "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c index 325faeaa930..a197b24c234 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-5.c @@ -24,10 +24,10 @@ DEF_AVG_CEIL (uint16_t, uint32_t, 512) DEF_AVG_CEIL (uint16_t, uint32_t, 1024) /* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */ -/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 9 } } */ /* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */ -/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */ -/* { dg-final { scan-assembler-times {vadd\.vi} 18 } } */ +/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */ +/* { dg-final { scan-assembler-times {vadd\.vi} 9 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c index d836428c7f4..a53de71a01b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-6.c @@ -22,10 +22,10 @@ DEF_AVG_CEIL (uint16_t, uint32_t, 256) DEF_AVG_CEIL (uint16_t, uint32_t, 512) /* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */ -/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 8 } } */ /* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */ -/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */ -/* { dg-final { scan-assembler-times {vadd\.vi} 16 } } */ +/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */ +/* { dg-final { scan-assembler-times {vadd\.vi} 8 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { 
dg-final { scan-tree-dump-not "1,1" "optimized" } } */ /* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c index e2754339d94..6874a3dab1b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c @@ -4,7 +4,8 @@ #include "vec-avg-template.h" /* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */ -/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */ -/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */ -/* { dg-final { scan-assembler-times {\tvnsrl.wi} 6 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */ +/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */ /* { dg-final { scan-assembler-times {\tvnsra.wi} 6 } } */ +/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c index 1f0ef29566d..06f35e14812 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c @@ -4,7 +4,8 @@ #include "vec-avg-template.h" /* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */ -/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */ -/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */ -/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */ +/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */ +/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */ /* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */ +/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */