Message ID | 20200312145900.2054-10-zhiwei_liu@c-sky.com |
---|---|
State | New |
Headers | show |
Series | target/riscv: support vector extension v0.7.1 | expand |
On 3/12/20 7:58 AM, LIU Zhiwei wrote: > + if (a->vm && s->vl_eq_vlmax) { \ > + tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd), \ > + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), \ > + MAXSZ(s), MAXSZ(s)); \ The first argument here should be just s->sew. You should have seen the assert fire: tcg_debug_assert(vece <= MO_64); It would be nice to pull out the bulk of GEN_OPIVV_GVEC_TRANS as a function, and pass in tcg_gen_gvec_* as a function pointer, and fns as a pointer. In general, I prefer the functions that are generated by macros like this to have exactly one executable statement -- the call to the helper that does all of the work using the arguments provided. That way a maximum number of lines are available for stepping with the debugger. > + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ > + data = FIELD_DP32(data, VDATA, VM, a->vm); \ > + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ Why are these replicated in each trans_* function, and not done in opiv?_trans, where the rest of the descriptor is created? > +/* OPIVX without GVEC IR */ > +#define GEN_OPIVX_TRANS(NAME, CHECK) \ > +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ > +{ \ > + if (CHECK(s, a)) { \ > + uint32_t data = 0; \ > + static gen_helper_opivx const fns[4] = { \ > + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ > + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ > + }; \ > + \ > + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ > + data = FIELD_DP32(data, VDATA, VM, a->vm); \ > + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ > + return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \ > + } \ > + return false; \ > +} > + > +GEN_OPIVX_TRANS(vrsub_vx, opivx_check) Note that you *can* generate vector code for this, you just have to write your own helpers. E.g. static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { tcg_gen_vec_sub8_i64(d, b, a); } // etc, reversing the arguments and passing on to sub. 
static const GVecGen2s rsub_op[4] = { { .fni8 = tcg_gen_vec_rsub8_i64, .fniv = tcg_gen_rsub_vec, .fno = gen_helper_gvec_rsubs8, .opt_opc = vecop_list_sub, .vece = MO_8 }, { .fni8 = tcg_gen_vec_rsub16_i64, .fniv = tcg_gen_rsub_vec, .fno = gen_helper_gvec_rsubs16, .opt_opc = vecop_list_sub, .vece = MO_16 }, { .fni4 = tcg_gen_rsub_i32, .fniv = tcg_gen_rsub_vec, .fno = gen_helper_gvec_rsubs32, .opt_opc = vecop_list_sub, .vece = MO_32 }, { .fni8 = tcg_gen_rsub_i64, .fniv = tcg_gen_rsub_vec, .fno = gen_helper_gvec_rsubs64, .opt_opc = vecop_list_sub, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; static void gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) { tcg_debug_assert(vece <= MO_64); tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); } static void gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t c, uint32_t oprsz, uint32_t maxsz) { tcg_debug_assert(vece <= MO_64); tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); } > +/* generate the helpers for OPIVV */ > +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ > +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ > + void *vs2, CPURISCVState *env, uint32_t desc) \ > +{ \ > + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ > + uint32_t mlen = vext_mlen(desc); \ > + uint32_t vm = vext_vm(desc); \ > + uint32_t vl = env->vl; \ > + uint32_t i; \ > + for (i = 0; i < vl; i++) { \ > + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ > + continue; \ > + } \ > + do_##NAME(vd, vs1, vs2, i); \ > + } \ > + if (i != 0) { \ > + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ > + } \ > +} > + > +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) > +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) > +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) > +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) > +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) > +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) > +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) > +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) The body of GEN_VEXT_VV can be an 
inline function, calling the helper functions that you generated above. > +/* > + * If XLEN < SEW, the value from the x register is sign-extended to SEW bits. > + * So (target_long)s1 is need. (T1)(target_long)s1 gives the real operator type. > + * (TX1)(T1)(target_long)s1 expands the operator type of widen operations > + * or narrow operations > + */ > +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ > +static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i) \ > +{ \ > + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ > + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)(target_long)s1); \ > +} Why not just make the type of s1 be target_long in the parameter? > +/* generate the helpers for instructions with one vector and one sclar */ > +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ > +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ > + void *vs2, CPURISCVState *env, uint32_t desc) \ > +{ \ > + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ > + uint32_t mlen = vext_mlen(desc); \ > + uint32_t vm = vext_vm(desc); \ > + uint32_t vl = env->vl; \ > + uint32_t i; \ > + \ > + for (i = 0; i < vl; i++) { \ > + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ > + continue; \ > + } \ > + do_##NAME(vd, s1, vs2, i); \ > + } \ > + if (i != 0) { \ > + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ > + } \ > +} Likewise an inline function. r~
On 2020/3/14 13:25, Richard Henderson wrote: > On 3/12/20 7:58 AM, LIU Zhiwei wrote: >> + if (a->vm && s->vl_eq_vlmax) { \ >> + tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd), \ >> + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), \ >> + MAXSZ(s), MAXSZ(s)); \ > The first argument here should be just s->sew. > You should have see the assert fire: > > tcg_debug_assert(vece <= MO_64); Oh, sorry, I did not see this. I must miss testing this path. > It would be nice to pull out the bulk of GEN_OPIVV_GVEC_TRANS as a function, > and pass in tcg_gen_gvec_* as a function pointer, and fns as a pointer. > > In general, I prefer the functions that are generated by macros like this to > have exactly one executable statement -- the call to the helper that does all > of the work using the arguments provided. That way a maximum number of lines > are available for stepping with the debugger. Can't agree more. When I debug the test cases, I also find it is hard to debug the generated code. The macro to generate code should be as short as possible. I accept your advice to pull out the bulk of GEN_OPIVV_GVEC_TRANS as a function. > >> + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ >> + data = FIELD_DP32(data, VDATA, VM, a->vm); \ >> + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ > Why are these replicated in each trans_* function, and not done in opiv?_trans, > where the rest of the descriptor is created? The opiv? _trans is a better place. 
> >> +/* OPIVX without GVEC IR */ >> +#define GEN_OPIVX_TRANS(NAME, CHECK) \ >> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ >> +{ \ >> + if (CHECK(s, a)) { \ >> + uint32_t data = 0; \ >> + static gen_helper_opivx const fns[4] = { \ >> + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ >> + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ >> + }; \ >> + \ >> + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ >> + data = FIELD_DP32(data, VDATA, VM, a->vm); \ >> + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ >> + return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \ >> + } \ >> + return false; \ >> +} >> + >> +GEN_OPIVX_TRANS(vrsub_vx, opivx_check) > Note that you *can* generate vector code for this, > you just have to write your own helpers. > > E.g. > > static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 a) > { > tcg_gen_vec_sub8_i64(d, b, a); > } > // etc, reversing the arguments and passing on to sub. > > static const GVecGen2s rsub_op[4] = { > { .fni8 = tcg_gen_vec_rsub8_i64, > .fniv = tcg_gen_rsub_vec, > .fno = gen_helper_gvec_rsubs8, > .opt_opc = vecop_list_sub, > .vece = MO_8 }, > { .fni8 = tcg_gen_vec_rsub16_i64, > .fniv = tcg_gen_rsub_vec, > .fno = gen_helper_gvec_rsubs16, > .opt_opc = vecop_list_sub, > .vece = MO_16 }, > { .fni4 = tcg_gen_rsub_i32, > .fniv = tcg_gen_rsub_vec, > .fno = gen_helper_gvec_rsubs32, > .opt_opc = vecop_list_sub, > .vece = MO_32 }, > { .fni8 = tcg_gen_rsub_i64, > .fniv = tcg_gen_rsub_vec, > .fno = gen_helper_gvec_rsubs64, > .opt_opc = vecop_list_sub, > .prefer_i64 = TCG_TARGET_REG_BITS == 64, > .vece = MO_64 }, > }; > static void gen_gvec_rsubs(unsigned vece, uint32_t dofs, > uint32_t aofs, TCGv_i64 c, > uint32_t oprsz, uint32_t maxsz) > { > tcg_debug_assert(vece <= MO_64); > tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); > } > > static void gen_gvec_rsubi(unsigned vece, uint32_t dofs, > uint32_t aofs, int64_t c, > uint32_t oprsz, uint32_t maxsz) > { > tcg_debug_assert(vece <= 
MO_64); > tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); > } Good idea. I will try to these GVEC IRs. >> +/* generate the helpers for OPIVV */ >> +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ >> +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ >> + void *vs2, CPURISCVState *env, uint32_t desc) \ >> +{ \ >> + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ >> + uint32_t mlen = vext_mlen(desc); \ >> + uint32_t vm = vext_vm(desc); \ >> + uint32_t vl = env->vl; \ >> + uint32_t i; \ >> + for (i = 0; i < vl; i++) { \ >> + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ >> + continue; \ >> + } \ >> + do_##NAME(vd, vs1, vs2, i); \ >> + } \ >> + if (i != 0) { \ >> + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ >> + } \ >> +} >> + >> +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) >> +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) >> +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) >> +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) >> +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) >> +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) >> +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) >> +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) > The body of GEN_VEXT_VV can be an inline function, calling the helper functions > that you generated above. Yes, I will. >> +/* >> + * If XLEN < SEW, the value from the x register is sign-extended to SEW bits. >> + * So (target_long)s1 is need. (T1)(target_long)s1 gives the real operator type. >> + * (TX1)(T1)(target_long)s1 expands the operator type of widen operations >> + * or narrow operations >> + */ >> +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ >> +static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i) \ >> +{ \ >> + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ >> + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)(target_long)s1); \ >> +} > Why not just make the type of s1 be target_long in the parameter? Yes, I should. 
> >> +/* generate the helpers for instructions with one vector and one sclar */ >> +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ >> +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ >> + void *vs2, CPURISCVState *env, uint32_t desc) \ >> +{ \ >> + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ >> + uint32_t mlen = vext_mlen(desc); \ >> + uint32_t vm = vext_vm(desc); \ >> + uint32_t vl = env->vl; \ >> + uint32_t i; \ >> + \ >> + for (i = 0; i < vl; i++) { \ >> + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ >> + continue; \ >> + } \ >> + do_##NAME(vd, s1, vs2, i); \ >> + } \ >> + if (i != 0) { \ >> + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ >> + } \ >> +} > Likewise an inline function. Yes, I will. Very informative comments. I will try to address them in next patch set soon. Thanks very much. Zhiwei > > > r~
On 2020/3/14 13:25, Richard Henderson wrote: > On 3/12/20 7:58 AM, LIU Zhiwei wrote: >> + if (a->vm && s->vl_eq_vlmax) { \ >> + tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd), \ >> + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), \ >> + MAXSZ(s), MAXSZ(s)); \ > The first argument here should be just s->sew. > You should have see the assert fire: > > tcg_debug_assert(vece <= MO_64); > > It would be nice to pull out the bulk of GEN_OPIVV_GVEC_TRANS as a function, > and pass in tcg_gen_gvec_* as a function pointer, and fns as a pointer. > > In general, I prefer the functions that are generated by macros like this to > have exactly one executable statement -- the call to the helper that does all > of the work using the arguments provided. That way a maximum number of lines > are available for stepping with the debugger. > >> + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ >> + data = FIELD_DP32(data, VDATA, VM, a->vm); \ >> + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ > Why are these replicated in each trans_* function, and not done in opiv?_trans, > where the rest of the descriptor is created? > >> +/* OPIVX without GVEC IR */ >> +#define GEN_OPIVX_TRANS(NAME, CHECK) \ >> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ >> +{ \ >> + if (CHECK(s, a)) { \ >> + uint32_t data = 0; \ >> + static gen_helper_opivx const fns[4] = { \ >> + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ >> + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ >> + }; \ >> + \ >> + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ >> + data = FIELD_DP32(data, VDATA, VM, a->vm); \ >> + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ >> + return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \ >> + } \ >> + return false; \ >> +} >> + >> +GEN_OPIVX_TRANS(vrsub_vx, opivx_check) > Note that you *can* generate vector code for this, > you just have to write your own helpers. > > E.g. 
> > static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 a) > { > tcg_gen_vec_sub8_i64(d, b, a); > } > // etc, reversing the arguments and passing on to sub. > > static const GVecGen2s rsub_op[4] = { > { .fni8 = tcg_gen_vec_rsub8_i64, > .fniv = tcg_gen_rsub_vec, > .fno = gen_helper_gvec_rsubs8, > .opt_opc = vecop_list_sub, > .vece = MO_8 }, > { .fni8 = tcg_gen_vec_rsub16_i64, > .fniv = tcg_gen_rsub_vec, > .fno = gen_helper_gvec_rsubs16, > .opt_opc = vecop_list_sub, > .vece = MO_16 }, > { .fni4 = tcg_gen_rsub_i32, > .fniv = tcg_gen_rsub_vec, > .fno = gen_helper_gvec_rsubs32, > .opt_opc = vecop_list_sub, > .vece = MO_32 }, > { .fni8 = tcg_gen_rsub_i64, > .fniv = tcg_gen_rsub_vec, > .fno = gen_helper_gvec_rsubs64, > .opt_opc = vecop_list_sub, > .prefer_i64 = TCG_TARGET_REG_BITS == 64, > .vece = MO_64 }, > }; > > static void gen_gvec_rsubs(unsigned vece, uint32_t dofs, > uint32_t aofs, TCGv_i64 c, > uint32_t oprsz, uint32_t maxsz) > { > tcg_debug_assert(vece <= MO_64); > tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); > } > > static void gen_gvec_rsubi(unsigned vece, uint32_t dofs, > uint32_t aofs, int64_t c, > uint32_t oprsz, uint32_t maxsz) > { > tcg_debug_assert(vece <= MO_64); > tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); > } Hi Richard, When I try to add GVEC IR rsubs,I find it is some difficult to keep it separate from tcg-runtime-gvec.c. The .fno functions, e.g., gen_helper_gvec_rsubs8 need to be defined like void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) { intptr_t oprsz = simd_oprsz(desc); vec8 vecb = (vec8)DUP16(b); intptr_t i; for (i = 0; i < oprsz; i += sizeof(vec8)) { *(vec8 *)(d + i) = vecb - *(vec8 *)(a + i); } clear_high(d, oprsz, desc); } The vec8 and DUP are defined in tcg-runtime-gvec.c. Should I declare them in somewhere else, or just put HELPER(gvec_subs8) into tcg-runtime-gvec.c? 
Zhiwei >> +/* generate the helpers for OPIVV */ >> +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ >> +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ >> + void *vs2, CPURISCVState *env, uint32_t desc) \ >> +{ \ >> + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ >> + uint32_t mlen = vext_mlen(desc); \ >> + uint32_t vm = vext_vm(desc); \ >> + uint32_t vl = env->vl; \ >> + uint32_t i; \ >> + for (i = 0; i < vl; i++) { \ >> + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ >> + continue; \ >> + } \ >> + do_##NAME(vd, vs1, vs2, i); \ >> + } \ >> + if (i != 0) { \ >> + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ >> + } \ >> +} >> + >> +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) >> +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) >> +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) >> +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) >> +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) >> +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) >> +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) >> +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) > The body of GEN_VEXT_VV can be an inline function, calling the helper functions > that you generated above. > >> +/* >> + * If XLEN < SEW, the value from the x register is sign-extended to SEW bits. >> + * So (target_long)s1 is need. (T1)(target_long)s1 gives the real operator type. >> + * (TX1)(T1)(target_long)s1 expands the operator type of widen operations >> + * or narrow operations >> + */ >> +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ >> +static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i) \ >> +{ \ >> + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ >> + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)(target_long)s1); \ >> +} > Why not just make the type of s1 be target_long in the parameter? 
> >> +/* generate the helpers for instructions with one vector and one sclar */ >> +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ >> +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ >> + void *vs2, CPURISCVState *env, uint32_t desc) \ >> +{ \ >> + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ >> + uint32_t mlen = vext_mlen(desc); \ >> + uint32_t vm = vext_vm(desc); \ >> + uint32_t vl = env->vl; \ >> + uint32_t i; \ >> + \ >> + for (i = 0; i < vl; i++) { \ >> + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ >> + continue; \ >> + } \ >> + do_##NAME(vd, s1, vs2, i); \ >> + } \ >> + if (i != 0) { \ >> + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ >> + } \ >> +} > Likewise an inline function. > > > r~
On 3/23/20 1:10 AM, LIU Zhiwei wrote: >> static void gen_gvec_rsubi(unsigned vece, uint32_t dofs, >> uint32_t aofs, int64_t c, >> uint32_t oprsz, uint32_t maxsz) >> { >> tcg_debug_assert(vece <= MO_64); >> tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); >> } > Hi Richard, > > When I try to add GVEC IR rsubs, I find it is somewhat difficult to keep it > separate from tcg-runtime-gvec.c. > > The .fno functions, e.g., gen_helper_gvec_rsubs8 needs to be defined like > > void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) > > { > > intptr_t oprsz = simd_oprsz(desc); > > vec8 vecb = (vec8)DUP16(b); > > intptr_t i; > > for (i = 0; i < oprsz; i += sizeof(vec8)) { > > *(vec8 *)(d + i) = vecb - *(vec8 *)(a + i); > > } > > clear_high(d, oprsz, desc); > > } > > > The vec8 and DUP are defined in tcg-runtime-gvec.c. Update your branch -- they're gone since commit 0a83e43a9ee6. Just use normal integer types. r~
diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 70a4b05f75..e73701d4bb 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -269,3 +269,24 @@ DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 1330703720..d1034a0e61 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -44,6 +44,7 @@ &u imm rd &shift shamt rs1 rd &atomic aq rl rs2 rs1 rd +&rmrr vm rd rs1 rs2 &rwdvm vm wd rd rs1 rs2 &r2nfvm vm rd rs1 nf &rnfvm vm rd rs1 rs2 nf @@ -68,6 +69,7 @@ @r2 ....... 
..... ..... ... ..... ....... %rs1 %rd @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd +@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @@ -275,5 +277,13 @@ vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm # *** new major opcode OP-V *** +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm + vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 
1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index 3c677160c5..00c7ec976f 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -730,3 +730,223 @@ GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) #endif + +/* + *** Vector Integer Arithmetic Instructions + */ +#define MAXSZ(s) (s->vlen >> (3 - s->lmul)) + +static bool opivv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s, RVV) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false)); +} + +/* OPIVV with GVEC IR */ +#define GEN_OPIVV_GVEC_TRANS(NAME, GVSUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (!opivv_check(s, a)) { \ + return false; \ + } \ + \ + if (a->vm && s->vl_eq_vlmax) { \ + tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd), \ + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), \ + MAXSZ(s), MAXSZ(s)); \ + } else { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), \ + cpu_env, 0, s->vlen / 8, data, fns[s->sew]); \ + } \ + return true; \ +} +GEN_OPIVV_GVEC_TRANS(vadd_vv, add) +GEN_OPIVV_GVEC_TRANS(vsub_vv, sub) + +typedef void (*gen_helper_opivx)(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_opivx fn, DisasContext *s) +{ + TCGv_ptr 
dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + src2 = tcg_temp_new_ptr(); + src1 = tcg_temp_new(); + gen_get_gpr(src1, rs1); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, src1, src2, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(src2); + tcg_temp_free(src1); + tcg_temp_free_i32(desc); + return true; +} + +static bool opivx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s, RVV) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false)); +} +/* OPIVX with GVEC IR */ +#define GEN_OPIVX_GVEC_TRANS(NAME, GVSUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (!opivx_check(s, a)) { \ + return false; \ + } \ + \ + if (a->vm && s->vl_eq_vlmax) { \ + TCGv_i64 src1 = tcg_temp_new_i64(); \ + TCGv tmp = tcg_temp_new(); \ + gen_get_gpr(tmp, a->rs1); \ + tcg_gen_ext_tl_i64(src1, tmp); \ + tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd), \ + vreg_ofs(s, a->rs2), src1, MAXSZ(s), MAXSZ(s)); \ + tcg_temp_free_i64(src1); \ + tcg_temp_free(tmp); \ + return true; \ + } else { \ + uint32_t data = 0; \ + static gen_helper_opivx const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \ + } \ + return true; \ +} +GEN_OPIVX_GVEC_TRANS(vadd_vx, adds) +GEN_OPIVX_GVEC_TRANS(vsub_vx, subs) + +/* OPIVX without GVEC IR */ +#define GEN_OPIVX_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, 
arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opivx const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \ + } \ + return false; \ +} + +GEN_OPIVX_TRANS(vrsub_vx, opivx_check) + +static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, + uint32_t data, gen_helper_opivx fn, DisasContext *s, int zx) +{ + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + src2 = tcg_temp_new_ptr(); + if (zx) { + src1 = tcg_const_tl(imm); + } else { + src1 = tcg_const_tl(sextract64(imm, 0, 5)); + } + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, src1, src2, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(src2); + tcg_temp_free(src1); + tcg_temp_free_i32(desc); + return true; +} + +/* OPIVI with GVEC IR */ +#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, GVSUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (!opivx_check(s, a)) { \ + return false; \ + } \ + \ + if (a->vm && s->vl_eq_vlmax) { \ + tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd), \ + vreg_ofs(s, a->rs2), sextract64(a->rs1, 0, 5), \ + MAXSZ(s), MAXSZ(s)); \ + return true; \ + } else { \ + uint32_t data = 0; \ + static gen_helper_opivx const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = 
FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + return opivi_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew], s, ZX); \ + } \ + return true; \ +} +GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi) + +/* OPIVI without GVEC IR */ +#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opivx const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + return opivi_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} +GEN_OPIVI_TRANS(vrsub_vi, 0, vrsub_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index f9b409b169..abdf3b82a8 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -828,3 +828,125 @@ GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) + +/* + *** Vector Integer Arithmetic Instructions + */ + +/* expand macro args before macro */ +#define RVVCALL(macro, ...) 
macro(__VA_ARGS__) + +/* (TD, T1, T2, TX1, TX2) */ +#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t +#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t +#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t +#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t + +/* operation of two vector elements */ +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1); \ +} +#define DO_SUB(N, M) (N - M) +#define DO_RSUB(N, M) (M - N) + +RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) +RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) + +/* generate the helpers for OPIVV */ +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs1, vs2, i); \ + } \ + if (i != 0) { \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ + } \ +} + +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) + +/* + * If XLEN < 
SEW, the value from the x register is sign-extended to SEW bits. + * So (target_long)s1 is need. (T1)(target_long)s1 gives the real operator type. + * (TX1)(T1)(target_long)s1 expands the operator type of widen operations + * or narrow operations + */ +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)(target_long)s1); \ +} +RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) +RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) +RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) + +/* generate the helpers for instructions with one vector and one sclar */ +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, s1, vs2, i); \ + } \ + if (i != 0) { \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ + } \ +} +GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh) 
+GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq)
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> --- target/riscv/helper.h | 21 +++ target/riscv/insn32.decode | 10 ++ target/riscv/insn_trans/trans_rvv.inc.c | 220 ++++++++++++++++++++++++ target/riscv/vector_helper.c | 122 +++++++++++++ 4 files changed, 373 insertions(+)