Message ID | 20240813113436.831-8-zhiwei_liu@linux.alibaba.com
---|---
State | New
Series | tcg/riscv: Add support for vector
On 8/13/24 21:34, LIU Zhiwei wrote:
> @@ -641,6 +645,13 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
>      case TCG_TYPE_I64:
>          tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
>          break;
> +    case TCG_TYPE_V64:
> +    case TCG_TYPE_V128:
> +    case TCG_TYPE_V256:
> +        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
> +        tcg_target_set_vec_config(s, type, prev_vece);
> +        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);

I suggest these asserts be in tcg_out_opc_*.
That way you don't need to replicate them at all uses.

> +static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
> +                                   TCGReg dst, TCGReg src)

Oh, please drop all of the inline markup, from all patches.
Let the compiler decide.

> +static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
> +                                    TCGReg dst, TCGReg base, intptr_t offset)
> +{
> +    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
> +    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
> +}

Is this really better than using a strided load with rs2 = r0?

r~
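Concretely, Richard's suggestion amounts to asserting once inside the encoder. A minimal sketch, assuming an encode_v() helper along the lines of the series' other encoders (the helper name and operand layout are assumptions, not taken from the patch):

    /*
     * Sketch only: validate vector operands once in the encoder instead
     * of at every call site.  encode_v() is a hypothetical stand-in for
     * this series' vector instruction encoder.
     */
    static void tcg_out_opc_vv(TCGContext *s, RISCVInsn opc,
                               TCGReg vd, TCGReg vs2, TCGReg vs1, bool vm)
    {
        /*
         * All operands must be vector registers.  V0 itself must be
         * accepted here, since vmv.v.v passes TCG_REG_V0 in its vs2 slot.
         */
        tcg_debug_assert(vd >= TCG_REG_V0 && vs2 >= TCG_REG_V0 &&
                         vs1 >= TCG_REG_V0);
        tcg_out32(s, encode_v(opc, vd, vs2, vs1, vm));
    }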
On 2024/8/14 17:11, Richard Henderson wrote:
> On 8/13/24 21:34, LIU Zhiwei wrote:
>> @@ -641,6 +645,13 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
>>      case TCG_TYPE_I64:
>>          tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
>>          break;
>> +    case TCG_TYPE_V64:
>> +    case TCG_TYPE_V128:
>> +    case TCG_TYPE_V256:
>> +        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
>> +        tcg_target_set_vec_config(s, type, prev_vece);
>> +        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
>
> I suggest these asserts be in tcg_out_opc_*.
> That way you don't need to replicate them at all uses.

OK.

>> +static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
>> +                                   TCGReg dst, TCGReg src)
>
> Oh, please drop all of the inline markup, from all patches.
> Let the compiler decide.

OK.

>> +static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
>> +                                    TCGReg dst, TCGReg base, intptr_t offset)
>> +{
>> +    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
>> +    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
>> +}
>
> Is this really better than using a strided load with rs2 = r0?

It depends. For our test board, it is.

Thanks,
Zhiwei

>
> r~
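For reference, the alternative Richard is asking about broadcasts straight from memory with a zero-stride load, avoiding the scalar round trip. A rough sketch, assuming OPC_VLSE*_V encodings, a tcg_out_opc_vls() strided-load emitter, and the backend's tcg_out_addi() helper (the first two are assumed names, not in this patch):

    /*
     * Hypothetical alternative: with rs2 = x0 the stride is 0, so
     * vlse<eew>.v loads the same memory element into every active
     * element, i.e. a broadcast directly from memory.
     * OPC_VLSE*_V and tcg_out_opc_vls() are assumed names.
     */
    static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                 TCGReg dst, TCGReg base, intptr_t offset)
    {
        static const RISCVInsn vlse[4] = {
            OPC_VLSE8_V, OPC_VLSE16_V, OPC_VLSE32_V, OPC_VLSE64_V
        };

        /* Fold the offset into a temporary; vlse takes no immediate. */
        tcg_out_addi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base, offset);
        tcg_target_set_vec_config(s, type, vece);
        /* rs2 = zero => stride 0 => every element reads the same address. */
        tcg_out_opc_vls(s, vlse[vece], dst, TCG_REG_TMP0, TCG_REG_ZERO, true);
        return true;
    }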
On 8/13/24 21:34, LIU Zhiwei wrote:
> +    case TCG_TYPE_V64:
> +    case TCG_TYPE_V128:
> +    case TCG_TYPE_V256:
> +        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
> +        tcg_target_set_vec_config(s, type, prev_vece);
> +        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
> +        break;

Is it worth using a whole register move (vmvNr.v) for the appropriate VLEN?

r~
On 2024/8/20 17:00, Richard Henderson wrote:
> On 8/13/24 21:34, LIU Zhiwei wrote:
>> +    case TCG_TYPE_V64:
>> +    case TCG_TYPE_V128:
>> +    case TCG_TYPE_V256:
>> +        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
>> +        tcg_target_set_vec_config(s, type, prev_vece);
>> +        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
>> +        break;
>
> Is it worth using a whole register move (vmvNr.v) for the appropriate VLEN?

Yes. We will use vmvNr.v in the next version.

Thanks for your suggestion.
Zhiwei

>
> r~
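For illustration, the whole-register-move path might look roughly as follows. This is a sketch under stated assumptions: OPC_VMVNR_V and riscv_vlen are assumed names (the real encoding arrives in the next version of the series), and vmv<nf>r.v is assumed to encode nf - 1 in the simm5 field:

    /*
     * Sketch only: a whole-register move copies nf complete vector
     * registers independent of the current vtype, so no vsetvli is
     * needed beforehand.  OPC_VMVNR_V and riscv_vlen are assumed names.
     */
    static void tcg_out_mov_vec(TCGContext *s, TCGType type,
                                TCGReg ret, TCGReg arg)
    {
        int type_bits = 64 << (type - TCG_TYPE_V64);   /* V64/V128/V256 */
        int nf = MAX(type_bits / riscv_vlen, 1);       /* regs per TCG type */

        /* vmv<nf>r.v requires nf in {1, 2, 4, 8}; nf - 1 goes in simm5. */
        tcg_debug_assert(is_power_of_2(nf) && nf <= 8);
        tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1, true);
    }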
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index f17d679d71..f60913e805 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -288,6 +288,10 @@ typedef enum {
     OPC_VSE16_V = 0x5027 | V_SUMOP,
     OPC_VSE32_V = 0x6027 | V_SUMOP,
     OPC_VSE64_V = 0x7027 | V_SUMOP,
+
+    OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
+    OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
+    OPC_VMV_V_X = 0x5e000057 | V_OPIVX,
 } RISCVInsn;
 
 /*
@@ -641,6 +645,13 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
     case TCG_TYPE_I64:
         tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
         break;
+    case TCG_TYPE_V64:
+    case TCG_TYPE_V128:
+    case TCG_TYPE_V256:
+        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
+        tcg_target_set_vec_config(s, type, prev_vece);
+        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
+        break;
     default:
         g_assert_not_reached();
     }
@@ -977,6 +988,33 @@ static void tcg_out_addsub2(TCGContext *s,
     }
 }
 
+static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+                                   TCGReg dst, TCGReg src)
+{
+    tcg_target_set_vec_config(s, type, vece);
+    tcg_out_opc_vx(s, OPC_VMV_V_X, dst, TCG_REG_V0, src, true);
+    return true;
+}
+
+static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+                                    TCGReg dst, TCGReg base, intptr_t offset)
+{
+    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
+    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
+}
+
+static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+                                    TCGReg dst, int64_t arg)
+{
+    if (arg < 16 && arg >= -16) {
+        tcg_target_set_vec_config(s, type, vece);
+        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, TCG_REG_V0, arg, true);
+        return;
+    }
+    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
+    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
+}
+
 static const struct {
     RISCVInsn op;
     bool swap;
@@ -2111,6 +2149,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         tcg_target_set_vec_config(s, type, vece);
     }
     switch (opc) {
+    case INDEX_op_dupm_vec:
+        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+        break;
     case INDEX_op_ld_vec:
         tcg_out_ld(s, type, a0, a1, a2);
         break;
@@ -2282,6 +2323,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_st_vec:
         return C_O0_I2(v, r);
 
+    case INDEX_op_dup_vec:
+    case INDEX_op_dupm_vec:
     case INDEX_op_ld_vec:
         return C_O1_I1(v, r);
     default:
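To tie the three splat paths together, here is how calls into the patch's new helpers would divide the work. The values and the vector register are illustrative only (TCG_REG_V2 is assumed to exist in this series' register enum):

    /* Illustrative calls only; values and registers are hypothetical. */
    tcg_out_dupi_vec(s, TCG_TYPE_V128, MO_8, TCG_REG_V2, 7);
                                         /* fits simm5 (-16..15): vmv.v.i    */
    tcg_out_dupi_vec(s, TCG_TYPE_V128, MO_32, TCG_REG_V2, 100);
                                         /* too wide: movi + vmv.v.x         */
    tcg_out_dup_vec(s, TCG_TYPE_V128, MO_64, TCG_REG_V2, TCG_REG_A0);
                                         /* splat a scalar register: vmv.v.x */
    tcg_out_dupm_vec(s, TCG_TYPE_V128, MO_64, TCG_REG_V2, TCG_REG_SP, 16);
                                         /* splat from memory: ld + vmv.v.x  */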