Message ID | 20200312145900.2054-55-zhiwei_liu@c-sky.com |
---|---|
State | New |
Headers | show |
Series | target/riscv: support vector extension v0.7.1 | expand |
On 3/12/20 7:58 AM, LIU Zhiwei wrote: > +static bool trans_vext_x_v(DisasContext *s, arg_r *a) > +{ > + if (vext_check_isa_ill(s, RVV)) { > + TCGv_ptr src2; > + TCGv dest, src1; > + gen_helper_vext_x_v fns[4] = { > + gen_helper_vext_x_v_b, gen_helper_vext_x_v_h, > + gen_helper_vext_x_v_w, gen_helper_vext_x_v_d > + }; > + > + dest = tcg_temp_new(); > + src1 = tcg_temp_new(); > + src2 = tcg_temp_new_ptr(); > + > + gen_get_gpr(src1, a->rs1); > + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2)); > + > + fns[s->sew](dest, src2, src1, cpu_env); > + gen_set_gpr(a->rd, dest); > + > + tcg_temp_free(dest); > + tcg_temp_free(src1); > + tcg_temp_free_ptr(src2); > + return true; > + } > + return false; > +} This entire operation can be performed inline easily. static void extract_element(TCGv dest, TCGv_ptr base, int ofs, int sew) { switch (sew) { case MO_8: tcg_gen_ld8u_tl(dest, base, ofs); break; case MO_16: tcg_gen_ld16u_tl(dest, base, ofs); break; default: tcg_gen_ld32u_tl(dest, base, ofs); break; #if TARGET_LONG_BITS == 64 case MO_64: tcg_gen_ld_i64(dest, base, ofs); break; #endif } } static bool trans_vext_x_v(DisasContext *s, arg_r *a) { ... if (a->rs1 == 0) { /* Special case vmv.x.s rd, vs2. */ do_extract(dest, cpu_env, vreg_ofs(s, a->rs2), s->sew); } else { int vlen = s->vlen >> (3 + s->sew); TCGv_i32 ofs = tcg_temp_new_i32(); TCGv_ptr base = tcg_temp_new_ptr(); TCGv t_vlen, t_zero; /* Mask the index to the length so that we do not produce an out-of-range load. */ tcg_gen_trunc_tl_i32(ofs, cpu_gpr[a->rs1]); tcg_gen_andi_i32(ofs, ofs, vlen - 1); /* Convert the index to an offset. */ tcg_gen_shli_i32(ofs, ofs, s->sew); /* Convert the index to a pointer. */ tcg_gen_extu_i32_ptr(base, ofs); tcg_gen_add_ptr(base, base, cpu_env); /* Perform the load. */ do_extract(dest, base, vreg_ofs(s, a->rs2), s->sew); tcg_temp_free_ptr(base); tcg_temp_free_i32(ofs); /* Flush out-of-range indexing to zero. 
*/ t_vlen = tcg_const_tl(vlen); t_zero = tcg_const_tl(0); tcg_gen_movcond_tl(TCG_COND_LTU, dest, cpu_gpr[a->rs1], t_vlen, dest, t_zero); tcg_temp_free(t_vlen); tcg_temp_free(t_zero); } r~
On 2020/3/15 10:53, Richard Henderson wrote: > On 3/12/20 7:58 AM, LIU Zhiwei wrote: >> +static bool trans_vext_x_v(DisasContext *s, arg_r *a) >> +{ >> + if (vext_check_isa_ill(s, RVV)) { >> + TCGv_ptr src2; >> + TCGv dest, src1; >> + gen_helper_vext_x_v fns[4] = { >> + gen_helper_vext_x_v_b, gen_helper_vext_x_v_h, >> + gen_helper_vext_x_v_w, gen_helper_vext_x_v_d >> + }; >> + >> + dest = tcg_temp_new(); >> + src1 = tcg_temp_new(); >> + src2 = tcg_temp_new_ptr(); >> + >> + gen_get_gpr(src1, a->rs1); >> + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2)); >> + >> + fns[s->sew](dest, src2, src1, cpu_env); >> + gen_set_gpr(a->rd, dest); >> + >> + tcg_temp_free(dest); >> + tcg_temp_free(src1); >> + tcg_temp_free_ptr(src2); >> + return true; >> + } >> + return false; >> +} > This entire operation can be performed inline easily. > > static void extract_element(TCGv dest, TCGv_ptr base, > int ofs, int sew) > { > switch (sew) { > case MO_8: > tcg_gen_ld8u_tl(dest, base, ofs); > break; > case MO_16: > tcg_gen_ld16u_tl(dest, base, ofs); > break; > default: > tcg_gen_ld32u_tl(dest, base, ofs); > break; > #if TARGET_LONG_BITS == 64 > case MO_64: > tcg_gen_ld_i64(dest, base, ofs); > break; > #endif > } > } > > static bool trans_vext_x_v(DisasContext *s, arg_r *a) > { > ... > if (a->rs1 == 0) { > /* Special case vmv.x.s rd, vs2. */ > do_extract(dest, cpu_env, > vreg_ofs(s, a->rs2), s->sew); > } else { > int vlen = s->vlen >> (3 + s->sew); > TCGv_i32 ofs = tcg_temp_new_i32(); > TCGv_ptr base = tcg_temp_new_ptr(); > TCGv t_vlen, t_zero; > > /* Mask the index to the length so that we do > not produce an out-of-range load. */ > tcg_gen_trunc_tl_i32(ofs, cpu_gpr[a->rs1]); > tcg_gen_andi_i32(ofs, ofs, vlen - 1); > > /* Convert the index to an offset. */ > tcg_gen_shli_i32(ofs, ofs, s->sew); On a big-endian host, should I convert the index first, before this statement?
#ifdef HOST_WORDS_BIGENDIAN static void convert_idx(TCGv_i32 idx, int sew) { switch (sew) { case MO_8: tcg_gen_xori_i32(idx, idx, 7); break; case MO_16: tcg_gen_xori_i32(idx, idx, 3); break; case MO_32: tcg_gen_xori_i32(idx, idx, 1); break; default: break; } } #endif When converting the index to an offset, call this function first: #ifdef HOST_WORDS_BIGENDIAN convert_idx(ofs, s->sew); #endif /* Convert the index to an offset. */ tcg_gen_shli_i32(ofs, ofs, s->sew); Zhiwei > /* Convert the index to a pointer. */ > tcg_gen_extu_i32_ptr(base, ofs); > tcg_gen_add_ptr(base, base, cpu_env); > > /* Perform the load. */ > do_extract(dest, base, > vreg_ofs(s, a->rs2), s->sew); > tcg_temp_free_ptr(base); > tcg_temp_free_i32(ofs); > > /* Flush out-of-range indexing to zero. */ > t_vlen = tcg_const_tl(vlen); > t_zero = tcg_const_tl(0); > tcg_gen_movcond_tl(TCG_COND_LTU, dest, cpu_gpr[a->rs1], > t_vlen, dest, t_zero); > tcg_temp_free(t_vlen); > tcg_temp_free(t_zero); > } > > r~
On 3/14/20 10:15 PM, LIU Zhiwei wrote: > > > On 2020/3/15 10:53, Richard Henderson wrote: >> On 3/12/20 7:58 AM, LIU Zhiwei wrote: >>> +static bool trans_vext_x_v(DisasContext *s, arg_r *a) >>> +{ >>> + if (vext_check_isa_ill(s, RVV)) { >>> + TCGv_ptr src2; >>> + TCGv dest, src1; >>> + gen_helper_vext_x_v fns[4] = { >>> + gen_helper_vext_x_v_b, gen_helper_vext_x_v_h, >>> + gen_helper_vext_x_v_w, gen_helper_vext_x_v_d >>> + }; >>> + >>> + dest = tcg_temp_new(); >>> + src1 = tcg_temp_new(); >>> + src2 = tcg_temp_new_ptr(); >>> + >>> + gen_get_gpr(src1, a->rs1); >>> + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2)); >>> + >>> + fns[s->sew](dest, src2, src1, cpu_env); >>> + gen_set_gpr(a->rd, dest); >>> + >>> + tcg_temp_free(dest); >>> + tcg_temp_free(src1); >>> + tcg_temp_free_ptr(src2); >>> + return true; >>> + } >>> + return false; >>> +} >> This entire operation can be performed inline easily. >> >> static void extract_element(TCGv dest, TCGv_ptr base, >> int ofs, int sew) >> { >> switch (sew) { >> case MO_8: >> tcg_gen_ld8u_tl(dest, base, ofs); >> break; >> case MO_16: >> tcg_gen_ld16u_tl(dest, base, ofs); >> break; >> default: >> tcg_gen_ld32u_tl(dest, base, ofs); >> break; >> #if TARGET_LONG_BITS == 64 >> case MO_64: >> tcg_gen_ld_i64(dest, base, ofs); >> break; >> #endif >> } >> } >> >> static bool trans_vext_x_v(DisasContext *s, arg_r *a) >> { >> ... >> if (a->rs1 == 0) { >> /* Special case vmv.x.s rd, vs2. */ >> do_extract(dest, cpu_env, >> vreg_ofs(s, a->rs2), s->sew); >> } else { >> int vlen = s->vlen >> (3 + s->sew); >> TCGv_i32 ofs = tcg_temp_new_i32(); >> TCGv_ptr base = tcg_temp_new_ptr(); >> TCGv t_vlen, t_zero; >> >> /* Mask the index to the length so that we do >> not produce an out-of-range load. */ >> tcg_gen_trunc_tl_i32(ofs, cpu_gpr[a->rs1]); >> tcg_gen_andi_i32(ofs, ofs, vlen - 1); >> >> /* Convert the index to an offset. 
*/ >> tcg_gen_shli_i32(ofs, ofs, s->sew); > > In big endianess host, should I convert the index first before this statement. > > #ifdef HOST_WORDS_BIGENDIAN > static void convert_idx(TCGv_i32 idx, int sew) > { > switch (sew) { > case MO_8: > tcg_gen_xori_i32(idx, idx, 7); > break; > case MO_16: > tcg_gen_xori_i32(idx, idx, 3); > break; > case MO_32: > tcg_gen_xori_i32(idx, idx, 1); > break; > default: > break; > } > } > #endif > > > When convert the index to an offset, use this function first > > #ifdef HOST_WORDS_BIGENDIAN > convert_idx(ofs, s->sew) > #endif Yes, I forgot about endian adjust. I would say static void endian_adjust(TCGv_i32 ofs, int sew) { #ifdef HOST_WORDS_BIGENDIAN tcg_gen_xori_i32(ofs, ofs, 7 >> sew); #endif } so that you don't need the ifdef at the use site. r~
diff --git a/target/riscv/helper.h b/target/riscv/helper.h index e3f2970221..d94347a9a5 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1101,3 +1101,8 @@ DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32) DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32) DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32) DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32) + +DEF_HELPER_3(vext_x_v_b, tl, ptr, tl, env) +DEF_HELPER_3(vext_x_v_h, tl, ptr, tl, env) +DEF_HELPER_3(vext_x_v_w, tl, ptr, tl, env) +DEF_HELPER_3(vext_x_v_d, tl, ptr, tl, env) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 1504059415..c26a186d6a 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -555,6 +555,7 @@ vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 
1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index 1ff72a6406..46651dfb10 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -2210,3 +2210,36 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) } return false; } + +/* + *** Vector Permutation Instructions + */ +/* Integer Extract Instruction */ +typedef void (* gen_helper_vext_x_v)(TCGv, TCGv_ptr, TCGv, TCGv_env); +static bool trans_vext_x_v(DisasContext *s, arg_r *a) +{ + if (vext_check_isa_ill(s, RVV)) { + TCGv_ptr src2; + TCGv dest, src1; + gen_helper_vext_x_v fns[4] = { + gen_helper_vext_x_v_b, gen_helper_vext_x_v_h, + gen_helper_vext_x_v_w, gen_helper_vext_x_v_d + }; + + dest = tcg_temp_new(); + src1 = tcg_temp_new(); + src2 = tcg_temp_new_ptr(); + + gen_get_gpr(src1, a->rs1); + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2)); + + fns[s->sew](dest, src2, src1, cpu_env); + gen_set_gpr(a->rd, dest); + + tcg_temp_free(dest); + tcg_temp_free(src1); + tcg_temp_free_ptr(src2); + return true; + } + return false; +} diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index ff3b60e9c8..8704ee120f 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -4440,3 +4440,23 @@ GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb) GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh) GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl) GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq) + +/* + *** Vector Permutation Instructions + */ +/* Integer Extract Instruction */ +#define GEN_VEXT_X_V(NAME, ETYPE, H) \ +target_ulong HELPER(NAME)(void *vs2, target_ulong s1, \ + CPURISCVState *env) \ +{ \ + uint32_t vlen = env_archcpu(env)->cfg.vlen / 8; \ + \ + if (s1 >= vlen / sizeof(ETYPE)) { \ + return 0; \ + } \ + return *((ETYPE *)vs2 + s1); \ +} +GEN_VEXT_X_V(vext_x_v_b, uint8_t, H1) +GEN_VEXT_X_V(vext_x_v_h, uint16_t, H2) +GEN_VEXT_X_V(vext_x_v_w, uint32_t, H4) +GEN_VEXT_X_V(vext_x_v_d, 
uint64_t, H8)
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> --- target/riscv/helper.h | 5 ++++ target/riscv/insn32.decode | 1 + target/riscv/insn_trans/trans_rvv.inc.c | 33 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 20 +++++++++++++++ 4 files changed, 59 insertions(+)