Message ID | 1626861198-6133-10-git-send-email-gaosong@loongson.cn |
---|---|
State | New |
Series | Add LoongArch linux-user emulation support |
On 7/21/21 11:53 AM, Song Gao wrote: > This patch implement fixed point bit instruction translation. > > This includes: > - EXT.W.{B/H} > - CL{O/Z}.{W/D}, CT{O/Z}.{W/D} > - BYTEPICK.{W/D} > - REVB.{2H/4H/2W/D} > - REVH.{2W/D} > - BITREV.{4B/8B}, BITREV.{W/D} > - BSTRINS.{W/D}, BSTRPICK.{W/D} > - MASKEQZ, MASKNEZ > > Signed-off-by: Song Gao <gaosong@loongson.cn> > --- > target/loongarch/helper.h | 10 + > target/loongarch/insns.decode | 45 +++ > target/loongarch/op_helper.c | 119 ++++++++ > target/loongarch/trans.inc.c | 665 ++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 839 insertions(+) > diff --git a/target/loongarch/op_helper.c b/target/loongarch/op_helper.c > index b2cbdd7..07c3d52 100644 > --- a/target/loongarch/op_helper.c > +++ b/target/loongarch/op_helper.c > @@ -25,3 +25,122 @@ void helper_raise_exception(CPULoongArchState *env, uint32_t exception) > { > do_raise_exception(env, exception, GETPC()); > } > + > +target_ulong helper_cto_w(CPULoongArchState *env, target_ulong rj) > +{ > + uint32_t v = (uint32_t)rj; > + int temp = 0; > + > + while ((v & 0x1) == 1) { > + temp++; > + v = v >> 1; > + } Why not use cto32() from "qemu/host-utils.h" > + > + return (target_ulong)temp; > +} > + > +target_ulong helper_ctz_w(CPULoongArchState *env, target_ulong rj) > +{ > + uint32_t v = (uint32_t)rj; > + > + if (v == 0) { > + return 32; > + } > + > + int temp = 0; > + while ((v & 0x1) == 0) { > + temp++; > + v = v >> 1; > + } ctz32 > + > + return (target_ulong)temp; > +} > + > +target_ulong helper_cto_d(CPULoongArchState *env, target_ulong rj) > +{ > + uint64_t v = rj; > + int temp = 0; > + > + while ((v & 0x1) == 1) { > + temp++; > + v = v >> 1; > + } cto64 > + > + return (target_ulong)temp; > +} > + > +target_ulong helper_ctz_d(CPULoongArchState *env, target_ulong rj) > +{ > + uint64_t v = rj; > + > + if (v == 0) { > + return 64; > + } > + > + int temp = 0; > + while ((v & 0x1) == 0) { > + temp++; > + v = v >> 1; > + } and ctz64? > + > + return (target_ulong)temp; > +} > + > +target_ulong helper_bitrev_w(CPULoongArchState *env, target_ulong rj) > +{ > + int32_t v = (int32_t)rj; > + const int SIZE = 32; > + uint8_t bytes[SIZE]; > + > + int i; > + for (i = 0; i < SIZE; i++) { > + bytes[i] = v & 0x1; > + v = v >> 1; > + } > + /* v == 0 */ > + for (i = 0; i < SIZE; i++) { > + v = v | ((uint32_t)bytes[i] << (SIZE - 1 - i)); > + } > + > + return (target_ulong)(int32_t)v; > +} > + > +target_ulong helper_bitrev_d(CPULoongArchState *env, target_ulong rj) > +{ > + uint64_t v = rj; > + const int SIZE = 64; > + uint8_t bytes[SIZE]; > + > + int i; > + for (i = 0; i < SIZE; i++) { > + bytes[i] = v & 0x1; > + v = v >> 1; > + } > + /* v == 0 */ > + for (i = 0; i < SIZE; i++) { > + v = v | ((uint64_t)bytes[i] << (SIZE - 1 - i)); > + } > + > + return (target_ulong)v; > +} > + > +static inline target_ulong bitswap(target_ulong v) > +{ > + v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) | > + ((v & (target_ulong)0x5555555555555555ULL) << 1); > + v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) | > + ((v & (target_ulong)0x3333333333333333ULL) << 2); > + v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) | > + ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4); > + return v; Is this revbit64? > +} > + > +target_ulong helper_loongarch_dbitswap(target_ulong rj) > +{ > + return bitswap(rj); > +} > + > +target_ulong helper_loongarch_bitswap(target_ulong rt) > +{ > + return (int32_t)bitswap(rt); > +}
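For reference, this is roughly what Philippe's suggestion would reduce the four count helpers to, assuming the cto32/ctz32/cto64/ctz64 prototypes in "qemu/host-utils.h" and keeping the helper signatures from the patch's op_helper.c (a sketch, not the revised patch):

    #include "qemu/osdep.h"
    #include "qemu/host-utils.h"
    #include "cpu.h"
    #include "exec/helper-proto.h"

    /* Count trailing ones / zeros via the generic host-utils helpers. */
    target_ulong helper_cto_w(CPULoongArchState *env, target_ulong rj)
    {
        return cto32((uint32_t)rj);
    }

    target_ulong helper_ctz_w(CPULoongArchState *env, target_ulong rj)
    {
        return ctz32((uint32_t)rj);     /* ctz32(0) already yields 32 */
    }

    target_ulong helper_cto_d(CPULoongArchState *env, target_ulong rj)
    {
        return cto64(rj);
    }

    target_ulong helper_ctz_d(CPULoongArchState *env, target_ulong rj)
    {
        return ctz64(rj);               /* ctz64(0) already yields 64 */
    }

As the later review points out, these helpers become unnecessary once the TCG builtins are used, so this only illustrates the host-utils API.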
Hi, Philippe On 07/22/2021 01:46 AM, Philippe Mathieu-Daudé wrote: > On 7/21/21 11:53 AM, Song Gao wrote: >> This patch implement fixed point bit instruction translation. >> >> This includes: >> - EXT.W.{B/H} >> - CL{O/Z}.{W/D}, CT{O/Z}.{W/D} >> - BYTEPICK.{W/D} >> - REVB.{2H/4H/2W/D} >> - REVH.{2W/D} >> - BITREV.{4B/8B}, BITREV.{W/D} >> - BSTRINS.{W/D}, BSTRPICK.{W/D} >> - MASKEQZ, MASKNEZ >> >> Signed-off-by: Song Gao <gaosong@loongson.cn> >> --- >> target/loongarch/helper.h | 10 + >> target/loongarch/insns.decode | 45 +++ >> target/loongarch/op_helper.c | 119 ++++++++ >> target/loongarch/trans.inc.c | 665 ++++++++++++++++++++++++++++++++++++++++++ >> 4 files changed, 839 insertions(+) > >> diff --git a/target/loongarch/op_helper.c b/target/loongarch/op_helper.c >> index b2cbdd7..07c3d52 100644 >> --- a/target/loongarch/op_helper.c >> +++ b/target/loongarch/op_helper.c >> @@ -25,3 +25,122 @@ void helper_raise_exception(CPULoongArchState *env, uint32_t exception) >> { >> do_raise_exception(env, exception, GETPC()); >> } >> + >> +target_ulong helper_cto_w(CPULoongArchState *env, target_ulong rj) >> +{ >> + uint32_t v = (uint32_t)rj; >> + int temp = 0; >> + >> + while ((v & 0x1) == 1) { >> + temp++; >> + v = v >> 1; >> + } > > Why not use cto32() from "qemu/host-utils.h" >>> + >> + return (target_ulong)temp; >> +} >> + >> +target_ulong helper_ctz_w(CPULoongArchState *env, target_ulong rj) >> +{ >> + uint32_t v = (uint32_t)rj; >> + >> + if (v == 0) { >> + return 32; >> + } >> + >> + int temp = 0; >> + while ((v & 0x1) == 0) { >> + temp++; >> + v = v >> 1; >> + } > > ctz32 > >> + >> + return (target_ulong)temp; >> +} >> + >> +target_ulong helper_cto_d(CPULoongArchState *env, target_ulong rj) >> +{ >> + uint64_t v = rj; >> + int temp = 0; >> + >> + while ((v & 0x1) == 1) { >> + temp++; >> + v = v >> 1; >> + } > > cto64 > >> + >> + return (target_ulong)temp; >> +} >> + >> +target_ulong helper_ctz_d(CPULoongArchState *env, target_ulong rj) >> +{ >> + uint64_t v = rj; >> + >> + if (v == 0) { >> + return 64; >> + } >> + >> + int temp = 0; >> + while ((v & 0x1) == 0) { >> + temp++; >> + v = v >> 1; >> + } > > and ctz64? > Yes, I didn't notice the file "qemu/host-utils.h" before, thanks for kindly help! 
>> + >> + return (target_ulong)temp; >> +} >> + >> +target_ulong helper_bitrev_w(CPULoongArchState *env, target_ulong rj) >> +{ >> + int32_t v = (int32_t)rj; >> + const int SIZE = 32; >> + uint8_t bytes[SIZE]; >> + >> + int i; >> + for (i = 0; i < SIZE; i++) { >> + bytes[i] = v & 0x1; >> + v = v >> 1; >> + } >> + /* v == 0 */ >> + for (i = 0; i < SIZE; i++) { >> + v = v | ((uint32_t)bytes[i] << (SIZE - 1 - i)); >> + } >> + >> + return (target_ulong)(int32_t)v; >> +} >> + >> +target_ulong helper_bitrev_d(CPULoongArchState *env, target_ulong rj) >> +{ >> + uint64_t v = rj; >> + const int SIZE = 64; >> + uint8_t bytes[SIZE]; >> + >> + int i; >> + for (i = 0; i < SIZE; i++) { >> + bytes[i] = v & 0x1; >> + v = v >> 1; >> + } >> + /* v == 0 */ >> + for (i = 0; i < SIZE; i++) { >> + v = v | ((uint64_t)bytes[i] << (SIZE - 1 - i)); >> + } >> + >> + return (target_ulong)v; >> +} >> + >> +static inline target_ulong bitswap(target_ulong v) >> +{ >> + v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) | >> + ((v & (target_ulong)0x5555555555555555ULL) << 1); >> + v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) | >> + ((v & (target_ulong)0x3333333333333333ULL) << 2); >> + v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) | >> + ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4); >> + return v; > > Is this revbit64? > No, helper_bitrev_d is revbit64(LoongArch insn is 'bitrev.d rd, rj'). bitswap function for 'bitrev.4b/8b rd, rj' instruction. BITREV.4B: bstr32[31:24] = BITREV(GR[rj][31:24]) bstr32[23:16] = BITREV(GR[rj][23:16]) bstr32[15: 8] = BITREV(GR[rj][15: 8]) bstr32[ 7: 0] = BITREV(GR[rj][ 7: 0]) GR[rd] = SignExtend(bstr32, GRLEN) BITREV.8B: GR[rd][63:56] = BITREV(GR[rj][63:56]) GR[rd][55:48] = BITREV(GR[rj][55:48]) GR[rd][47:40] = BITREV(GR[rj][47:40]) GR[rd][39:32] = BITREV(GR[rj][39:32]) GR[rd][31:24] = BITREV(GR[rj][31:24]) GR[rd][23:16] = BITREV(GR[rj][23:16]) GR[rd][15: 8] = BITREV(GR[rj][15: 8]) GR[rd][ 7: 0] = BITREV(GR[rj][ 7: 0]) We can see a detailed introduction in [1] 2.2.3.6. [1] : https://github.com/loongson/LoongArch-Documentation/releases/download/LoongArch-Vol1-v3/LoongArch-Vol1-v1.00-EN.pdf Thanks Song Gao
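Given that definition, the per-byte reversal Song describes can also be expressed by applying revbit8() from "qemu/host-utils.h" to each byte. The sketch below is only an illustration of the semantics; the function names are hypothetical, not taken from the patch:

    #include "qemu/host-utils.h"

    /* BITREV.8B: reverse the bit order inside each of the eight bytes. */
    static uint64_t bitrev_bytes(uint64_t v)
    {
        uint64_t r = 0;
        int i;

        for (i = 0; i < 64; i += 8) {
            r |= (uint64_t)revbit8((uint8_t)(v >> i)) << i;
        }
        return r;
    }

    /* BITREV.4B: same operation on the low 32 bits, then sign-extend. */
    static uint64_t bitrev_4b(uint64_t v)
    {
        return (int32_t)bitrev_bytes((uint32_t)v);
    }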
On 7/20/21 11:53 PM, Song Gao wrote: > This patch implement fixed point bit instruction translation. > > This includes: > - EXT.W.{B/H} > - CL{O/Z}.{W/D}, CT{O/Z}.{W/D} > - BYTEPICK.{W/D} > - REVB.{2H/4H/2W/D} > - REVH.{2W/D} > - BITREV.{4B/8B}, BITREV.{W/D} > - BSTRINS.{W/D}, BSTRPICK.{W/D} > - MASKEQZ, MASKNEZ > > Signed-off-by: Song Gao <gaosong@loongson.cn> > --- > target/loongarch/helper.h | 10 + > target/loongarch/insns.decode | 45 +++ > target/loongarch/op_helper.c | 119 ++++++++ > target/loongarch/trans.inc.c | 665 ++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 839 insertions(+) > > diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h > index 6c7e19b..bbbcc26 100644 > --- a/target/loongarch/helper.h > +++ b/target/loongarch/helper.h > @@ -8,3 +8,13 @@ > > DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int) > DEF_HELPER_2(raise_exception, noreturn, env, i32) > + > +DEF_HELPER_2(cto_w, tl, env, tl) > +DEF_HELPER_2(ctz_w, tl, env, tl) > +DEF_HELPER_2(cto_d, tl, env, tl) > +DEF_HELPER_2(ctz_d, tl, env, tl) The count leading and trailing zero operations are built into tcg. Count leading and trailing one simply needs a NOT operation to convert it to zero. > +DEF_HELPER_2(bitrev_w, tl, env, tl) > +DEF_HELPER_2(bitrev_d, tl, env, tl) These should use TCG_CALL_NO_RWG_SE. > +target_ulong helper_bitrev_w(CPULoongArchState *env, target_ulong rj) > +{ > + int32_t v = (int32_t)rj; > + const int SIZE = 32; > + uint8_t bytes[SIZE]; > + > + int i; > + for (i = 0; i < SIZE; i++) { > + bytes[i] = v & 0x1; > + v = v >> 1; > + } > + /* v == 0 */ > + for (i = 0; i < SIZE; i++) { > + v = v | ((uint32_t)bytes[i] << (SIZE - 1 - i)); > + } > + > + return (target_ulong)(int32_t)v; > +} return (int32_t)revbit32(rj); > +target_ulong helper_bitrev_d(CPULoongArchState *env, target_ulong rj) > +{ > + uint64_t v = rj; > + const int SIZE = 64; > + uint8_t bytes[SIZE]; > + > + int i; > + for (i = 0; i < SIZE; i++) { > + bytes[i] = v & 0x1; > + v = v >> 1; > + } > + /* v == 0 */ > + for (i = 0; i < SIZE; i++) { > + v = v | ((uint64_t)bytes[i] << (SIZE - 1 - i)); > + } > + > + return (target_ulong)v; > +} return revbit64(rj); > +static inline target_ulong bitswap(target_ulong v) > +{ > + v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) | > + ((v & (target_ulong)0x5555555555555555ULL) << 1); > + v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) | > + ((v & (target_ulong)0x3333333333333333ULL) << 2); > + v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) | > + ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4); > + return v; > +} > + > +target_ulong helper_loongarch_dbitswap(target_ulong rj) > +{ > + return bitswap(rj); > +} > + > +target_ulong helper_loongarch_bitswap(target_ulong rt) > +{ > + return (int32_t)bitswap(rt); > +} I assume these are fpr the bitrev.4b and bitrev.8b insns? It would be better to name them correctly. > +/* Fixed point bit operation instruction translation */ > +static bool trans_ext_w_h(DisasContext *ctx, arg_ext_w_h *a) > +{ > + TCGv t0; > + TCGv Rd = cpu_gpr[a->rd]; > + > + if (a->rd == 0) { > + /* Nop */ > + return true; > + } > + > + t0 = get_gpr(a->rj); > + > + tcg_gen_ext16s_tl(Rd, t0); Again, you should have a common routine for handling these unary operations. 
> +static bool trans_clo_w(DisasContext *ctx, arg_clo_w *a) > +{ > + TCGv Rd = cpu_gpr[a->rd]; > + > + if (a->rd == 0) { > + /* Nop */ > + return true; > + } > + > + gen_load_gpr(Rd, a->rj); > + > + tcg_gen_not_tl(Rd, Rd); > + tcg_gen_ext32u_tl(Rd, Rd); > + tcg_gen_clzi_tl(Rd, Rd, TARGET_LONG_BITS); > + tcg_gen_subi_tl(Rd, Rd, TARGET_LONG_BITS - 32); So, you're actually using the tcg builtins here, and the helper you created isn't used. > +static bool trans_cto_w(DisasContext *ctx, arg_cto_w *a) > +{ > + TCGv t0; > + TCGv Rd = cpu_gpr[a->rd]; > + > + if (a->rd == 0) { > + /* Nop */ > + return true; > + } > + > + t0 = tcg_temp_new(); > + gen_load_gpr(t0, a->rj); > + > + gen_helper_cto_w(Rd, cpu_env, t0); Here you should have used the tcg builtin. > +static bool trans_ctz_w(DisasContext *ctx, arg_ctz_w *a) > +{ > + TCGv t0; > + TCGv Rd = cpu_gpr[a->rd]; > + > + if (a->rd == 0) { > + /* Nop */ > + return true; > + } > + > + t0 = tcg_temp_new(); > + gen_load_gpr(t0, a->rj); > + > + gen_helper_ctz_w(Rd, cpu_env, t0); Likewise. > +static bool trans_revb_2w(DisasContext *ctx, arg_revb_2w *a) > +{ > + TCGv_i64 t0, t1, t2; > + TCGv Rd = cpu_gpr[a->rd]; > + > + if (a->rd == 0) { > + /* Nop */ > + return true; > + } > + > + t0 = tcg_temp_new_i64(); > + t1 = tcg_temp_new_i64(); > + t2 = get_gpr(a->rj); > + > + gen_load_gpr(t0, a->rd); > + > + tcg_gen_ext32u_i64(t1, t2); > + tcg_gen_bswap32_i64(t0, t1); > + tcg_gen_shri_i64(t1, t2, 32); > + tcg_gen_bswap32_i64(t1, t1); > + tcg_gen_concat32_i64(Rd, t0, t1); tcg_gen_bswap64_i64(Rd, Rj) tcg_gen_rotri_i64(Rd, Rd, 32); > +static bool trans_bytepick_d(DisasContext *ctx, arg_bytepick_d *a) > +{ > + TCGv t0; > + TCGv Rd = cpu_gpr[a->rd]; > + > + if (a->rd == 0) { > + /* Nop */ > + return true; > + } > + > + t0 = tcg_temp_new(); > + > + check_loongarch_64(ctx); > + if (a->sa3 == 0 || ((a->sa3) * 8) == 64) { > + if (a->sa3 == 0) { > + gen_load_gpr(t0, a->rk); > + } else { > + gen_load_gpr(t0, a->rj); > + } > + tcg_gen_mov_tl(Rd, t0); > + } else { > + TCGv t1 = tcg_temp_new(); > + > + gen_load_gpr(t0, a->rk); > + gen_load_gpr(t1, a->rj); > + > + tcg_gen_shli_tl(t0, t0, ((a->sa3) * 8)); > + tcg_gen_shri_tl(t1, t1, 64 - ((a->sa3) * 8)); > + tcg_gen_or_tl(Rd, t1, t0); > + > + tcg_temp_free(t1); > + } tcg_gen_extract2_i64(Rd, Rk, Rj, a->sa3 * 8); r~
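Taken together, the two TCG rewrites Richard sketches come out roughly as below. The surrounding plumbing (cpu_gpr[], get_gpr(), gen_load_gpr(), check_loongarch_64()) is taken from the patch; for BYTEPICK.D the extract2 offset is written as 64 - 8*sa3 so that the result equals the patch's original (rk << 8*sa3) | (rj >> (64 - 8*sa3)) shift/or sequence, including the sa3 == 0 case:

    /* REVB.2W: byte-swap each 32-bit half of rj in place. */
    static bool trans_revb_2w(DisasContext *ctx, arg_revb_2w *a)
    {
        TCGv Rd = cpu_gpr[a->rd];

        if (a->rd == 0) {
            /* Nop */
            return true;
        }

        gen_load_gpr(Rd, a->rj);
        tcg_gen_bswap64_i64(Rd, Rd);
        tcg_gen_rotri_i64(Rd, Rd, 32);

        return true;
    }

    /* BYTEPICK.D: pick 8 bytes out of the rk:rj concatenation. */
    static bool trans_bytepick_d(DisasContext *ctx, arg_bytepick_d *a)
    {
        TCGv Rj, Rk;
        TCGv Rd = cpu_gpr[a->rd];

        if (a->rd == 0) {
            /* Nop */
            return true;
        }

        check_loongarch_64(ctx);
        Rj = get_gpr(a->rj);
        Rk = get_gpr(a->rk);

        /* An offset of 64 (sa3 == 0) simply copies Rk. */
        tcg_gen_extract2_i64(Rd, Rj, Rk, 64 - a->sa3 * 8);

        return true;
    }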
Hi, Richard. On 07/23/2021 09:29 AM, Richard Henderson wrote: > On 7/20/21 11:53 PM, Song Gao wrote: >> This patch implement fixed point bit instruction translation. >> >> This includes: >> - EXT.W.{B/H} >> - CL{O/Z}.{W/D}, CT{O/Z}.{W/D} >> - BYTEPICK.{W/D} >> - REVB.{2H/4H/2W/D} >> - REVH.{2W/D} >> - BITREV.{4B/8B}, BITREV.{W/D} >> - BSTRINS.{W/D}, BSTRPICK.{W/D} >> - MASKEQZ, MASKNEZ >> >> Signed-off-by: Song Gao <gaosong@loongson.cn> >> --- >> target/loongarch/helper.h | 10 + >> target/loongarch/insns.decode | 45 +++ >> target/loongarch/op_helper.c | 119 ++++++++ >> target/loongarch/trans.inc.c | 665 ++++++++++++++++++++++++++++++++++++++++++ >> 4 files changed, 839 insertions(+) >> >> diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h >> index 6c7e19b..bbbcc26 100644 >> --- a/target/loongarch/helper.h >> +++ b/target/loongarch/helper.h >> @@ -8,3 +8,13 @@ >> DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int) >> DEF_HELPER_2(raise_exception, noreturn, env, i32) >> + >> +DEF_HELPER_2(cto_w, tl, env, tl) >> +DEF_HELPER_2(ctz_w, tl, env, tl) >> +DEF_HELPER_2(cto_d, tl, env, tl) >> +DEF_HELPER_2(ctz_d, tl, env, tl) > > The count leading and trailing zero operations are built into tcg. Count leading and trailing one simply needs a NOT operation to convert it to zero. > My understanding is this: cto -> NOT operation (tcg_gen_not_tl) -> ctz, is right? >> +DEF_HELPER_2(bitrev_w, tl, env, tl) >> +DEF_HELPER_2(bitrev_d, tl, env, tl) > > These should use TCG_CALL_NO_RWG_SE. > >> +target_ulong helper_bitrev_w(CPULoongArchState *env, target_ulong rj) >> +{ >> + int32_t v = (int32_t)rj; >> + const int SIZE = 32; >> + uint8_t bytes[SIZE]; >> + >> + int i; >> + for (i = 0; i < SIZE; i++) { >> + bytes[i] = v & 0x1; >> + v = v >> 1; >> + } >> + /* v == 0 */ >> + for (i = 0; i < SIZE; i++) { >> + v = v | ((uint32_t)bytes[i] << (SIZE - 1 - i)); >> + } >> + >> + return (target_ulong)(int32_t)v; >> +} > > return (int32_t)revbit32(rj); > > OK. >> +target_ulong helper_bitrev_d(CPULoongArchState *env, target_ulong rj) >> +{ >> + uint64_t v = rj; >> + const int SIZE = 64; >> + uint8_t bytes[SIZE]; >> + >> + int i; >> + for (i = 0; i < SIZE; i++) { >> + bytes[i] = v & 0x1; >> + v = v >> 1; >> + } >> + /* v == 0 */ >> + for (i = 0; i < SIZE; i++) { >> + v = v | ((uint64_t)bytes[i] << (SIZE - 1 - i)); >> + } >> + >> + return (target_ulong)v; >> +} > > return revbit64(rj); > OK. >> +static inline target_ulong bitswap(target_ulong v) >> +{ >> + v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) | >> + ((v & (target_ulong)0x5555555555555555ULL) << 1); >> + v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) | >> + ((v & (target_ulong)0x3333333333333333ULL) << 2); >> + v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) | >> + ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4); >> + return v; >> +} >> + >> +target_ulong helper_loongarch_dbitswap(target_ulong rj) >> +{ >> + return bitswap(rj); >> +} >> + >> +target_ulong helper_loongarch_bitswap(target_ulong rt) >> +{ >> + return (int32_t)bitswap(rt); >> +} > > I assume these are fpr the bitrev.4b and bitrev.8b insns? > It would be better to name them correctly. > > Yes. 
>> +/* Fixed point bit operation instruction translation */ >> +static bool trans_ext_w_h(DisasContext *ctx, arg_ext_w_h *a) >> +{ >> + TCGv t0; >> + TCGv Rd = cpu_gpr[a->rd]; >> + >> + if (a->rd == 0) { >> + /* Nop */ >> + return true; >> + } >> + >> + t0 = get_gpr(a->rj); >> + >> + tcg_gen_ext16s_tl(Rd, t0); > > Again, you should have a common routine for handling these unary operations. > OK. >> +static bool trans_clo_w(DisasContext *ctx, arg_clo_w *a) >> +{ >> + TCGv Rd = cpu_gpr[a->rd]; >> + >> + if (a->rd == 0) { >> + /* Nop */ >> + return true; >> + } >> + >> + gen_load_gpr(Rd, a->rj); >> + >> + tcg_gen_not_tl(Rd, Rd); >> + tcg_gen_ext32u_tl(Rd, Rd); >> + tcg_gen_clzi_tl(Rd, Rd, TARGET_LONG_BITS); >> + tcg_gen_subi_tl(Rd, Rd, TARGET_LONG_BITS - 32); > > So, you're actually using the tcg builtins here, and the helper you created isn't used. > Yes. >> +static bool trans_cto_w(DisasContext *ctx, arg_cto_w *a) >> +{ >> + TCGv t0; >> + TCGv Rd = cpu_gpr[a->rd]; >> + >> + if (a->rd == 0) { >> + /* Nop */ >> + return true; >> + } >> + >> + t0 = tcg_temp_new(); >> + gen_load_gpr(t0, a->rj); >> + >> + gen_helper_cto_w(Rd, cpu_env, t0); > > Here you should have used the tcg builtin. > OK. >> +static bool trans_ctz_w(DisasContext *ctx, arg_ctz_w *a) >> +{ >> + TCGv t0; >> + TCGv Rd = cpu_gpr[a->rd]; >> + >> + if (a->rd == 0) { >> + /* Nop */ >> + return true; >> + } >> + >> + t0 = tcg_temp_new(); >> + gen_load_gpr(t0, a->rj); >> + >> + gen_helper_ctz_w(Rd, cpu_env, t0); > > Likewise. > >> +static bool trans_revb_2w(DisasContext *ctx, arg_revb_2w *a) >> +{ >> + TCGv_i64 t0, t1, t2; >> + TCGv Rd = cpu_gpr[a->rd]; >> + >> + if (a->rd == 0) { >> + /* Nop */ >> + return true; >> + } >> + >> + t0 = tcg_temp_new_i64(); >> + t1 = tcg_temp_new_i64(); >> + t2 = get_gpr(a->rj); >> + >> + gen_load_gpr(t0, a->rd); >> + >> + tcg_gen_ext32u_i64(t1, t2); >> + tcg_gen_bswap32_i64(t0, t1); >> + tcg_gen_shri_i64(t1, t2, 32); >> + tcg_gen_bswap32_i64(t1, t1); >> + tcg_gen_concat32_i64(Rd, t0, t1); > > tcg_gen_bswap64_i64(Rd, Rj) > tcg_gen_rotri_i64(Rd, Rd, 32); > OK. >> +static bool trans_bytepick_d(DisasContext *ctx, arg_bytepick_d *a) >> +{ >> + TCGv t0; >> + TCGv Rd = cpu_gpr[a->rd]; >> + >> + if (a->rd == 0) { >> + /* Nop */ >> + return true; >> + } >> + >> + t0 = tcg_temp_new(); >> + >> + check_loongarch_64(ctx); >> + if (a->sa3 == 0 || ((a->sa3) * 8) == 64) { >> + if (a->sa3 == 0) { >> + gen_load_gpr(t0, a->rk); >> + } else { >> + gen_load_gpr(t0, a->rj); >> + } >> + tcg_gen_mov_tl(Rd, t0); >> + } else { >> + TCGv t1 = tcg_temp_new(); >> + >> + gen_load_gpr(t0, a->rk); >> + gen_load_gpr(t1, a->rj); >> + >> + tcg_gen_shli_tl(t0, t0, ((a->sa3) * 8)); >> + tcg_gen_shri_tl(t1, t1, 64 - ((a->sa3) * 8)); >> + tcg_gen_or_tl(Rd, t1, t0); >> + >> + tcg_temp_free(t1); >> + } > > tcg_gen_extract2_i64(Rd, Rk, Rj, a->sa3 * 8); > OK Thank you kindly help. Thanks Song Gao.
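The "common routine for unary operations" Richard asks for could be shaped roughly like this; the gen_rr name and the arg_fmt_rdrj type follow the patch's decodetree argument set, and the next revision may well structure it differently:

    /* Shared translator for rd = f(rj) style instructions. */
    static bool gen_rr(DisasContext *ctx, arg_fmt_rdrj *a,
                       void (*func)(TCGv, TCGv))
    {
        if (a->rd == 0) {
            /* Nop */
            return true;
        }
        func(cpu_gpr[a->rd], get_gpr(a->rj));
        return true;
    }

    static bool trans_ext_w_h(DisasContext *ctx, arg_ext_w_h *a)
    {
        return gen_rr(ctx, a, tcg_gen_ext16s_tl);
    }

    static bool trans_ext_w_b(DisasContext *ctx, arg_ext_w_b *a)
    {
        return gen_rr(ctx, a, tcg_gen_ext8s_tl);
    }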
On 7/26/21 2:22 AM, Song Gao wrote: > Hi, Richard. > > On 07/23/2021 09:29 AM, Richard Henderson wrote: >> On 7/20/21 11:53 PM, Song Gao wrote: >>> This patch implement fixed point bit instruction translation. >>> >>> This includes: >>> - EXT.W.{B/H} >>> - CL{O/Z}.{W/D}, CT{O/Z}.{W/D} >>> - BYTEPICK.{W/D} >>> - REVB.{2H/4H/2W/D} >>> - REVH.{2W/D} >>> - BITREV.{4B/8B}, BITREV.{W/D} >>> - BSTRINS.{W/D}, BSTRPICK.{W/D} >>> - MASKEQZ, MASKNEZ >>> >>> Signed-off-by: Song Gao <gaosong@loongson.cn> >>> --- >>> target/loongarch/helper.h | 10 + >>> target/loongarch/insns.decode | 45 +++ >>> target/loongarch/op_helper.c | 119 ++++++++ >>> target/loongarch/trans.inc.c | 665 ++++++++++++++++++++++++++++++++++++++++++ >>> 4 files changed, 839 insertions(+) >>> >>> diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h >>> index 6c7e19b..bbbcc26 100644 >>> --- a/target/loongarch/helper.h >>> +++ b/target/loongarch/helper.h >>> @@ -8,3 +8,13 @@ >>> DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int) >>> DEF_HELPER_2(raise_exception, noreturn, env, i32) >>> + >>> +DEF_HELPER_2(cto_w, tl, env, tl) >>> +DEF_HELPER_2(ctz_w, tl, env, tl) >>> +DEF_HELPER_2(cto_d, tl, env, tl) >>> +DEF_HELPER_2(ctz_d, tl, env, tl) >> >> The count leading and trailing zero operations are built into tcg. Count leading and trailing one simply needs a NOT operation to convert it to zero. >> > > My understanding is this: > > cto -> NOT operation (tcg_gen_not_tl) -> ctz, > > is right? Yes. r~
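With that confirmed, the W-sized count translations can stay entirely inside TCG. A sketch, keeping the patch's gen_load_gpr()/cpu_gpr[] plumbing and using tcg_gen_ctzi_tl, whose third operand is the value returned for a zero input:

    static bool trans_ctz_w(DisasContext *ctx, arg_ctz_w *a)
    {
        TCGv Rd = cpu_gpr[a->rd];

        if (a->rd == 0) {
            /* Nop */
            return true;
        }

        gen_load_gpr(Rd, a->rj);
        tcg_gen_ext32u_tl(Rd, Rd);
        tcg_gen_ctzi_tl(Rd, Rd, 32);    /* all-zero low word -> 32 */

        return true;
    }

    static bool trans_cto_w(DisasContext *ctx, arg_cto_w *a)
    {
        TCGv Rd = cpu_gpr[a->rd];

        if (a->rd == 0) {
            /* Nop */
            return true;
        }

        gen_load_gpr(Rd, a->rj);
        /* cto(x) == ctz(~x): invert, then count trailing zeros. */
        tcg_gen_not_tl(Rd, Rd);
        tcg_gen_ext32u_tl(Rd, Rd);
        tcg_gen_ctzi_tl(Rd, Rd, 32);    /* all-ones low word -> 32 */

        return true;
    }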
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h index 6c7e19b..bbbcc26 100644 --- a/target/loongarch/helper.h +++ b/target/loongarch/helper.h @@ -8,3 +8,13 @@ DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int) DEF_HELPER_2(raise_exception, noreturn, env, i32) + +DEF_HELPER_2(cto_w, tl, env, tl) +DEF_HELPER_2(ctz_w, tl, env, tl) +DEF_HELPER_2(cto_d, tl, env, tl) +DEF_HELPER_2(ctz_d, tl, env, tl) +DEF_HELPER_2(bitrev_w, tl, env, tl) +DEF_HELPER_2(bitrev_d, tl, env, tl) + +DEF_HELPER_FLAGS_1(loongarch_bitswap, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(loongarch_dbitswap, TCG_CALL_NO_RWG_SE, tl, tl) diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode index 9302576..ec599a9 100644 --- a/target/loongarch/insns.decode +++ b/target/loongarch/insns.decode @@ -13,12 +13,17 @@ %rj 5:5 %rk 10:5 %sa2 15:2 +%sa3 15:3 %si12 10:s12 %ui12 10:12 %si16 10:s16 %si20 5:s20 %ui5 10:5 %ui6 10:6 +%msbw 16:5 +%lsbw 10:5 +%msbd 16:6 +%lsbd 10:6 # # Argument sets @@ -31,6 +36,10 @@ &fmt_rdsi20 rd si20 &fmt_rdrjui5 rd rj ui5 &fmt_rdrjui6 rd rj ui6 +&fmt_rdrj rd rj +&fmt_rdrjrksa3 rd rj rk sa3 +&fmt_rdrjmsbwlsbw rd rj msbw lsbw +&fmt_rdrjmsbdlsbd rd rj msbd lsbd # # Formats @@ -43,6 +52,10 @@ @fmt_rdsi20 .... ... .................... ..... &fmt_rdsi20 %rd %si20 @fmt_rdrjui5 .... ........ ..... ..... ..... ..... &fmt_rdrjui5 %rd %rj %ui5 @fmt_rdrjui6 .... ........ .... ...... ..... ..... &fmt_rdrjui6 %rd %rj %ui6 +@fmt_rdrj .... ........ ..... ..... ..... ..... &fmt_rdrj %rd %rj +@fmt_rdrjmsbwlsbw .... ....... ..... . ..... ..... ..... &fmt_rdrjmsbwlsbw %rd %rj %msbw %lsbw +@fmt_rdrjmsbdlsbd .... ...... ...... ...... ..... ..... &fmt_rdrjmsbdlsbd %rd %rj %msbd %lsbd +@fmt_rdrjrksa3 .... ........ .. ... ..... ..... ..... &fmt_rdrjrksa3 %rd %rj %rk %sa3 # # Fixed point arithmetic operation instruction @@ -113,3 +126,35 @@ srai_w 0000 00000100 10001 ..... ..... ..... @fmt_rdrjui5 srai_d 0000 00000100 1001 ...... ..... ..... @fmt_rdrjui6 rotri_w 0000 00000100 11001 ..... ..... ..... @fmt_rdrjui5 rotri_d 0000 00000100 1101 ...... ..... ..... @fmt_rdrjui6 + +# +# Fixed point bit operation instruction +# +ext_w_h 0000 00000000 00000 10110 ..... ..... @fmt_rdrj +ext_w_b 0000 00000000 00000 10111 ..... ..... @fmt_rdrj +clo_w 0000 00000000 00000 00100 ..... ..... @fmt_rdrj +clz_w 0000 00000000 00000 00101 ..... ..... @fmt_rdrj +cto_w 0000 00000000 00000 00110 ..... ..... @fmt_rdrj +ctz_w 0000 00000000 00000 00111 ..... ..... @fmt_rdrj +clo_d 0000 00000000 00000 01000 ..... ..... @fmt_rdrj +clz_d 0000 00000000 00000 01001 ..... ..... @fmt_rdrj +cto_d 0000 00000000 00000 01010 ..... ..... @fmt_rdrj +ctz_d 0000 00000000 00000 01011 ..... ..... @fmt_rdrj +revb_2h 0000 00000000 00000 01100 ..... ..... @fmt_rdrj +revb_4h 0000 00000000 00000 01101 ..... ..... @fmt_rdrj +revb_2w 0000 00000000 00000 01110 ..... ..... @fmt_rdrj +revb_d 0000 00000000 00000 01111 ..... ..... @fmt_rdrj +revh_2w 0000 00000000 00000 10000 ..... ..... @fmt_rdrj +revh_d 0000 00000000 00000 10001 ..... ..... @fmt_rdrj +bitrev_4b 0000 00000000 00000 10010 ..... ..... @fmt_rdrj +bitrev_8b 0000 00000000 00000 10011 ..... ..... @fmt_rdrj +bitrev_w 0000 00000000 00000 10100 ..... ..... @fmt_rdrj +bitrev_d 0000 00000000 00000 10101 ..... ..... @fmt_rdrj +bytepick_w 0000 00000000 100 .. ..... ..... ..... @fmt_rdrjrksa2 +bytepick_d 0000 00000000 11 ... ..... ..... ..... @fmt_rdrjrksa3 +maskeqz 0000 00000001 00110 ..... ..... ..... @fmt_rdrjrk +masknez 0000 00000001 00111 ..... ..... ..... 
@fmt_rdrjrk +bstrins_w 0000 0000011 ..... 0 ..... ..... ..... @fmt_rdrjmsbwlsbw +bstrpick_w 0000 0000011 ..... 1 ..... ..... ..... @fmt_rdrjmsbwlsbw +bstrins_d 0000 000010 ...... ...... ..... ..... @fmt_rdrjmsbdlsbd +bstrpick_d 0000 000011 ...... ...... ..... ..... @fmt_rdrjmsbdlsbd diff --git a/target/loongarch/op_helper.c b/target/loongarch/op_helper.c index b2cbdd7..07c3d52 100644 --- a/target/loongarch/op_helper.c +++ b/target/loongarch/op_helper.c @@ -25,3 +25,122 @@ void helper_raise_exception(CPULoongArchState *env, uint32_t exception) { do_raise_exception(env, exception, GETPC()); } + +target_ulong helper_cto_w(CPULoongArchState *env, target_ulong rj) +{ + uint32_t v = (uint32_t)rj; + int temp = 0; + + while ((v & 0x1) == 1) { + temp++; + v = v >> 1; + } + + return (target_ulong)temp; +} + +target_ulong helper_ctz_w(CPULoongArchState *env, target_ulong rj) +{ + uint32_t v = (uint32_t)rj; + + if (v == 0) { + return 32; + } + + int temp = 0; + while ((v & 0x1) == 0) { + temp++; + v = v >> 1; + } + + return (target_ulong)temp; +} + +target_ulong helper_cto_d(CPULoongArchState *env, target_ulong rj) +{ + uint64_t v = rj; + int temp = 0; + + while ((v & 0x1) == 1) { + temp++; + v = v >> 1; + } + + return (target_ulong)temp; +} + +target_ulong helper_ctz_d(CPULoongArchState *env, target_ulong rj) +{ + uint64_t v = rj; + + if (v == 0) { + return 64; + } + + int temp = 0; + while ((v & 0x1) == 0) { + temp++; + v = v >> 1; + } + + return (target_ulong)temp; +} + +target_ulong helper_bitrev_w(CPULoongArchState *env, target_ulong rj) +{ + int32_t v = (int32_t)rj; + const int SIZE = 32; + uint8_t bytes[SIZE]; + + int i; + for (i = 0; i < SIZE; i++) { + bytes[i] = v & 0x1; + v = v >> 1; + } + /* v == 0 */ + for (i = 0; i < SIZE; i++) { + v = v | ((uint32_t)bytes[i] << (SIZE - 1 - i)); + } + + return (target_ulong)(int32_t)v; +} + +target_ulong helper_bitrev_d(CPULoongArchState *env, target_ulong rj) +{ + uint64_t v = rj; + const int SIZE = 64; + uint8_t bytes[SIZE]; + + int i; + for (i = 0; i < SIZE; i++) { + bytes[i] = v & 0x1; + v = v >> 1; + } + /* v == 0 */ + for (i = 0; i < SIZE; i++) { + v = v | ((uint64_t)bytes[i] << (SIZE - 1 - i)); + } + + return (target_ulong)v; +} + +static inline target_ulong bitswap(target_ulong v) +{ + v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) | + ((v & (target_ulong)0x5555555555555555ULL) << 1); + v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) | + ((v & (target_ulong)0x3333333333333333ULL) << 2); + v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) | + ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4); + return v; +} + +target_ulong helper_loongarch_dbitswap(target_ulong rj) +{ + return bitswap(rj); +} + +target_ulong helper_loongarch_bitswap(target_ulong rt) +{ + return (int32_t)bitswap(rt); +} diff --git a/target/loongarch/trans.inc.c b/target/loongarch/trans.inc.c index 62e9396..8c5ba63 100644 --- a/target/loongarch/trans.inc.c +++ b/target/loongarch/trans.inc.c @@ -1451,3 +1451,668 @@ static bool trans_rotri_d(DisasContext *ctx, arg_rotri_d *a) return true; } + +/* Fixed point bit operation instruction translation */ +static bool trans_ext_w_h(DisasContext *ctx, arg_ext_w_h *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = get_gpr(a->rj); + + tcg_gen_ext16s_tl(Rd, t0); + + return true; +} + +static bool trans_ext_w_b(DisasContext *ctx, arg_ext_w_b *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = get_gpr(a->rj); + + 
tcg_gen_ext8s_tl(Rd, t0); + + return true; +} + +static bool trans_clo_w(DisasContext *ctx, arg_clo_w *a) +{ + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + gen_load_gpr(Rd, a->rj); + + tcg_gen_not_tl(Rd, Rd); + tcg_gen_ext32u_tl(Rd, Rd); + tcg_gen_clzi_tl(Rd, Rd, TARGET_LONG_BITS); + tcg_gen_subi_tl(Rd, Rd, TARGET_LONG_BITS - 32); + + return true; +} + +static bool trans_clz_w(DisasContext *ctx, arg_clz_w *a) +{ + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + gen_load_gpr(Rd, a->rj); + + tcg_gen_ext32u_tl(Rd, Rd); + tcg_gen_clzi_tl(Rd, Rd, TARGET_LONG_BITS); + tcg_gen_subi_tl(Rd, Rd, TARGET_LONG_BITS - 32); + + return true; +} + +static bool trans_cto_w(DisasContext *ctx, arg_cto_w *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + gen_load_gpr(t0, a->rj); + + gen_helper_cto_w(Rd, cpu_env, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_ctz_w(DisasContext *ctx, arg_ctz_w *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + gen_load_gpr(t0, a->rj); + + gen_helper_ctz_w(Rd, cpu_env, t0); + + tcg_temp_free(t0); + + return true; +} +static bool trans_clo_d(DisasContext *ctx, arg_clo_d *a) +{ + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + check_loongarch_64(ctx); + gen_load_gpr(Rd, a->rj); + tcg_gen_not_tl(Rd, Rd); + tcg_gen_clzi_i64(Rd, Rd, 64); + + return true; +} + +static bool trans_clz_d(DisasContext *ctx, arg_clz_d *a) +{ + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + check_loongarch_64(ctx); + gen_load_gpr(Rd, a->rj); + tcg_gen_clzi_i64(Rd, Rd, 64); + + return true; +} + +static bool trans_cto_d(DisasContext *ctx, arg_cto_d *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + gen_load_gpr(t0, a->rj); + + gen_helper_cto_d(Rd, cpu_env, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_ctz_d(DisasContext *ctx, arg_ctz_d *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + gen_load_gpr(t0, a->rj); + + gen_helper_ctz_d(Rd, cpu_env, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_revb_2h(DisasContext *ctx, arg_revb_2h *a) +{ + TCGv t0, t1, mask; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + mask = tcg_const_tl(0x00FF00FF); + + gen_load_gpr(t0, a->rj); + + tcg_gen_shri_tl(t1, t0, 8); + tcg_gen_and_tl(t1, t1, mask); + tcg_gen_and_tl(t0, t0, mask); + tcg_gen_shli_tl(t0, t0, 8); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_ext32s_tl(Rd, t0); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mask); + + return true; +} + +static bool trans_revb_4h(DisasContext *ctx, arg_revb_4h *a) +{ + TCGv t0, t1, mask; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + mask = tcg_const_tl(0x00FF00FF00FF00FFULL); + + gen_load_gpr(t0, a->rj); + + check_loongarch_64(ctx); + tcg_gen_shri_tl(t1, t0, 8); + tcg_gen_and_tl(t1, t1, mask); + tcg_gen_and_tl(t0, t0, mask); + tcg_gen_shli_tl(t0, t0, 8); + tcg_gen_or_tl(Rd, t0, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mask); + + return true; +} + +static bool trans_revb_2w(DisasContext *ctx, 
arg_revb_2w *a) +{ + TCGv_i64 t0, t1, t2; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = get_gpr(a->rj); + + gen_load_gpr(t0, a->rd); + + tcg_gen_ext32u_i64(t1, t2); + tcg_gen_bswap32_i64(t0, t1); + tcg_gen_shri_i64(t1, t2, 32); + tcg_gen_bswap32_i64(t1, t1); + tcg_gen_concat32_i64(Rd, t0, t1); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + + return true; +} + +static bool trans_revb_d(DisasContext *ctx, arg_revb_d *a) +{ + TCGv Rd = cpu_gpr[a->rd]; + TCGv Rj = cpu_gpr[a->rj]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + check_loongarch_64(ctx); + tcg_gen_bswap64_i64(Rd, Rj); + + return true; +} + +static bool trans_revh_2w(DisasContext *ctx, arg_revh_2w *a) +{ + TCGv_i64 t0, t1, t2, mask; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = get_gpr(a->rj); + mask = tcg_const_i64(0x0000ffff0000ffffull); + + gen_load_gpr(t1, a->rd); + + tcg_gen_shri_i64(t0, t2, 16); + tcg_gen_and_i64(t1, t2, mask); + tcg_gen_and_i64(t0, t0, mask); + tcg_gen_shli_i64(t1, t1, 16); + tcg_gen_or_i64(Rd, t1, t0); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(mask); + + return true; +} + +static bool trans_revh_d(DisasContext *ctx, arg_revh_d *a) +{ + TCGv t0, t1, mask; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + mask = tcg_const_tl(0x0000FFFF0000FFFFULL); + + gen_load_gpr(t0, a->rj); + + check_loongarch_64(ctx); + tcg_gen_shri_tl(t1, t0, 16); + tcg_gen_and_tl(t1, t1, mask); + tcg_gen_and_tl(t0, t0, mask); + tcg_gen_shli_tl(t0, t0, 16); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_shri_tl(t1, t0, 32); + tcg_gen_shli_tl(t0, t0, 32); + tcg_gen_or_tl(Rd, t0, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mask); + + return true; +} + +static bool trans_bitrev_4b(DisasContext *ctx, arg_bitrev_4b *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + gen_load_gpr(t0, a->rj); + + gen_helper_loongarch_bitswap(Rd, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_bitrev_8b(DisasContext *ctx, arg_bitrev_8b *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + gen_load_gpr(t0, a->rj); + + check_loongarch_64(ctx); + gen_helper_loongarch_dbitswap(Rd, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_bitrev_w(DisasContext *ctx, arg_bitrev_w *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + gen_load_gpr(t0, a->rj); + + gen_helper_bitrev_w(Rd, cpu_env, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_bitrev_d(DisasContext *ctx, arg_bitrev_d *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + gen_load_gpr(t0, a->rj); + + check_loongarch_64(ctx); + gen_helper_bitrev_d(Rd, cpu_env, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_bytepick_w(DisasContext *ctx, arg_bytepick_w *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + if (a->sa2 == 0 || ((a->sa2) * 8) == 32) { + if (a->sa2 == 0) { + t0 = get_gpr(a->rk); + } else { + t0 = get_gpr(a->rj); + } + tcg_gen_ext32s_tl(Rd, t0); + } else { + t0 
= get_gpr(a->rk); + + TCGv t1 = get_gpr(a->rj); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(t2, t1, t0); + tcg_gen_shri_i64(t2, t2, 32 - ((a->sa2) * 8)); + tcg_gen_ext32s_i64(Rd, t2); + + tcg_temp_free_i64(t2); + } + + return true; +} + +static bool trans_bytepick_d(DisasContext *ctx, arg_bytepick_d *a) +{ + TCGv t0; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = tcg_temp_new(); + + check_loongarch_64(ctx); + if (a->sa3 == 0 || ((a->sa3) * 8) == 64) { + if (a->sa3 == 0) { + gen_load_gpr(t0, a->rk); + } else { + gen_load_gpr(t0, a->rj); + } + tcg_gen_mov_tl(Rd, t0); + } else { + TCGv t1 = tcg_temp_new(); + + gen_load_gpr(t0, a->rk); + gen_load_gpr(t1, a->rj); + + tcg_gen_shli_tl(t0, t0, ((a->sa3) * 8)); + tcg_gen_shri_tl(t1, t1, 64 - ((a->sa3) * 8)); + tcg_gen_or_tl(Rd, t1, t0); + + tcg_temp_free(t1); + } + + tcg_temp_free(t0); + + return true; +} + +static bool trans_maskeqz(DisasContext *ctx, arg_maskeqz *a) +{ + TCGv t0, t1, t2; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = get_gpr(a->rk); + t1 = get_gpr(a->rj); + t2 = tcg_const_tl(0); + + tcg_gen_movcond_tl(TCG_COND_NE, Rd, t0, t2, t1, t2); + + tcg_temp_free(t2); + + return true; +} + +static bool trans_masknez(DisasContext *ctx, arg_masknez *a) +{ + TCGv t0, t1, t2; + TCGv Rd = cpu_gpr[a->rd]; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + t0 = get_gpr(a->rk); + t1 = get_gpr(a->rj); + t2 = tcg_const_tl(0); + + tcg_gen_movcond_tl(TCG_COND_EQ, Rd, t0, t2, t1, t2); + + tcg_temp_free(t2); + + return true; +} + +static bool trans_bstrins_d(DisasContext *ctx, arg_bstrins_d *a) +{ + TCGv t0, t1; + TCGv Rd = cpu_gpr[a->rd]; + int lsb = a->lsbd; + int msb = a->msbd; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + if (lsb > msb) { + return false; + } + + t0 = tcg_temp_new(); + t1 = get_gpr(a->rj); + + gen_load_gpr(t0, a->rd); + + tcg_gen_deposit_tl(t0, t0, t1, lsb, msb - lsb + 1); + tcg_gen_mov_tl(Rd, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_bstrpick_d(DisasContext *ctx, arg_bstrpick_d *a) +{ + TCGv t0, t1; + TCGv Rd = cpu_gpr[a->rd]; + int lsb = a->lsbd; + int msb = a->msbd; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + if (lsb > msb) { + return false; + } + + t0 = tcg_temp_new(); + t1 = get_gpr(a->rj); + + gen_load_gpr(t0, a->rd); + + tcg_gen_extract_tl(t0, t1, lsb, msb - lsb + 1); + tcg_gen_mov_tl(Rd, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_bstrins_w(DisasContext *ctx, arg_bstrins_w *a) +{ + TCGv t0, t1; + TCGv Rd = cpu_gpr[a->rd]; + int lsb = a->lsbw; + int msb = a->msbw; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + if (lsb > msb) { + return false; + } + + t0 = tcg_temp_new(); + t1 = get_gpr(a->rj); + + gen_load_gpr(t0, a->rd); + + tcg_gen_deposit_tl(t0, t0, t1, lsb, msb - lsb + 1); + tcg_gen_ext32s_tl(t0, t0); + tcg_gen_mov_tl(Rd, t0); + + tcg_temp_free(t0); + + return true; +} + +static bool trans_bstrpick_w(DisasContext *ctx, arg_bstrpick_w *a) +{ + TCGv t0, t1; + TCGv Rd = cpu_gpr[a->rd]; + int lsb = a->lsbw; + int msb = a->msbw; + + if (a->rd == 0) { + /* Nop */ + return true; + } + + if ((a->lsbw > a->msbw) || (lsb + msb > 31)) { + return false; + } + + t0 = tcg_temp_new(); + t1 = get_gpr(a->rj); + + if (msb != 31) { + tcg_gen_extract_tl(t0, t1, lsb, msb + 1); + } else { + tcg_gen_ext32s_tl(t0, t1); + } + tcg_gen_mov_tl(Rd, t0); + + tcg_temp_free(t0); + + return true; +}
This patch implements fixed point bit instruction translation.

This includes:
- EXT.W.{B/H}
- CL{O/Z}.{W/D}, CT{O/Z}.{W/D}
- BYTEPICK.{W/D}
- REVB.{2H/4H/2W/D}
- REVH.{2W/D}
- BITREV.{4B/8B}, BITREV.{W/D}
- BSTRINS.{W/D}, BSTRPICK.{W/D}
- MASKEQZ, MASKNEZ

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/helper.h     |  10 +
 target/loongarch/insns.decode |  45 +++
 target/loongarch/op_helper.c  | 119 ++++++++
 target/loongarch/trans.inc.c  | 665 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 839 insertions(+)
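For the next revision, the op_helper.c/helper.h side of the review feedback boils down to something like the following sketch. It uses revbit32/revbit64 from "qemu/host-utils.h"; the bitrev_4b/bitrev_8b helper names follow Richard's naming remark and are not the names used in this version of the patch:

    /* helper.h: pure helpers, no env access, no side effects. */
    DEF_HELPER_FLAGS_1(bitrev_w,  TCG_CALL_NO_RWG_SE, tl, tl)
    DEF_HELPER_FLAGS_1(bitrev_d,  TCG_CALL_NO_RWG_SE, tl, tl)
    DEF_HELPER_FLAGS_1(bitrev_4b, TCG_CALL_NO_RWG_SE, tl, tl)
    DEF_HELPER_FLAGS_1(bitrev_8b, TCG_CALL_NO_RWG_SE, tl, tl)

    /* op_helper.c: BITREV.W sign-extends the reversed word, BITREV.D does not. */
    target_ulong helper_bitrev_w(target_ulong rj)
    {
        return (target_ulong)(int32_t)revbit32((uint32_t)rj);
    }

    target_ulong helper_bitrev_d(target_ulong rj)
    {
        return revbit64(rj);
    }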