diff mbox series

[v2,20/46] target/loongarch: Implement vext2xv

Message ID 20230630075904.45940-21-gaosong@loongson.cn
State New
Headers show
Series Add LoongArch LASX instructions | expand

Commit Message

Song Gao June 30, 2023, 7:58 a.m. UTC
This patch includes:
- VEXT2XV.{H/W/D}.B, VEXT2XV.{HU/WU/DU}.BU;
- VEXT2XV.{W/D}.H, VEXT2XV.{WU/DU}.HU;
- VEXT2XV.D.W, VEXT2XV.DU.WU.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/disas.c                     | 13 +++++++++
 target/loongarch/helper.h                    | 13 +++++++++
 target/loongarch/insn_trans/trans_lasx.c.inc | 13 +++++++++
 target/loongarch/insns.decode                | 13 +++++++++
 target/loongarch/vec_helper.c                | 28 ++++++++++++++++++++
 5 files changed, 80 insertions(+)

Comments

Richard Henderson July 7, 2023, 9:19 p.m. UTC | #1
On 6/30/23 08:58, Song Gao wrote:
> +#define VEXT2XV(NAME, BIT, E1, E2)                        \
> +void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \
> +                  uint32_t vd, uint32_t vj)               \
> +{                                                         \
> +    int i;                                                \
> +    VReg *Vd = &(env->fpr[vd].vreg);                      \
> +    VReg *Vj = &(env->fpr[vj].vreg);                      \
> +    VReg temp;                                            \
> +                                                          \
> +    for (i = 0; i < LASX_LEN / BIT; i++) {                \
> +        temp.E1(i) = Vj->E2(i);                           \
> +    }                                                     \
> +    *Vd = temp;                                           \
> +}

So unlike VEXT(H), this does compress in order?

Anyway, function signature and iteration without LASX_LEN.
Isn't there a 128-bit helper to merge this with?


r~
Song Gao July 8, 2023, 7:24 a.m. UTC | #2
Hi, Richard

在 2023/7/8 上午5:19, Richard Henderson 写道:
> On 6/30/23 08:58, Song Gao wrote:
>> +#define VEXT2XV(NAME, BIT, E1, E2)                        \
>> +void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \
>> +                  uint32_t vd, uint32_t vj)               \
>> +{                                                         \
>> +    int i;                                                \
>> +    VReg *Vd = &(env->fpr[vd].vreg);                      \
>> +    VReg *Vj = &(env->fpr[vj].vreg);                      \
>> +    VReg temp;                                            \
>> +                                                          \
>> +    for (i = 0; i < LASX_LEN / BIT; i++) {                \
>> +        temp.E1(i) = Vj->E2(i);                           \
>> +    }                                                     \
>> +    *Vd = temp;                                           \
>> +}
>
> So unlike VEXT(H), this does compress in order?
Yes.
>
> Anyway, function signature and iteration without LASX_LEN.
> Isn't there a 128-bit helper to merge this with?
>
There are no similar 128-bit instructions.

Thanks.
Song Gao
diff mbox series

Patch

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 6ca545956d..975ea018da 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1997,6 +1997,19 @@  INSN_LASX(xvexth_wu_hu,      vv)
 INSN_LASX(xvexth_du_wu,      vv)
 INSN_LASX(xvexth_qu_du,      vv)
 
+INSN_LASX(vext2xv_h_b,       vv)
+INSN_LASX(vext2xv_w_b,       vv)
+INSN_LASX(vext2xv_d_b,       vv)
+INSN_LASX(vext2xv_w_h,       vv)
+INSN_LASX(vext2xv_d_h,       vv)
+INSN_LASX(vext2xv_d_w,       vv)
+INSN_LASX(vext2xv_hu_bu,     vv)
+INSN_LASX(vext2xv_wu_bu,     vv)
+INSN_LASX(vext2xv_du_bu,     vv)
+INSN_LASX(vext2xv_wu_hu,     vv)
+INSN_LASX(vext2xv_du_hu,     vv)
+INSN_LASX(vext2xv_du_wu,     vv)
+
 INSN_LASX(xvreplgr2vr_b,     vr)
 INSN_LASX(xvreplgr2vr_h,     vr)
 INSN_LASX(xvreplgr2vr_w,     vr)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index b7eece8d43..81d0f06cc0 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -339,6 +339,19 @@  DEF_HELPER_4(vexth_wu_hu, void, env, i32, i32, i32)
 DEF_HELPER_4(vexth_du_wu, void, env, i32, i32, i32)
 DEF_HELPER_4(vexth_qu_du, void, env, i32, i32, i32)
 
+DEF_HELPER_4(vext2xv_h_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_w_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_d_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_w_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_d_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_d_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_hu_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_wu_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_du_bu, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_wu_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_du_hu, void, env, i32, i32, i32)
+DEF_HELPER_4(vext2xv_du_wu, void, env, i32, i32, i32)
+
 DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insn_trans/trans_lasx.c.inc b/target/loongarch/insn_trans/trans_lasx.c.inc
index f100a4a27c..096f7856c4 100644
--- a/target/loongarch/insn_trans/trans_lasx.c.inc
+++ b/target/loongarch/insn_trans/trans_lasx.c.inc
@@ -379,6 +379,19 @@  TRANS(xvexth_wu_hu, gen_vv, 32, gen_helper_vexth_wu_hu)
 TRANS(xvexth_du_wu, gen_vv, 32, gen_helper_vexth_du_wu)
 TRANS(xvexth_qu_du, gen_vv, 32, gen_helper_vexth_qu_du)
 
+TRANS(vext2xv_h_b, gen_vv, 32, gen_helper_vext2xv_h_b)
+TRANS(vext2xv_w_b, gen_vv, 32, gen_helper_vext2xv_w_b)
+TRANS(vext2xv_d_b, gen_vv, 32, gen_helper_vext2xv_d_b)
+TRANS(vext2xv_w_h, gen_vv, 32, gen_helper_vext2xv_w_h)
+TRANS(vext2xv_d_h, gen_vv, 32, gen_helper_vext2xv_d_h)
+TRANS(vext2xv_d_w, gen_vv, 32, gen_helper_vext2xv_d_w)
+TRANS(vext2xv_hu_bu, gen_vv, 32, gen_helper_vext2xv_hu_bu)
+TRANS(vext2xv_wu_bu, gen_vv, 32, gen_helper_vext2xv_wu_bu)
+TRANS(vext2xv_du_bu, gen_vv, 32, gen_helper_vext2xv_du_bu)
+TRANS(vext2xv_wu_hu, gen_vv, 32, gen_helper_vext2xv_wu_hu)
+TRANS(vext2xv_du_hu, gen_vv, 32, gen_helper_vext2xv_du_hu)
+TRANS(vext2xv_du_wu, gen_vv, 32, gen_helper_vext2xv_du_wu)
+
 TRANS(xvreplgr2vr_b, gvec_dup, 32, MO_8)
 TRANS(xvreplgr2vr_h, gvec_dup, 32, MO_16)
 TRANS(xvreplgr2vr_w, gvec_dup, 32, MO_32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 7491f295a5..db1a6689f0 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1580,6 +1580,19 @@  xvexth_wu_hu     0111 01101001 11101 11101 ..... .....    @vv
 xvexth_du_wu     0111 01101001 11101 11110 ..... .....    @vv
 xvexth_qu_du     0111 01101001 11101 11111 ..... .....    @vv
 
+vext2xv_h_b      0111 01101001 11110 00100 ..... .....    @vv
+vext2xv_w_b      0111 01101001 11110 00101 ..... .....    @vv
+vext2xv_d_b      0111 01101001 11110 00110 ..... .....    @vv
+vext2xv_w_h      0111 01101001 11110 00111 ..... .....    @vv
+vext2xv_d_h      0111 01101001 11110 01000 ..... .....    @vv
+vext2xv_d_w      0111 01101001 11110 01001 ..... .....    @vv
+vext2xv_hu_bu    0111 01101001 11110 01010 ..... .....    @vv
+vext2xv_wu_bu    0111 01101001 11110 01011 ..... .....    @vv
+vext2xv_du_bu    0111 01101001 11110 01100 ..... .....    @vv
+vext2xv_wu_hu    0111 01101001 11110 01101 ..... .....    @vv
+vext2xv_du_hu    0111 01101001 11110 01110 ..... .....    @vv
+vext2xv_du_wu    0111 01101001 11110 01111 ..... .....    @vv
+
 xvreplgr2vr_b    0111 01101001 11110 00000 ..... .....    @vr
 xvreplgr2vr_h    0111 01101001 11110 00001 ..... .....    @vr
 xvreplgr2vr_w    0111 01101001 11110 00010 ..... .....    @vr
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 76c8cda563..3fa689bd94 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -737,6 +737,34 @@  VEXTH(vexth_hu_bu, 16, UH, UB)
 VEXTH(vexth_wu_hu, 32, UW, UH)
 VEXTH(vexth_du_wu, 64, UD, UW)
 
+#define VEXT2XV(NAME, BIT, E1, E2)                        \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \
+                  uint32_t vd, uint32_t vj)               \
+{                                                         \
+    int i;                                                \
+    VReg *Vd = &(env->fpr[vd].vreg);                      \
+    VReg *Vj = &(env->fpr[vj].vreg);                      \
+    VReg temp;                                            \
+                                                          \
+    for (i = 0; i < LASX_LEN / BIT; i++) {                \
+        temp.E1(i) = Vj->E2(i);                           \
+    }                                                     \
+    *Vd = temp;                                           \
+}
+
+VEXT2XV(vext2xv_h_b, 16, H, B)
+VEXT2XV(vext2xv_w_b, 32, W, B)
+VEXT2XV(vext2xv_d_b, 64, D, B)
+VEXT2XV(vext2xv_w_h, 32, W, H)
+VEXT2XV(vext2xv_d_h, 64, D, H)
+VEXT2XV(vext2xv_d_w, 64, D, W)
+VEXT2XV(vext2xv_hu_bu, 16, UH, UB)
+VEXT2XV(vext2xv_wu_bu, 32, UW, UB)
+VEXT2XV(vext2xv_du_bu, 64, UD, UB)
+VEXT2XV(vext2xv_wu_hu, 32, UW, UH)
+VEXT2XV(vext2xv_du_hu, 64, UD, UH)
+VEXT2XV(vext2xv_du_wu, 64, UD, UW)
+
 #define DO_SIGNCOV(a, b)  (a == 0 ? 0 : a < 0 ? -b : b)
 
 DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)