diff mbox series

[V9,3/6] target/mips: Add loongson-ext lswc2 group of instructions (Part 1)

Message ID 1600222344-16808-4-git-send-email-chenhc@lemote.com
State New
Headers show
Series mips: Add Loongson-3 machine support | expand

Commit Message

chen huacai Sept. 16, 2020, 2:12 a.m. UTC
From: Jiaxun Yang <jiaxun.yang@flygoat.com>

LWC2 & SWC2 have been rewritten by Loongson EXT vendor ASE
as "load/store quad word" and "shifted load/store" groups of
instructions.

This patch adds implementations of the following instructions:
gslq: load 16 bytes to GPR
gssq: store 16 bytes from GPR
gslqc1: load 16 bytes to FPR
gssqc1: store 16 bytes from FPR

Details of Loongson-EXT are available here:
https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
 target/mips/translate.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

Comments

Philippe Mathieu-Daudé Sept. 16, 2020, 7:46 a.m. UTC | #1
On 9/16/20 4:12 AM, Huacai Chen wrote:
> From: Jiaxun Yang <jiaxun.yang@flygoat.com>
> 
> LWC2 & SWC2 have been rewritten by Loongson EXT vendor ASE
> as "load/store quad word" and "shifted load/store" groups of
> instructions.
> 
> This patch add implementation of these instructions:
> gslq: load 16 bytes to GPR
> gssq: store 16 bytes from GPR
> gslqc1: load 16 bytes to FPR
> gssqc1: store 16 bytes from FPR
> 
> Details of Loongson-EXT is here:
> https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md
> 
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
>  target/mips/translate.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 81 insertions(+)
> 
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index 398edf7..08d51e1 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -460,6 +460,17 @@ enum {
>      R6_OPC_SCD         = 0x27 | OPC_SPECIAL3,
>  };
>  
> +/* Loongson EXT load/store quad word opcodes */
> +#define MASK_LOONGSON_GSLSQ(op)           (MASK_OP_MAJOR(op) | (op & 0x8020))
> +enum {
> +    OPC_GSLQ        = 0x0020 | OPC_LWC2,
> +    OPC_GSLQC1      = 0x8020 | OPC_LWC2,
> +    OPC_GSSHFL      = OPC_LWC2,
> +    OPC_GSSQ        = 0x0020 | OPC_SWC2,
> +    OPC_GSSQC1      = 0x8020 | OPC_SWC2,
> +    OPC_GSSHFS      = OPC_SWC2,
> +};
> +
>  /* BSHFL opcodes */
>  #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>  
> @@ -5910,6 +5921,74 @@ no_rd:
>      tcg_temp_free_i64(t1);
>  }
>  
> +static void gen_loongson_lswc2(DisasContext *ctx, int rt,
> +                                int rs, int rd)
> +{
> +    TCGv t0, t1, t2;
> +    TCGv_i32 fp0;
> +    int lsq_offset = ((int)((ctx->opcode >> 6) & 0x1ff) << 23) >> 19;
> +    int lsq_rt1 = ctx->opcode & 0x1f;
> +    int shf_offset = (int8_t)(ctx->opcode >> 6);
> +
> +    t0 = tcg_temp_new();
> +
> +    switch (MASK_LOONGSON_GSLSQ(ctx->opcode)) {
> +#if defined(TARGET_MIPS64)

Build failure (is this code tested?):

target/mips/translate.c: In function ‘gen_loongson_lswc2’:
target/mips/translate.c:5961:9: error: unused variable ‘lsq_rt1’
[-Werror=unused-variable]
     int lsq_rt1 = ctx->opcode & 0x1f;
         ^
target/mips/translate.c:5960:9: error: unused variable ‘lsq_offset’
[-Werror=unused-variable]
     int lsq_offset = ((int)((ctx->opcode >> 6) & 0x1ff) << 23) >> 19;
         ^
cc1: all warnings being treated as errors

> +    case OPC_GSLQ:
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_gpr(t0, rt);
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_gpr(t0, lsq_rt1);
> +        break;
> +    case OPC_GSLQC1:
> +        check_cp1_enabled(ctx);
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_fpr64(ctx, t0, rt);
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_fpr64(ctx, t0, lsq_rt1);
> +        break;
> +    case OPC_GSSQ:
> +        t1 = tcg_temp_new();
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> +        gen_load_gpr(t1, rt);
> +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> +        gen_load_gpr(t1, lsq_rt1);
> +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        tcg_temp_free(t1);
> +        break;
> +    case OPC_GSSQC1:
> +        check_cp1_enabled(ctx);
> +        t1 = tcg_temp_new();
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> +        gen_load_fpr64(ctx, t1, rt);
> +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> +        gen_load_fpr64(ctx, t1, lsq_rt1);
> +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        tcg_temp_free(t1);
> +        break;
> +#endif
> +    default:
> +        MIPS_INVAL("loongson_gslsq");
> +        generate_exception_end(ctx, EXCP_RI);
> +        break;
> +    }
> +    tcg_temp_free(t0);
> +}
> +
>  /* Traps */
>  static void gen_trap(DisasContext *ctx, uint32_t opc,
>                       int rs, int rt, int16_t imm)
> @@ -30774,6 +30853,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
>              /* OPC_BC, OPC_BALC */
>              gen_compute_compact_branch(ctx, op, 0, 0,
>                                         sextract32(ctx->opcode << 2, 0, 28));
> +        } else if (ctx->insn_flags & ASE_LEXT) {
> +            gen_loongson_lswc2(ctx, rt, rs, rd);
>          } else {
>              /* OPC_LWC2, OPC_SWC2 */
>              /* COP2: Not implemented. */
>
Huacai Chen Sept. 16, 2020, 7:58 a.m. UTC | #2
Hi, Philippe,

On Wed, Sep 16, 2020 at 3:46 AM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
>
> On 9/16/20 4:12 AM, Huacai Chen wrote:
> > From: Jiaxun Yang <jiaxun.yang@flygoat.com>
> >
> > LWC2 & SWC2 have been rewritten by Loongson EXT vendor ASE
> > as "load/store quad word" and "shifted load/store" groups of
> > instructions.
> >
> > This patch add implementation of these instructions:
> > gslq: load 16 bytes to GPR
> > gssq: store 16 bytes from GPR
> > gslqc1: load 16 bytes to FPR
> > gssqc1: store 16 bytes from FPR
> >
> > Details of Loongson-EXT is here:
> > https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md
> >
> > Signed-off-by: Huacai Chen <chenhc@lemote.com>
> > Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> > ---
> >  target/mips/translate.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 81 insertions(+)
> >
> > diff --git a/target/mips/translate.c b/target/mips/translate.c
> > index 398edf7..08d51e1 100644
> > --- a/target/mips/translate.c
> > +++ b/target/mips/translate.c
> > @@ -460,6 +460,17 @@ enum {
> >      R6_OPC_SCD         = 0x27 | OPC_SPECIAL3,
> >  };
> >
> > +/* Loongson EXT load/store quad word opcodes */
> > +#define MASK_LOONGSON_GSLSQ(op)           (MASK_OP_MAJOR(op) | (op & 0x8020))
> > +enum {
> > +    OPC_GSLQ        = 0x0020 | OPC_LWC2,
> > +    OPC_GSLQC1      = 0x8020 | OPC_LWC2,
> > +    OPC_GSSHFL      = OPC_LWC2,
> > +    OPC_GSSQ        = 0x0020 | OPC_SWC2,
> > +    OPC_GSSQC1      = 0x8020 | OPC_SWC2,
> > +    OPC_GSSHFS      = OPC_SWC2,
> > +};
> > +
> >  /* BSHFL opcodes */
> >  #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> >
> > @@ -5910,6 +5921,74 @@ no_rd:
> >      tcg_temp_free_i64(t1);
> >  }
> >
> > +static void gen_loongson_lswc2(DisasContext *ctx, int rt,
> > +                                int rs, int rd)
> > +{
> > +    TCGv t0, t1, t2;
> > +    TCGv_i32 fp0;
> > +    int lsq_offset = ((int)((ctx->opcode >> 6) & 0x1ff) << 23) >> 19;
> > +    int lsq_rt1 = ctx->opcode & 0x1f;
> > +    int shf_offset = (int8_t)(ctx->opcode >> 6);
> > +
> > +    t0 = tcg_temp_new();
> > +
> > +    switch (MASK_LOONGSON_GSLSQ(ctx->opcode)) {
> > +#if defined(TARGET_MIPS64)
>
> Build failure (is this code tested?):
>
> target/mips/translate.c: In function ‘gen_loongson_lswc2’:
> target/mips/translate.c:5961:9: error: unused variable ‘lsq_rt1’
> [-Werror=unused-variable]
>      int lsq_rt1 = ctx->opcode & 0x1f;
>          ^
> target/mips/translate.c:5960:9: error: unused variable ‘lsq_offset’
> [-Werror=unused-variable]
>      int lsq_offset = ((int)((ctx->opcode >> 6) & 0x1ff) << 23) >> 19;
>          ^
> cc1: all warnings being treated as errors
Thank you very much, lsq_rt1 and lsq_offset should be guarded by TARGET_MIPS64.

Huacai
>
> > +    case OPC_GSLQ:
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_gpr(t0, rt);
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_gpr(t0, lsq_rt1);
> > +        break;
> > +    case OPC_GSLQC1:
> > +        check_cp1_enabled(ctx);
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_fpr64(ctx, t0, rt);
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_fpr64(ctx, t0, lsq_rt1);
> > +        break;
> > +    case OPC_GSSQ:
> > +        t1 = tcg_temp_new();
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> > +        gen_load_gpr(t1, rt);
> > +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> > +        gen_load_gpr(t1, lsq_rt1);
> > +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        tcg_temp_free(t1);
> > +        break;
> > +    case OPC_GSSQC1:
> > +        check_cp1_enabled(ctx);
> > +        t1 = tcg_temp_new();
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> > +        gen_load_fpr64(ctx, t1, rt);
> > +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> > +        gen_load_fpr64(ctx, t1, lsq_rt1);
> > +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        tcg_temp_free(t1);
> > +        break;
> > +#endif
> > +    default:
> > +        MIPS_INVAL("loongson_gslsq");
> > +        generate_exception_end(ctx, EXCP_RI);
> > +        break;
> > +    }
> > +    tcg_temp_free(t0);
> > +}
> > +
> >  /* Traps */
> >  static void gen_trap(DisasContext *ctx, uint32_t opc,
> >                       int rs, int rt, int16_t imm)
> > @@ -30774,6 +30853,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
> >              /* OPC_BC, OPC_BALC */
> >              gen_compute_compact_branch(ctx, op, 0, 0,
> >                                         sextract32(ctx->opcode << 2, 0, 28));
> > +        } else if (ctx->insn_flags & ASE_LEXT) {
> > +            gen_loongson_lswc2(ctx, rt, rs, rd);
> >          } else {
> >              /* OPC_LWC2, OPC_SWC2 */
> >              /* COP2: Not implemented. */
> >
Richard Henderson Sept. 16, 2020, 3:15 p.m. UTC | #3
On 9/15/20 7:12 PM, Huacai Chen wrote:
> +    case OPC_GSLQ:
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_gpr(t0, rt);
> +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_gpr(t0, lsq_rt1);

If rs == rt, this will compute the wrong address for the second load.

Either avoid storing t0 back to rt until both loads are complete, or retain the
address temporary and simply add 8 between the two loads.


r~
Huacai Chen Sept. 19, 2020, 12:44 a.m. UTC | #4
Hi, Richard,

On Wed, Sep 16, 2020 at 11:15 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 9/15/20 7:12 PM, Huacai Chen wrote:
> > +    case OPC_GSLQ:
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_gpr(t0, rt);
> > +        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_gpr(t0, lsq_rt1);
>
> If rs == rt, this will compute the wrong address for the second load.
>
> Either avoid storing t0 back to rt until both loads are complete, or retain the
> address temporary and simply add 8 between the two loads.
OK, this will be improved in V10.

>
>
> r~
diff mbox series

Patch

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 398edf7..08d51e1 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -460,6 +460,17 @@  enum {
     R6_OPC_SCD         = 0x27 | OPC_SPECIAL3,
 };
 
+/* Loongson EXT load/store quad word opcodes; bits 15 and 5 select the op */
+#define MASK_LOONGSON_GSLSQ(op)           (MASK_OP_MAJOR(op) | (op & 0x8020))
+enum {
+    OPC_GSLQ        = 0x0020 | OPC_LWC2,    /* load 16 bytes to GPR */
+    OPC_GSLQC1      = 0x8020 | OPC_LWC2,    /* load 16 bytes to FPR */
+    OPC_GSSHFL      = OPC_LWC2,             /* shifted load group */
+    OPC_GSSQ        = 0x0020 | OPC_SWC2,    /* store 16 bytes from GPR */
+    OPC_GSSQC1      = 0x8020 | OPC_SWC2,    /* store 16 bytes from FPR */
+    OPC_GSSHFS      = OPC_SWC2,             /* shifted store group */
+};
+
 /* BSHFL opcodes */
 #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
 
@@ -5910,6 +5921,74 @@  no_rd:
     tcg_temp_free_i64(t1);
 }
 
+/*
+ * Loongson EXT quad-word load/store (LWC2/SWC2 opcode space): move 16 bytes
+ * between memory and registers rt/lsq_rt1; other encodings raise EXCP_RI.
+ */
+static void gen_loongson_lswc2(DisasContext *ctx, int rt,
+                                int rs, int rd)
+{
+    TCGv t0;
+#if defined(TARGET_MIPS64)
+    TCGv t1;
+    MemOp mop = MO_TEQ | ctx->default_tcg_memop_mask;
+    int lsq_offset = sextract32(ctx->opcode, 6, 9) * 16; /* signed, x16 */
+    int lsq_rt1 = ctx->opcode & 0x1f;
+#endif
+
+    t0 = tcg_temp_new();
+    switch (MASK_LOONGSON_GSLSQ(ctx->opcode)) {
+#if defined(TARGET_MIPS64)
+    case OPC_GSLQ:
+        t1 = tcg_temp_new();
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop);
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mop);
+        /* Write back only after both loads: rt may alias the base rs. */
+        gen_store_gpr(t1, rt);
+        gen_store_gpr(t0, lsq_rt1);
+        tcg_temp_free(t1);
+        break;
+    case OPC_GSLQC1:
+        check_cp1_enabled(ctx);
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mop);
+        gen_store_fpr64(ctx, t0, rt);
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mop);
+        gen_store_fpr64(ctx, t0, lsq_rt1);
+        break;
+    case OPC_GSSQ:
+        t1 = tcg_temp_new();
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, mop);
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
+        gen_load_gpr(t1, lsq_rt1);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, mop);
+        tcg_temp_free(t1);
+        break;
+    case OPC_GSSQC1:
+        check_cp1_enabled(ctx);
+        t1 = tcg_temp_new();
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
+        gen_load_fpr64(ctx, t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, mop);
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
+        gen_load_fpr64(ctx, t1, lsq_rt1);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, mop);
+        tcg_temp_free(t1);
+        break;
+#endif
+    default:
+        MIPS_INVAL("loongson_gslsq");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+    tcg_temp_free(t0);
+}
+
 /* Traps */
 static void gen_trap(DisasContext *ctx, uint32_t opc,
                      int rs, int rt, int16_t imm)
@@ -30774,6 +30853,8 @@  static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
             /* OPC_BC, OPC_BALC */
             gen_compute_compact_branch(ctx, op, 0, 0,
                                        sextract32(ctx->opcode << 2, 0, 28));
+        } else if (ctx->insn_flags & ASE_LEXT) {
+            gen_loongson_lswc2(ctx, rt, rs, rd);
         } else {
             /* OPC_LWC2, OPC_SWC2 */
             /* COP2: Not implemented. */