diff mbox series

[V13,6/9] target/mips: Add loongson-ext lsdc2 group of instructions

Message ID 1602059975-10115-7-git-send-email-chenhc@lemote.com
State New
Headers show
Series mips: Add Loongson-3 machine support | expand

Commit Message

chen huacai Oct. 7, 2020, 8:39 a.m. UTC
From: Jiaxun Yang <jiaxun.yang@flygoat.com>

The Loongson-EXT ASE redefines the LDC2/SDC2 opcodes as a
"load & store with offset" group of instructions.

This patch adds implementations of the following instructions:
gslbx: load 1 byte to GPR
gslhx: load 2 bytes to GPR
gslwx: load 4 bytes to GPR
gsldx: load 8 bytes to GPR
gslwxc1: load 4 bytes to FPR
gsldxc1: load 8 bytes to FPR
gssbx: store 1 byte from GPR
gsshx: store 2 bytes from GPR
gsswx: store 4 bytes from GPR
gssdx: store 8 bytes from GPR
gsswxc1: store 4 bytes from FPR
gssdxc1: store 8 bytes from FPR

Details of Loongson-EXT are available here:
https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
 target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)

Comments

Philippe Mathieu-Daudé Oct. 10, 2020, 1:07 p.m. UTC | #1
On 10/7/20 10:39 AM, Huacai Chen wrote:
> From: Jiaxun Yang <jiaxun.yang@flygoat.com>
> 
> LDC2/SDC2 opcodes have been rewritten as "load & store with offset"
> group of instructions by loongson-ext ASE.
> 
> This patch add implementation of these instructions:
> gslbx: load 1 bytes to GPR
> gslhx: load 2 bytes to GPR
> gslwx: load 4 bytes to GPR
> gsldx: load 8 bytes to GPR
> gslwxc1: load 4 bytes to FPR
> gsldxc1: load 8 bytes to FPR
> gssbx: store 1 bytes from GPR
> gsshx: store 2 bytes from GPR
> gsswx: store 4 bytes from GPR
> gssdx: store 8 bytes from GPR
> gsswxc1: store 4 bytes from FPR
> gssdxc1: store 8 bytes from FPR
> 
> Details of Loongson-EXT is here:
> https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md
> 
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>

If this patch is from Jiaxun, Huacai's S-o-b should come *after*.

> ---
>   target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 179 insertions(+)
> 
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index 916b57f..4d42cfc 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -484,6 +484,24 @@ enum {
>       OPC_GSSDRC1     = 0x7 | OPC_GSSHFS,
>   };
>   
> +/* Loongson EXT LDC2/SDC2 opcodes */
> +#define MASK_LOONGSON_LSDC2(op)           (MASK_OP_MAJOR(op) | (op & 0x7))
> +
> +enum {
> +    OPC_GSLBX      = 0x0 | OPC_LDC2,
> +    OPC_GSLHX      = 0x1 | OPC_LDC2,
> +    OPC_GSLWX      = 0x2 | OPC_LDC2,
> +    OPC_GSLDX      = 0x3 | OPC_LDC2,
> +    OPC_GSLWXC1    = 0x6 | OPC_LDC2,
> +    OPC_GSLDXC1    = 0x7 | OPC_LDC2,
> +    OPC_GSSBX      = 0x0 | OPC_SDC2,
> +    OPC_GSSHX      = 0x1 | OPC_SDC2,
> +    OPC_GSSWX      = 0x2 | OPC_SDC2,
> +    OPC_GSSDX      = 0x3 | OPC_SDC2,
> +    OPC_GSSWXC1    = 0x6 | OPC_SDC2,
> +    OPC_GSSDXC1    = 0x7 | OPC_SDC2,
> +};
> +
>   /* BSHFL opcodes */
>   #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>   
> @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
>       tcg_temp_free(t0);
>   }
>   
> +/* Loongson EXT LDC2/SDC2 */
> +static void gen_loongson_lsdc2(DisasContext *ctx, int rt,
> +                                int rs, int rd)

Alignment is off (various occurrences in this series).

> +{
> +    int offset = (int8_t)(ctx->opcode >> 3);

Please use sextract32() which is easier to read:

        int32_t offset = sextract32(ctx->opcode, 3, 8);

> +    uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode);
> +    TCGv t0, t1;
> +    TCGv_i32 fp0;
> +
> +    /* Pre-conditions */
> +    switch (opc) {
> +    case OPC_GSLBX:
> +    case OPC_GSLHX:
> +    case OPC_GSLWX:
> +    case OPC_GSLDX:
> +        /* prefetch, implement as NOP */
> +        if (rt == 0) {
> +            return;
> +        }
> +        break;
> +    case OPC_GSSBX:
> +    case OPC_GSSHX:
> +    case OPC_GSSWX:
> +    case OPC_GSSDX:
> +        break;
> +    case OPC_GSLWXC1:
> +#if defined(TARGET_MIPS64)
> +    case OPC_GSLDXC1:
> +#endif
> +        check_cp1_enabled(ctx);
> +        /* prefetch, implement as NOP */
> +        if (rt == 0) {
> +            return;
> +        }
> +        break;
> +    case OPC_GSSWXC1:
> +#if defined(TARGET_MIPS64)
> +    case OPC_GSSDXC1:
> +#endif
> +        check_cp1_enabled(ctx);
> +        break;
> +    default:
> +        MIPS_INVAL("loongson_lsdc2");
> +        generate_exception_end(ctx, EXCP_RI);
> +        return;
> +        break;
> +    }
> +
> +    t0 = tcg_temp_new();
> +
> +    gen_base_offset_addr(ctx, t0, rs, offset);
> +    gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> +
> +    switch (opc) {
> +    case OPC_GSLBX:
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
> +        gen_store_gpr(t0, rt);
> +        break;
> +    case OPC_GSLHX:
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW |
> +                            ctx->default_tcg_memop_mask);

Does Loongson EXT plan to support unaligned accesses?

> +        gen_store_gpr(t0, rt);
> +        break;
> +    case OPC_GSLWX:
> +        gen_base_offset_addr(ctx, t0, rs, offset);
> +        if (rd) {
> +            gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> +        }
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_gpr(t0, rt);
> +        break;
> +#if defined(TARGET_MIPS64)
> +    case OPC_GSLDX:
> +        gen_base_offset_addr(ctx, t0, rs, offset);
> +        if (rd) {
> +            gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> +        }
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_gpr(t0, rt);
> +        break;
> +#endif
> +    case OPC_GSLWXC1:
> +        check_cp1_enabled(ctx);
> +        gen_base_offset_addr(ctx, t0, rs, offset);
> +        if (rd) {
> +            gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> +        }
> +        fp0 = tcg_temp_new_i32();
> +        tcg_gen_qemu_ld_i32(fp0, t0, ctx->mem_idx, MO_TESL |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_fpr32(ctx, fp0, rt);
> +        tcg_temp_free_i32(fp0);
> +        break;
> +#if defined(TARGET_MIPS64)
> +    case OPC_GSLDXC1:
> +        check_cp1_enabled(ctx);
> +        gen_base_offset_addr(ctx, t0, rs, offset);
> +        if (rd) {
> +            gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> +        }
> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        gen_store_fpr64(ctx, t0, rt);
> +        break;
> +#endif
> +    case OPC_GSSBX:
> +        t1 = tcg_temp_new();
> +        gen_load_gpr(t1, rt);
> +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_SB);
> +        tcg_temp_free(t1);
> +        break;
> +    case OPC_GSSHX:
> +        t1 = tcg_temp_new();
> +        gen_load_gpr(t1, rt);
> +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUW |
> +                            ctx->default_tcg_memop_mask);
> +        tcg_temp_free(t1);
> +        break;
> +    case OPC_GSSWX:
> +        t1 = tcg_temp_new();
> +        gen_load_gpr(t1, rt);
> +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
> +                            ctx->default_tcg_memop_mask);
> +        tcg_temp_free(t1);
> +        break;
> +#if defined(TARGET_MIPS64)
> +    case OPC_GSSDX:
> +        t1 = tcg_temp_new();
> +        gen_load_gpr(t1, rt);
> +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        tcg_temp_free(t1);
> +        break;
> +#endif
> +    case OPC_GSSWXC1:
> +        fp0 = tcg_temp_new_i32();
> +        gen_load_fpr32(ctx, fp0, rt);
> +        tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL |
> +                            ctx->default_tcg_memop_mask);
> +        tcg_temp_free_i32(fp0);
> +        break;
> +#if defined(TARGET_MIPS64)
> +    case OPC_GSSDXC1:
> +        t1 = tcg_temp_new();
> +        gen_load_fpr64(ctx, t1, rt);
> +        tcg_gen_qemu_st_i64(t1, t0, ctx->mem_idx, MO_TEQ |
> +                            ctx->default_tcg_memop_mask);
> +        tcg_temp_free(t1);
> +        break;
> +#endif
> +    default:
> +        break;
> +    }
> +
> +    tcg_temp_free(t0);
> +}
> +
>   /* Traps */
>   static void gen_trap(DisasContext *ctx, uint32_t opc,
>                        int rs, int rt, int16_t imm)
> @@ -31055,6 +31232,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
>                   /* OPC_JIC, OPC_JIALC */
>                   gen_compute_compact_branch(ctx, op, 0, rt, imm);
>               }
> +        } else if (ctx->insn_flags & ASE_LEXT) {
> +            gen_loongson_lsdc2(ctx, rt, rs, rd);
>           } else {
>               /* OPC_LWC2, OPC_SWC2 */
>               /* COP2: Not implemented. */
>
Huacai Chen Oct. 11, 2020, 3:02 a.m. UTC | #2
Hi, Philippe,

On Sat, Oct 10, 2020 at 9:07 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
>
> On 10/7/20 10:39 AM, Huacai Chen wrote:
> > From: Jiaxun Yang <jiaxun.yang@flygoat.com>
> >
> > LDC2/SDC2 opcodes have been rewritten as "load & store with offset"
> > group of instructions by loongson-ext ASE.
> >
> > This patch add implementation of these instructions:
> > gslbx: load 1 bytes to GPR
> > gslhx: load 2 bytes to GPR
> > gslwx: load 4 bytes to GPR
> > gsldx: load 8 bytes to GPR
> > gslwxc1: load 4 bytes to FPR
> > gsldxc1: load 8 bytes to FPR
> > gssbx: store 1 bytes from GPR
> > gsshx: store 2 bytes from GPR
> > gsswx: store 4 bytes from GPR
> > gssdx: store 8 bytes from GPR
> > gsswxc1: store 4 bytes from FPR
> > gssdxc1: store 8 bytes from FPR
> >
> > Details of Loongson-EXT is here:
> > https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md
> >
> > Signed-off-by: Huacai Chen <chenhc@lemote.com>
> > Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
>
> If this patch is from Jiaxun, Huacai's S-o-b should come *after*.
OK, I will do that.

>
> > ---
> >   target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 179 insertions(+)
> >
> > diff --git a/target/mips/translate.c b/target/mips/translate.c
> > index 916b57f..4d42cfc 100644
> > --- a/target/mips/translate.c
> > +++ b/target/mips/translate.c
> > @@ -484,6 +484,24 @@ enum {
> >       OPC_GSSDRC1     = 0x7 | OPC_GSSHFS,
> >   };
> >
> > +/* Loongson EXT LDC2/SDC2 opcodes */
> > +#define MASK_LOONGSON_LSDC2(op)           (MASK_OP_MAJOR(op) | (op & 0x7))
> > +
> > +enum {
> > +    OPC_GSLBX      = 0x0 | OPC_LDC2,
> > +    OPC_GSLHX      = 0x1 | OPC_LDC2,
> > +    OPC_GSLWX      = 0x2 | OPC_LDC2,
> > +    OPC_GSLDX      = 0x3 | OPC_LDC2,
> > +    OPC_GSLWXC1    = 0x6 | OPC_LDC2,
> > +    OPC_GSLDXC1    = 0x7 | OPC_LDC2,
> > +    OPC_GSSBX      = 0x0 | OPC_SDC2,
> > +    OPC_GSSHX      = 0x1 | OPC_SDC2,
> > +    OPC_GSSWX      = 0x2 | OPC_SDC2,
> > +    OPC_GSSDX      = 0x3 | OPC_SDC2,
> > +    OPC_GSSWXC1    = 0x6 | OPC_SDC2,
> > +    OPC_GSSDXC1    = 0x7 | OPC_SDC2,
> > +};
> > +
> >   /* BSHFL opcodes */
> >   #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> >
> > @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
> >       tcg_temp_free(t0);
> >   }
> >
> > +/* Loongson EXT LDC2/SDC2 */
> > +static void gen_loongson_lsdc2(DisasContext *ctx, int rt,
> > +                                int rs, int rd)
>
> Alignment off (various occurences in this series).
OK, thanks.

>
> > +{
> > +    int offset = (int8_t)(ctx->opcode >> 3);
>
> Please use sextract32() which is easier to read:
>
>         int32_t offset = sextract32(ctx->opcode, 3, 8);
OK, thanks.

>
> > +    uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode);
> > +    TCGv t0, t1;
> > +    TCGv_i32 fp0;
> > +
> > +    /* Pre-conditions */
> > +    switch (opc) {
> > +    case OPC_GSLBX:
> > +    case OPC_GSLHX:
> > +    case OPC_GSLWX:
> > +    case OPC_GSLDX:
> > +        /* prefetch, implement as NOP */
> > +        if (rt == 0) {
> > +            return;
> > +        }
> > +        break;
> > +    case OPC_GSSBX:
> > +    case OPC_GSSHX:
> > +    case OPC_GSSWX:
> > +    case OPC_GSSDX:
> > +        break;
> > +    case OPC_GSLWXC1:
> > +#if defined(TARGET_MIPS64)
> > +    case OPC_GSLDXC1:
> > +#endif
> > +        check_cp1_enabled(ctx);
> > +        /* prefetch, implement as NOP */
> > +        if (rt == 0) {
> > +            return;
> > +        }
> > +        break;
> > +    case OPC_GSSWXC1:
> > +#if defined(TARGET_MIPS64)
> > +    case OPC_GSSDXC1:
> > +#endif
> > +        check_cp1_enabled(ctx);
> > +        break;
> > +    default:
> > +        MIPS_INVAL("loongson_lsdc2");
> > +        generate_exception_end(ctx, EXCP_RI);
> > +        return;
> > +        break;
> > +    }
> > +
> > +    t0 = tcg_temp_new();
> > +
> > +    gen_base_offset_addr(ctx, t0, rs, offset);
> > +    gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> > +
> > +    switch (opc) {
> > +    case OPC_GSLBX:
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
> > +        gen_store_gpr(t0, rt);
> > +        break;
> > +    case OPC_GSLHX:
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW |
> > +                            ctx->default_tcg_memop_mask);
>
> Do Loongson EXT plan to support unaligned accesses?
It is not supported in hardware; the Linux kernel emulates the unaligned cases.

>
> > +        gen_store_gpr(t0, rt);
> > +        break;
> > +    case OPC_GSLWX:
> > +        gen_base_offset_addr(ctx, t0, rs, offset);
> > +        if (rd) {
> > +            gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> > +        }
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_gpr(t0, rt);
> > +        break;
> > +#if defined(TARGET_MIPS64)
> > +    case OPC_GSLDX:
> > +        gen_base_offset_addr(ctx, t0, rs, offset);
> > +        if (rd) {
> > +            gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> > +        }
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_gpr(t0, rt);
> > +        break;
> > +#endif
> > +    case OPC_GSLWXC1:
> > +        check_cp1_enabled(ctx);
> > +        gen_base_offset_addr(ctx, t0, rs, offset);
> > +        if (rd) {
> > +            gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> > +        }
> > +        fp0 = tcg_temp_new_i32();
> > +        tcg_gen_qemu_ld_i32(fp0, t0, ctx->mem_idx, MO_TESL |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_fpr32(ctx, fp0, rt);
> > +        tcg_temp_free_i32(fp0);
> > +        break;
> > +#if defined(TARGET_MIPS64)
> > +    case OPC_GSLDXC1:
> > +        check_cp1_enabled(ctx);
> > +        gen_base_offset_addr(ctx, t0, rs, offset);
> > +        if (rd) {
> > +            gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> > +        }
> > +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        gen_store_fpr64(ctx, t0, rt);
> > +        break;
> > +#endif
> > +    case OPC_GSSBX:
> > +        t1 = tcg_temp_new();
> > +        gen_load_gpr(t1, rt);
> > +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_SB);
> > +        tcg_temp_free(t1);
> > +        break;
> > +    case OPC_GSSHX:
> > +        t1 = tcg_temp_new();
> > +        gen_load_gpr(t1, rt);
> > +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUW |
> > +                            ctx->default_tcg_memop_mask);
> > +        tcg_temp_free(t1);
> > +        break;
> > +    case OPC_GSSWX:
> > +        t1 = tcg_temp_new();
> > +        gen_load_gpr(t1, rt);
> > +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
> > +                            ctx->default_tcg_memop_mask);
> > +        tcg_temp_free(t1);
> > +        break;
> > +#if defined(TARGET_MIPS64)
> > +    case OPC_GSSDX:
> > +        t1 = tcg_temp_new();
> > +        gen_load_gpr(t1, rt);
> > +        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        tcg_temp_free(t1);
> > +        break;
> > +#endif
> > +    case OPC_GSSWXC1:
> > +        fp0 = tcg_temp_new_i32();
> > +        gen_load_fpr32(ctx, fp0, rt);
> > +        tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL |
> > +                            ctx->default_tcg_memop_mask);
> > +        tcg_temp_free_i32(fp0);
> > +        break;
> > +#if defined(TARGET_MIPS64)
> > +    case OPC_GSSDXC1:
> > +        t1 = tcg_temp_new();
> > +        gen_load_fpr64(ctx, t1, rt);
> > +        tcg_gen_qemu_st_i64(t1, t0, ctx->mem_idx, MO_TEQ |
> > +                            ctx->default_tcg_memop_mask);
> > +        tcg_temp_free(t1);
> > +        break;
> > +#endif
> > +    default:
> > +        break;
> > +    }
> > +
> > +    tcg_temp_free(t0);
> > +}
> > +
> >   /* Traps */
> >   static void gen_trap(DisasContext *ctx, uint32_t opc,
> >                        int rs, int rt, int16_t imm)
> > @@ -31055,6 +31232,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
> >                   /* OPC_JIC, OPC_JIALC */
> >                   gen_compute_compact_branch(ctx, op, 0, rt, imm);
> >               }
> > +        } else if (ctx->insn_flags & ASE_LEXT) {
> > +            gen_loongson_lsdc2(ctx, rt, rs, rd);
> >           } else {
> >               /* OPC_LWC2, OPC_SWC2 */
> >               /* COP2: Not implemented. */
> >
Huacai
Philippe Mathieu-Daudé Oct. 11, 2020, 11:13 a.m. UTC | #3
On 10/11/20 5:02 AM, Huacai Chen wrote:
> Hi, Philippe,
> 
> On Sat, Oct 10, 2020 at 9:07 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
>>
>> On 10/7/20 10:39 AM, Huacai Chen wrote:
>>> From: Jiaxun Yang <jiaxun.yang@flygoat.com>
>>>
>>> LDC2/SDC2 opcodes have been rewritten as "load & store with offset"
>>> group of instructions by loongson-ext ASE.
>>>
>>> This patch add implementation of these instructions:
>>> gslbx: load 1 bytes to GPR
>>> gslhx: load 2 bytes to GPR
>>> gslwx: load 4 bytes to GPR
>>> gsldx: load 8 bytes to GPR
>>> gslwxc1: load 4 bytes to FPR
>>> gsldxc1: load 8 bytes to FPR
>>> gssbx: store 1 bytes from GPR
>>> gsshx: store 2 bytes from GPR
>>> gsswx: store 4 bytes from GPR
>>> gssdx: store 8 bytes from GPR
>>> gsswxc1: store 4 bytes from FPR
>>> gssdxc1: store 8 bytes from FPR
>>>
>>> Details of Loongson-EXT is here:
>>> https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md
>>>
>>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
>>> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
>>
>> If this patch is from Jiaxun, Huacai's S-o-b should come *after*.
> OK, I will do that.
> 
>>
>>> ---
>>>    target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 179 insertions(+)
>>>
>>> diff --git a/target/mips/translate.c b/target/mips/translate.c
>>> index 916b57f..4d42cfc 100644
>>> --- a/target/mips/translate.c
>>> +++ b/target/mips/translate.c
>>> @@ -484,6 +484,24 @@ enum {
>>>        OPC_GSSDRC1     = 0x7 | OPC_GSSHFS,
>>>    };
>>>
>>> +/* Loongson EXT LDC2/SDC2 opcodes */
>>> +#define MASK_LOONGSON_LSDC2(op)           (MASK_OP_MAJOR(op) | (op & 0x7))
>>> +
>>> +enum {
>>> +    OPC_GSLBX      = 0x0 | OPC_LDC2,
>>> +    OPC_GSLHX      = 0x1 | OPC_LDC2,
>>> +    OPC_GSLWX      = 0x2 | OPC_LDC2,
>>> +    OPC_GSLDX      = 0x3 | OPC_LDC2,
>>> +    OPC_GSLWXC1    = 0x6 | OPC_LDC2,
>>> +    OPC_GSLDXC1    = 0x7 | OPC_LDC2,
>>> +    OPC_GSSBX      = 0x0 | OPC_SDC2,
>>> +    OPC_GSSHX      = 0x1 | OPC_SDC2,
>>> +    OPC_GSSWX      = 0x2 | OPC_SDC2,
>>> +    OPC_GSSDX      = 0x3 | OPC_SDC2,
>>> +    OPC_GSSWXC1    = 0x6 | OPC_SDC2,
>>> +    OPC_GSSDXC1    = 0x7 | OPC_SDC2,
>>> +};
>>> +
>>>    /* BSHFL opcodes */
>>>    #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>>>
>>> @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
>>>        tcg_temp_free(t0);
>>>    }
>>>
>>> +/* Loongson EXT LDC2/SDC2 */
>>> +static void gen_loongson_lsdc2(DisasContext *ctx, int rt,
>>> +                                int rs, int rd)
>>
>> Alignment off (various occurences in this series).
> OK, thanks.
> 
>>
>>> +{
>>> +    int offset = (int8_t)(ctx->opcode >> 3);
>>
>> Please use sextract32() which is easier to read:
>>
>>          int32_t offset = sextract32(ctx->opcode, 3, 8);
> OK, thanks.
> 
>>
>>> +    uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode);
>>> +    TCGv t0, t1;
>>> +    TCGv_i32 fp0;
>>> +
>>> +    /* Pre-conditions */
>>> +    switch (opc) {
>>> +    case OPC_GSLBX:
>>> +    case OPC_GSLHX:
>>> +    case OPC_GSLWX:
>>> +    case OPC_GSLDX:
>>> +        /* prefetch, implement as NOP */
>>> +        if (rt == 0) {
>>> +            return;
>>> +        }
>>> +        break;
>>> +    case OPC_GSSBX:
>>> +    case OPC_GSSHX:
>>> +    case OPC_GSSWX:
>>> +    case OPC_GSSDX:
>>> +        break;
>>> +    case OPC_GSLWXC1:
>>> +#if defined(TARGET_MIPS64)
>>> +    case OPC_GSLDXC1:
>>> +#endif
>>> +        check_cp1_enabled(ctx);
>>> +        /* prefetch, implement as NOP */
>>> +        if (rt == 0) {
>>> +            return;
>>> +        }
>>> +        break;
>>> +    case OPC_GSSWXC1:
>>> +#if defined(TARGET_MIPS64)
>>> +    case OPC_GSSDXC1:
>>> +#endif
>>> +        check_cp1_enabled(ctx);
>>> +        break;
>>> +    default:
>>> +        MIPS_INVAL("loongson_lsdc2");
>>> +        generate_exception_end(ctx, EXCP_RI);
>>> +        return;
>>> +        break;
>>> +    }
>>> +
>>> +    t0 = tcg_temp_new();
>>> +
>>> +    gen_base_offset_addr(ctx, t0, rs, offset);
>>> +    gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
>>> +
>>> +    switch (opc) {
>>> +    case OPC_GSLBX:
>>> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
>>> +        gen_store_gpr(t0, rt);
>>> +        break;
>>> +    case OPC_GSLHX:
>>> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW |
>>> +                            ctx->default_tcg_memop_mask);
>>
>> Do Loongson EXT plan to support unaligned accesses?
> Not support in hardware, and Linux kernel emulate the unaligned cases.

OK, that was my understanding. So we don't need to use
default_tcg_memop_mask; we can use MO_ALIGN directly in
its place.

Regards,

Phil.
Huacai Chen Oct. 12, 2020, 10:33 a.m. UTC | #4
Hi, Philippe,

On Sun, Oct 11, 2020 at 7:13 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
>
> On 10/11/20 5:02 AM, Huacai Chen wrote:
> > Hi, Philippe,
> >
> > On Sat, Oct 10, 2020 at 9:07 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
> >>
> >> On 10/7/20 10:39 AM, Huacai Chen wrote:
> >>> From: Jiaxun Yang <jiaxun.yang@flygoat.com>
> >>>
> >>> LDC2/SDC2 opcodes have been rewritten as "load & store with offset"
> >>> group of instructions by loongson-ext ASE.
> >>>
> >>> This patch add implementation of these instructions:
> >>> gslbx: load 1 bytes to GPR
> >>> gslhx: load 2 bytes to GPR
> >>> gslwx: load 4 bytes to GPR
> >>> gsldx: load 8 bytes to GPR
> >>> gslwxc1: load 4 bytes to FPR
> >>> gsldxc1: load 8 bytes to FPR
> >>> gssbx: store 1 bytes from GPR
> >>> gsshx: store 2 bytes from GPR
> >>> gsswx: store 4 bytes from GPR
> >>> gssdx: store 8 bytes from GPR
> >>> gsswxc1: store 4 bytes from FPR
> >>> gssdxc1: store 8 bytes from FPR
> >>>
> >>> Details of Loongson-EXT is here:
> >>> https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md
> >>>
> >>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> >>> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> >>
> >> If this patch is from Jiaxun, Huacai's S-o-b should come *after*.
> > OK, I will do that.
> >
> >>
> >>> ---
> >>>    target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>>    1 file changed, 179 insertions(+)
> >>>
> >>> diff --git a/target/mips/translate.c b/target/mips/translate.c
> >>> index 916b57f..4d42cfc 100644
> >>> --- a/target/mips/translate.c
> >>> +++ b/target/mips/translate.c
> >>> @@ -484,6 +484,24 @@ enum {
> >>>        OPC_GSSDRC1     = 0x7 | OPC_GSSHFS,
> >>>    };
> >>>
> >>> +/* Loongson EXT LDC2/SDC2 opcodes */
> >>> +#define MASK_LOONGSON_LSDC2(op)           (MASK_OP_MAJOR(op) | (op & 0x7))
> >>> +
> >>> +enum {
> >>> +    OPC_GSLBX      = 0x0 | OPC_LDC2,
> >>> +    OPC_GSLHX      = 0x1 | OPC_LDC2,
> >>> +    OPC_GSLWX      = 0x2 | OPC_LDC2,
> >>> +    OPC_GSLDX      = 0x3 | OPC_LDC2,
> >>> +    OPC_GSLWXC1    = 0x6 | OPC_LDC2,
> >>> +    OPC_GSLDXC1    = 0x7 | OPC_LDC2,
> >>> +    OPC_GSSBX      = 0x0 | OPC_SDC2,
> >>> +    OPC_GSSHX      = 0x1 | OPC_SDC2,
> >>> +    OPC_GSSWX      = 0x2 | OPC_SDC2,
> >>> +    OPC_GSSDX      = 0x3 | OPC_SDC2,
> >>> +    OPC_GSSWXC1    = 0x6 | OPC_SDC2,
> >>> +    OPC_GSSDXC1    = 0x7 | OPC_SDC2,
> >>> +};
> >>> +
> >>>    /* BSHFL opcodes */
> >>>    #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> >>>
> >>> @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
> >>>        tcg_temp_free(t0);
> >>>    }
> >>>
> >>> +/* Loongson EXT LDC2/SDC2 */
> >>> +static void gen_loongson_lsdc2(DisasContext *ctx, int rt,
> >>> +                                int rs, int rd)
> >>
> >> Alignment off (various occurences in this series).
> > OK, thanks.
> >
> >>
> >>> +{
> >>> +    int offset = (int8_t)(ctx->opcode >> 3);
> >>
> >> Please use sextract32() which is easier to read:
> >>
> >>          int32_t offset = sextract32(ctx->opcode, 3, 8);
> > OK, thanks.
> >
> >>
> >>> +    uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode);
> >>> +    TCGv t0, t1;
> >>> +    TCGv_i32 fp0;
> >>> +
> >>> +    /* Pre-conditions */
> >>> +    switch (opc) {
> >>> +    case OPC_GSLBX:
> >>> +    case OPC_GSLHX:
> >>> +    case OPC_GSLWX:
> >>> +    case OPC_GSLDX:
> >>> +        /* prefetch, implement as NOP */
> >>> +        if (rt == 0) {
> >>> +            return;
> >>> +        }
> >>> +        break;
> >>> +    case OPC_GSSBX:
> >>> +    case OPC_GSSHX:
> >>> +    case OPC_GSSWX:
> >>> +    case OPC_GSSDX:
> >>> +        break;
> >>> +    case OPC_GSLWXC1:
> >>> +#if defined(TARGET_MIPS64)
> >>> +    case OPC_GSLDXC1:
> >>> +#endif
> >>> +        check_cp1_enabled(ctx);
> >>> +        /* prefetch, implement as NOP */
> >>> +        if (rt == 0) {
> >>> +            return;
> >>> +        }
> >>> +        break;
> >>> +    case OPC_GSSWXC1:
> >>> +#if defined(TARGET_MIPS64)
> >>> +    case OPC_GSSDXC1:
> >>> +#endif
> >>> +        check_cp1_enabled(ctx);
> >>> +        break;
> >>> +    default:
> >>> +        MIPS_INVAL("loongson_lsdc2");
> >>> +        generate_exception_end(ctx, EXCP_RI);
> >>> +        return;
> >>> +        break;
> >>> +    }
> >>> +
> >>> +    t0 = tcg_temp_new();
> >>> +
> >>> +    gen_base_offset_addr(ctx, t0, rs, offset);
> >>> +    gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
> >>> +
> >>> +    switch (opc) {
> >>> +    case OPC_GSLBX:
> >>> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
> >>> +        gen_store_gpr(t0, rt);
> >>> +        break;
> >>> +    case OPC_GSLHX:
> >>> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW |
> >>> +                            ctx->default_tcg_memop_mask);
> >>
> >> Do Loongson EXT plan to support unaligned accesses?
> > Not support in hardware, and Linux kernel emulate the unaligned cases.
>
> OK, that was my understanding. So we don't need to use
> default_tcg_memop_mask, we can directly use MO_ALIGN in
> place instead.
I read the code again and found that MIPS R6 uses MO_UNALN while the
others use MO_ALIGN. I also realized that Loongson-3A4000 supports
unaligned access in hardware (the same as R6). So I think I should
keep default_tcg_memop_mask here. And, if possible, I would like to set
MO_UNALN for all Loongson-3 processors.

Huacai
>
> Regards,
>
> Phil.
Philippe Mathieu-Daudé Oct. 12, 2020, 11:04 a.m. UTC | #5
On 10/12/20 12:33 PM, Huacai Chen wrote:
> Hi, Philippe,
> 
> On Sun, Oct 11, 2020 at 7:13 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
>>
>> On 10/11/20 5:02 AM, Huacai Chen wrote:
>>> Hi, Philippe,
>>>
>>> On Sat, Oct 10, 2020 at 9:07 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
>>>>
>>>> On 10/7/20 10:39 AM, Huacai Chen wrote:
>>>>> From: Jiaxun Yang <jiaxun.yang@flygoat.com>
>>>>>
>>>>> LDC2/SDC2 opcodes have been rewritten as "load & store with offset"
>>>>> group of instructions by loongson-ext ASE.
>>>>>
>>>>> This patch add implementation of these instructions:
>>>>> gslbx: load 1 bytes to GPR
>>>>> gslhx: load 2 bytes to GPR
>>>>> gslwx: load 4 bytes to GPR
>>>>> gsldx: load 8 bytes to GPR
>>>>> gslwxc1: load 4 bytes to FPR
>>>>> gsldxc1: load 8 bytes to FPR
>>>>> gssbx: store 1 bytes from GPR
>>>>> gsshx: store 2 bytes from GPR
>>>>> gsswx: store 4 bytes from GPR
>>>>> gssdx: store 8 bytes from GPR
>>>>> gsswxc1: store 4 bytes from FPR
>>>>> gssdxc1: store 8 bytes from FPR
>>>>>
>>>>> Details of Loongson-EXT is here:
>>>>> https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md
>>>>>
>>>>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
>>>>> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
>>>>
>>>> If this patch is from Jiaxun, Huacai's S-o-b should come *after*.
>>> OK, I will do that.
>>>
>>>>
>>>>> ---
>>>>>     target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>     1 file changed, 179 insertions(+)
>>>>>
>>>>> diff --git a/target/mips/translate.c b/target/mips/translate.c
>>>>> index 916b57f..4d42cfc 100644
>>>>> --- a/target/mips/translate.c
>>>>> +++ b/target/mips/translate.c
>>>>> @@ -484,6 +484,24 @@ enum {
>>>>>         OPC_GSSDRC1     = 0x7 | OPC_GSSHFS,
>>>>>     };
>>>>>
>>>>> +/* Loongson EXT LDC2/SDC2 opcodes */
>>>>> +#define MASK_LOONGSON_LSDC2(op)           (MASK_OP_MAJOR(op) | (op & 0x7))
>>>>> +
>>>>> +enum {
>>>>> +    OPC_GSLBX      = 0x0 | OPC_LDC2,
>>>>> +    OPC_GSLHX      = 0x1 | OPC_LDC2,
>>>>> +    OPC_GSLWX      = 0x2 | OPC_LDC2,
>>>>> +    OPC_GSLDX      = 0x3 | OPC_LDC2,
>>>>> +    OPC_GSLWXC1    = 0x6 | OPC_LDC2,
>>>>> +    OPC_GSLDXC1    = 0x7 | OPC_LDC2,
>>>>> +    OPC_GSSBX      = 0x0 | OPC_SDC2,
>>>>> +    OPC_GSSHX      = 0x1 | OPC_SDC2,
>>>>> +    OPC_GSSWX      = 0x2 | OPC_SDC2,
>>>>> +    OPC_GSSDX      = 0x3 | OPC_SDC2,
>>>>> +    OPC_GSSWXC1    = 0x6 | OPC_SDC2,
>>>>> +    OPC_GSSDXC1    = 0x7 | OPC_SDC2,
>>>>> +};
>>>>> +
>>>>>     /* BSHFL opcodes */
>>>>>     #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>>>>>
>>>>> @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
>>>>>         tcg_temp_free(t0);
>>>>>     }
>>>>>
>>>>> +/* Loongson EXT LDC2/SDC2 */
>>>>> +static void gen_loongson_lsdc2(DisasContext *ctx, int rt,
>>>>> +                                int rs, int rd)
>>>>
>>>> Alignment off (various occurences in this series).
>>> OK, thanks.
>>>
>>>>
>>>>> +{
>>>>> +    int offset = (int8_t)(ctx->opcode >> 3);
>>>>
>>>> Please use sextract32() which is easier to read:
>>>>
>>>>           int32_t offset = sextract32(ctx->opcode, 3, 8);
>>> OK, thanks.
>>>
>>>>
>>>>> +    uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode);
>>>>> +    TCGv t0, t1;
>>>>> +    TCGv_i32 fp0;
>>>>> +
>>>>> +    /* Pre-conditions */
>>>>> +    switch (opc) {
>>>>> +    case OPC_GSLBX:
>>>>> +    case OPC_GSLHX:
>>>>> +    case OPC_GSLWX:
>>>>> +    case OPC_GSLDX:
>>>>> +        /* prefetch, implement as NOP */
>>>>> +        if (rt == 0) {
>>>>> +            return;
>>>>> +        }
>>>>> +        break;
>>>>> +    case OPC_GSSBX:
>>>>> +    case OPC_GSSHX:
>>>>> +    case OPC_GSSWX:
>>>>> +    case OPC_GSSDX:
>>>>> +        break;
>>>>> +    case OPC_GSLWXC1:
>>>>> +#if defined(TARGET_MIPS64)
>>>>> +    case OPC_GSLDXC1:
>>>>> +#endif
>>>>> +        check_cp1_enabled(ctx);
>>>>> +        /* prefetch, implement as NOP */
>>>>> +        if (rt == 0) {
>>>>> +            return;
>>>>> +        }
>>>>> +        break;
>>>>> +    case OPC_GSSWXC1:
>>>>> +#if defined(TARGET_MIPS64)
>>>>> +    case OPC_GSSDXC1:
>>>>> +#endif
>>>>> +        check_cp1_enabled(ctx);
>>>>> +        break;
>>>>> +    default:
>>>>> +        MIPS_INVAL("loongson_lsdc2");
>>>>> +        generate_exception_end(ctx, EXCP_RI);
>>>>> +        return;
>>>>> +        break;
>>>>> +    }
>>>>> +
>>>>> +    t0 = tcg_temp_new();
>>>>> +
>>>>> +    gen_base_offset_addr(ctx, t0, rs, offset);
>>>>> +    gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
>>>>> +
>>>>> +    switch (opc) {
>>>>> +    case OPC_GSLBX:
>>>>> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
>>>>> +        gen_store_gpr(t0, rt);
>>>>> +        break;
>>>>> +    case OPC_GSLHX:
>>>>> +        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW |
>>>>> +                            ctx->default_tcg_memop_mask);
>>>>
>>>> Do Loongson EXT plan to support unaligned accesses?
>>> Not support in hardware, and Linux kernel emulate the unaligned cases.
>>
>> OK, that was my understanding. So we don't need to use
>> default_tcg_memop_mask, we can directly use MO_ALIGN in
>> place instead.
> I read the code again, and found that MIPSR6 uses MO_UNALN while
> others use MO_ALIGN. And I also realized that Loongson-3A4000 supports
> unaligned access in hardware (the same as R6). So, I think I should
> keep default_tcg_memop_mask here. And if possible, I want to set
> MO_UNALN for all Loongson-3 processors.

OK.

> 
> Huacai
>>
>> Regards,
>>
>> Phil.
>
diff mbox series

Patch

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 916b57f..4d42cfc 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -484,6 +484,24 @@  enum {
     OPC_GSSDRC1     = 0x7 | OPC_GSSHFS,
 };
 
+/* Loongson EXT LDC2/SDC2 opcodes: major opcode | minor field (bits 2:0) */
+#define MASK_LOONGSON_LSDC2(op)           (MASK_OP_MAJOR(op) | ((op) & 0x7))
+
+enum {
+    OPC_GSLBX      = 0x0 | OPC_LDC2,    /* load 1 byte to GPR */
+    OPC_GSLHX      = 0x1 | OPC_LDC2,    /* load 2 bytes to GPR */
+    OPC_GSLWX      = 0x2 | OPC_LDC2,    /* load 4 bytes to GPR */
+    OPC_GSLDX      = 0x3 | OPC_LDC2,    /* load 8 bytes to GPR */
+    OPC_GSLWXC1    = 0x6 | OPC_LDC2,    /* load 4 bytes to FPR */
+    OPC_GSLDXC1    = 0x7 | OPC_LDC2,    /* load 8 bytes to FPR */
+    OPC_GSSBX      = 0x0 | OPC_SDC2,    /* store 1 byte from GPR */
+    OPC_GSSHX      = 0x1 | OPC_SDC2,    /* store 2 bytes from GPR */
+    OPC_GSSWX      = 0x2 | OPC_SDC2,    /* store 4 bytes from GPR */
+    OPC_GSSDX      = 0x3 | OPC_SDC2,    /* store 8 bytes from GPR */
+    OPC_GSSWXC1    = 0x6 | OPC_SDC2,    /* store 4 bytes from FPR */
+    OPC_GSSDXC1    = 0x7 | OPC_SDC2,    /* store 8 bytes from FPR */
+};
+
 /* BSHFL opcodes */
 #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
 
@@ -6172,6 +6190,165 @@  static void gen_loongson_lswc2(DisasContext *ctx, int rt,
     tcg_temp_free(t0);
 }
 
+/*
+ * Loongson EXT LDC2/SDC2: "load & store with offset" group.
+ *
+ * The effective address is built from GPR[rs], the sign-extended 8-bit
+ * offset in opcode bits [10:3] and, when rd is non-zero, GPR[rd].  rt names
+ * the GPR or FPR that is loaded or stored.  Loads with rt == 0 are treated
+ * as prefetches and emit no memory access.  FPR forms call
+ * check_cp1_enabled() first.  Opcodes not decoded here, including the
+ * 64-bit forms when TARGET_MIPS64 is not defined, raise EXCP_RI.
+ */
+static void gen_loongson_lsdc2(DisasContext *ctx, int rt,
+                                int rs, int rd)
+{
+    int offset = (int8_t)(ctx->opcode >> 3);
+    uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode);
+    TCGv t0, t1;
+    TCGv_i32 fp0;
+
+    /* Pre-conditions */
+    switch (opc) {
+    case OPC_GSLBX:
+    case OPC_GSLHX:
+    case OPC_GSLWX:
+#if defined(TARGET_MIPS64)
+    case OPC_GSLDX:
+#endif
+        /* prefetch, implement as NOP */
+        if (rt == 0) {
+            return;
+        }
+        break;
+    case OPC_GSSBX:
+    case OPC_GSSHX:
+    case OPC_GSSWX:
+#if defined(TARGET_MIPS64)
+    case OPC_GSSDX:
+#endif
+        break;
+    case OPC_GSLWXC1:
+#if defined(TARGET_MIPS64)
+    case OPC_GSLDXC1:
+#endif
+        check_cp1_enabled(ctx);
+        /* prefetch, implement as NOP */
+        if (rt == 0) {
+            return;
+        }
+        break;
+    case OPC_GSSWXC1:
+#if defined(TARGET_MIPS64)
+    case OPC_GSSDXC1:
+#endif
+        check_cp1_enabled(ctx);
+        break;
+    default:
+        MIPS_INVAL("loongson_lsdc2");
+        generate_exception_end(ctx, EXCP_RI);
+        return;
+    }
+
+    t0 = tcg_temp_new();
+
+    gen_base_offset_addr(ctx, t0, rs, offset);
+    /*
+     * rd == 0 adds nothing to the address, so skip the index add instead
+     * of handing cpu_gpr[0] to TCG (NOTE(review): confirm it is unusable).
+     */
+    if (rd) {
+        gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
+    }
+
+    switch (opc) {
+    case OPC_GSLBX:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
+        gen_store_gpr(t0, rt);
+        break;
+    case OPC_GSLHX:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW |
+                            ctx->default_tcg_memop_mask);
+        gen_store_gpr(t0, rt);
+        break;
+    case OPC_GSLWX:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL |
+                            ctx->default_tcg_memop_mask);
+        gen_store_gpr(t0, rt);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_GSLDX:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
+                            ctx->default_tcg_memop_mask);
+        gen_store_gpr(t0, rt);
+        break;
+#endif
+    case OPC_GSLWXC1:
+        fp0 = tcg_temp_new_i32();
+        tcg_gen_qemu_ld_i32(fp0, t0, ctx->mem_idx, MO_TESL |
+                            ctx->default_tcg_memop_mask);
+        gen_store_fpr32(ctx, fp0, rt);
+        tcg_temp_free_i32(fp0);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_GSLDXC1:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
+                            ctx->default_tcg_memop_mask);
+        gen_store_fpr64(ctx, t0, rt);
+        break;
+#endif
+    case OPC_GSSBX:
+        t1 = tcg_temp_new();
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_SB);
+        tcg_temp_free(t1);
+        break;
+    case OPC_GSSHX:
+        t1 = tcg_temp_new();
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUW |
+                            ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+    case OPC_GSSWX:
+        t1 = tcg_temp_new();
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
+                            ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_GSSDX:
+        t1 = tcg_temp_new();
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
+                            ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+#endif
+    case OPC_GSSWXC1:
+        fp0 = tcg_temp_new_i32();
+        gen_load_fpr32(ctx, fp0, rt);
+        tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL |
+                            ctx->default_tcg_memop_mask);
+        tcg_temp_free_i32(fp0);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_GSSDXC1:
+        t1 = tcg_temp_new();
+        gen_load_fpr64(ctx, t1, rt);
+        tcg_gen_qemu_st_i64(t1, t0, ctx->mem_idx, MO_TEQ |
+                            ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+#endif
+    default:
+        break;
+    }
+
+    tcg_temp_free(t0);
+}
+
 /* Traps */
 static void gen_trap(DisasContext *ctx, uint32_t opc,
                      int rs, int rt, int16_t imm)
@@ -31055,6 +31232,8 @@  static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
                 /* OPC_JIC, OPC_JIALC */
                 gen_compute_compact_branch(ctx, op, 0, rt, imm);
             }
+        } else if (ctx->insn_flags & ASE_LEXT) {
+            gen_loongson_lsdc2(ctx, rt, rs, rd);
         } else {
             /* OPC_LWC2, OPC_SWC2 */
             /* COP2: Not implemented. */