Message ID | 1600222344-16808-4-git-send-email-chenhc@lemote.com |
---|---|
State | New |
Headers | show |
Series | mips: Add Loongson-3 machine support | expand |
On 9/16/20 4:12 AM, Huacai Chen wrote: > From: Jiaxun Yang <jiaxun.yang@flygoat.com> > > LWC2 & SWC2 have been rewritten by Loongson EXT vendor ASE > as "load/store quad word" and "shifted load/store" groups of > instructions. > > This patch add implementation of these instructions: > gslq: load 16 bytes to GPR > gssq: store 16 bytes from GPR > gslqc1: load 16 bytes to FPR > gssqc1: store 16 bytes from FPR > > Details of Loongson-EXT is here: > https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md > > Signed-off-by: Huacai Chen <chenhc@lemote.com> > Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com> > --- > target/mips/translate.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 81 insertions(+) > > diff --git a/target/mips/translate.c b/target/mips/translate.c > index 398edf7..08d51e1 100644 > --- a/target/mips/translate.c > +++ b/target/mips/translate.c > @@ -460,6 +460,17 @@ enum { > R6_OPC_SCD = 0x27 | OPC_SPECIAL3, > }; > > +/* Loongson EXT load/store quad word opcodes */ > +#define MASK_LOONGSON_GSLSQ(op) (MASK_OP_MAJOR(op) | (op & 0x8020)) > +enum { > + OPC_GSLQ = 0x0020 | OPC_LWC2, > + OPC_GSLQC1 = 0x8020 | OPC_LWC2, > + OPC_GSSHFL = OPC_LWC2, > + OPC_GSSQ = 0x0020 | OPC_SWC2, > + OPC_GSSQC1 = 0x8020 | OPC_SWC2, > + OPC_GSSHFS = OPC_SWC2, > +}; > + > /* BSHFL opcodes */ > #define MASK_BSHFL(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6))) > > @@ -5910,6 +5921,74 @@ no_rd: > tcg_temp_free_i64(t1); > } > > +static void gen_loongson_lswc2(DisasContext *ctx, int rt, > + int rs, int rd) > +{ > + TCGv t0, t1, t2; > + TCGv_i32 fp0; > + int lsq_offset = ((int)((ctx->opcode >> 6) & 0x1ff) << 23) >> 19; > + int lsq_rt1 = ctx->opcode & 0x1f; > + int shf_offset = (int8_t)(ctx->opcode >> 6); > + > + t0 = tcg_temp_new(); > + > + switch (MASK_LOONGSON_GSLSQ(ctx->opcode)) { > +#if defined(TARGET_MIPS64) Build failure (is this code tested?): target/mips/translate.c: In function ‘gen_loongson_lswc2’: target/mips/translate.c:5961:9: error: 
unused variable ‘lsq_rt1’ [-Werror=unused-variable] int lsq_rt1 = ctx->opcode & 0x1f; ^ target/mips/translate.c:5960:9: error: unused variable ‘lsq_offset’ [-Werror=unused-variable] int lsq_offset = ((int)((ctx->opcode >> 6) & 0x1ff) << 23) >> 19; ^ cc1: all warnings being treated as errors > + case OPC_GSLQ: > + gen_base_offset_addr(ctx, t0, rs, lsq_offset); > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_store_gpr(t0, rt); > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_store_gpr(t0, lsq_rt1); > + break; > + case OPC_GSLQC1: > + check_cp1_enabled(ctx); > + gen_base_offset_addr(ctx, t0, rs, lsq_offset); > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_store_fpr64(ctx, t0, rt); > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_store_fpr64(ctx, t0, lsq_rt1); > + break; > + case OPC_GSSQ: > + t1 = tcg_temp_new(); > + gen_base_offset_addr(ctx, t0, rs, lsq_offset); > + gen_load_gpr(t1, rt); > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > + gen_load_gpr(t1, lsq_rt1); > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + tcg_temp_free(t1); > + break; > + case OPC_GSSQC1: > + check_cp1_enabled(ctx); > + t1 = tcg_temp_new(); > + gen_base_offset_addr(ctx, t0, rs, lsq_offset); > + gen_load_fpr64(ctx, t1, rt); > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > + gen_load_fpr64(ctx, t1, lsq_rt1); > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + tcg_temp_free(t1); > + break; > +#endif > + default: > + 
MIPS_INVAL("loongson_gslsq"); > + generate_exception_end(ctx, EXCP_RI); > + break; > + } > + tcg_temp_free(t0); > +} > + > /* Traps */ > static void gen_trap(DisasContext *ctx, uint32_t opc, > int rs, int rt, int16_t imm) > @@ -30774,6 +30853,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) > /* OPC_BC, OPC_BALC */ > gen_compute_compact_branch(ctx, op, 0, 0, > sextract32(ctx->opcode << 2, 0, 28)); > + } else if (ctx->insn_flags & ASE_LEXT) { > + gen_loongson_lswc2(ctx, rt, rs, rd); > } else { > /* OPC_LWC2, OPC_SWC2 */ > /* COP2: Not implemented. */ >
Hi, Philippe, On Wed, Sep 16, 2020 at 3:46 AM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > > On 9/16/20 4:12 AM, Huacai Chen wrote: > > From: Jiaxun Yang <jiaxun.yang@flygoat.com> > > > > LWC2 & SWC2 have been rewritten by Loongson EXT vendor ASE > > as "load/store quad word" and "shifted load/store" groups of > > instructions. > > > > This patch add implementation of these instructions: > > gslq: load 16 bytes to GPR > > gssq: store 16 bytes from GPR > > gslqc1: load 16 bytes to FPR > > gssqc1: store 16 bytes from FPR > > > > Details of Loongson-EXT is here: > > https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md > > > > Signed-off-by: Huacai Chen <chenhc@lemote.com> > > Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com> > > --- > > target/mips/translate.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 81 insertions(+) > > > > diff --git a/target/mips/translate.c b/target/mips/translate.c > > index 398edf7..08d51e1 100644 > > --- a/target/mips/translate.c > > +++ b/target/mips/translate.c > > @@ -460,6 +460,17 @@ enum { > > R6_OPC_SCD = 0x27 | OPC_SPECIAL3, > > }; > > > > +/* Loongson EXT load/store quad word opcodes */ > > +#define MASK_LOONGSON_GSLSQ(op) (MASK_OP_MAJOR(op) | (op & 0x8020)) > > +enum { > > + OPC_GSLQ = 0x0020 | OPC_LWC2, > > + OPC_GSLQC1 = 0x8020 | OPC_LWC2, > > + OPC_GSSHFL = OPC_LWC2, > > + OPC_GSSQ = 0x0020 | OPC_SWC2, > > + OPC_GSSQC1 = 0x8020 | OPC_SWC2, > > + OPC_GSSHFS = OPC_SWC2, > > +}; > > + > > /* BSHFL opcodes */ > > #define MASK_BSHFL(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6))) > > > > @@ -5910,6 +5921,74 @@ no_rd: > > tcg_temp_free_i64(t1); > > } > > > > +static void gen_loongson_lswc2(DisasContext *ctx, int rt, > > + int rs, int rd) > > +{ > > + TCGv t0, t1, t2; > > + TCGv_i32 fp0; > > + int lsq_offset = ((int)((ctx->opcode >> 6) & 0x1ff) << 23) >> 19; > > + int lsq_rt1 = ctx->opcode & 0x1f; > > + int shf_offset = (int8_t)(ctx->opcode >> 6); > > + > > + t0 = tcg_temp_new(); > > + 
> > + switch (MASK_LOONGSON_GSLSQ(ctx->opcode)) { > > +#if defined(TARGET_MIPS64) > > Build failure (is this code tested?): > > target/mips/translate.c: In function ‘gen_loongson_lswc2’: > target/mips/translate.c:5961:9: error: unused variable ‘lsq_rt1’ > [-Werror=unused-variable] > int lsq_rt1 = ctx->opcode & 0x1f; > ^ > target/mips/translate.c:5960:9: error: unused variable ‘lsq_offset’ > [-Werror=unused-variable] > int lsq_offset = ((int)((ctx->opcode >> 6) & 0x1ff) << 23) >> 19; > ^ > cc1: all warnings being treated as errors Thank you very much, lsq_rt1 and lsq_offset should be guarded by TARGET_MIPS64. Huacai > > > + case OPC_GSLQ: > > + gen_base_offset_addr(ctx, t0, rs, lsq_offset); > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_store_gpr(t0, rt); > > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_store_gpr(t0, lsq_rt1); > > + break; > > + case OPC_GSLQC1: > > + check_cp1_enabled(ctx); > > + gen_base_offset_addr(ctx, t0, rs, lsq_offset); > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_store_fpr64(ctx, t0, rt); > > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_store_fpr64(ctx, t0, lsq_rt1); > > + break; > > + case OPC_GSSQ: > > + t1 = tcg_temp_new(); > > + gen_base_offset_addr(ctx, t0, rs, lsq_offset); > > + gen_load_gpr(t1, rt); > > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > > + gen_load_gpr(t1, lsq_rt1); > > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + tcg_temp_free(t1); > > + break; > > + case OPC_GSSQC1: > > + check_cp1_enabled(ctx); > > + t1 = tcg_temp_new(); > > + 
gen_base_offset_addr(ctx, t0, rs, lsq_offset); > > + gen_load_fpr64(ctx, t1, rt); > > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > > + gen_load_fpr64(ctx, t1, lsq_rt1); > > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + tcg_temp_free(t1); > > + break; > > +#endif > > + default: > > + MIPS_INVAL("loongson_gslsq"); > > + generate_exception_end(ctx, EXCP_RI); > > + break; > > + } > > + tcg_temp_free(t0); > > +} > > + > > /* Traps */ > > static void gen_trap(DisasContext *ctx, uint32_t opc, > > int rs, int rt, int16_t imm) > > @@ -30774,6 +30853,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) > > /* OPC_BC, OPC_BALC */ > > gen_compute_compact_branch(ctx, op, 0, 0, > > sextract32(ctx->opcode << 2, 0, 28)); > > + } else if (ctx->insn_flags & ASE_LEXT) { > > + gen_loongson_lswc2(ctx, rt, rs, rd); > > } else { > > /* OPC_LWC2, OPC_SWC2 */ > > /* COP2: Not implemented. */ > >
On 9/15/20 7:12 PM, Huacai Chen wrote: > + case OPC_GSLQ: > + gen_base_offset_addr(ctx, t0, rs, lsq_offset); > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_store_gpr(t0, rt); > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_store_gpr(t0, lsq_rt1); If rs == rt, this will compute the wrong address for the second load. Either avoid storing t0 back to rt until both loads are complete, or retain the address temporary and simply add 8 between the two loads. r~
Hi, Richard, On Wed, Sep 16, 2020 at 11:15 PM Richard Henderson <richard.henderson@linaro.org> wrote: > > On 9/15/20 7:12 PM, Huacai Chen wrote: > > + case OPC_GSLQ: > > + gen_base_offset_addr(ctx, t0, rs, lsq_offset); > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_store_gpr(t0, rt); > > + gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8); > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_store_gpr(t0, lsq_rt1); > > If rs == rt, this will compute the wrong address for the second load. > > Either avoid storing t0 back to rt until both loads are complete, or retain the > address temporary and simply add 8 between the two loads. OK, this will be improved in V10. > > > r~
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 398edf7..08d51e1 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -460,6 +460,17 @@ enum {
     R6_OPC_SCD = 0x27 | OPC_SPECIAL3,
 };

+/* Loongson EXT load/store quad word opcodes */
+#define MASK_LOONGSON_GSLSQ(op) (MASK_OP_MAJOR(op) | (op & 0x8020))
+enum {
+    OPC_GSLQ = 0x0020 | OPC_LWC2,
+    OPC_GSLQC1 = 0x8020 | OPC_LWC2,
+    OPC_GSSHFL = OPC_LWC2,
+    OPC_GSSQ = 0x0020 | OPC_SWC2,
+    OPC_GSSQC1 = 0x8020 | OPC_SWC2,
+    OPC_GSSHFS = OPC_SWC2,
+};
+
 /* BSHFL opcodes */
 #define MASK_BSHFL(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))

@@ -5910,6 +5921,99 @@ no_rd:
     tcg_temp_free_i64(t1);
 }

+/*
+ * Translate the Loongson EXT "load/store quad word" group, which reuses the
+ * LWC2/SWC2 major opcodes: gslq, gslqc1, gssq and gssqc1.
+ *
+ * @ctx: translation context; the instruction word is ctx->opcode
+ * @rt:  register for the 8 bytes at the computed address
+ * @rs:  base address register
+ * @rd:  not used by this group
+ *
+ * The register for the following 8 bytes comes from opcode bits [4:0]; the
+ * byte offset is the signed 9-bit field in opcode bits [14:6] times 16.
+ * Unrecognised encodings, and all encodings when TARGET_MIPS64 is not
+ * defined, raise a Reserved Instruction exception.
+ */
+static void gen_loongson_lswc2(DisasContext *ctx, int rt,
+                               int rs, int rd)
+{
+    TCGv t0;
+#if defined(TARGET_MIPS64)
+    TCGv t1;
+    int lsq_rt1 = ctx->opcode & 0x1f;
+    int lsq_offset = sextract32(ctx->opcode, 6, 9) * 16;
+#endif
+
+    t0 = tcg_temp_new();
+
+    switch (MASK_LOONGSON_GSLSQ(ctx->opcode)) {
+#if defined(TARGET_MIPS64)
+    case OPC_GSLQ:
+        /*
+         * Write the GPRs back only after both loads: rt may be the same
+         * register as rs, and an early write-back would change the base
+         * used to form the second address.
+         */
+        t1 = tcg_temp_new();
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        gen_store_gpr(t1, rt);
+        gen_store_gpr(t0, lsq_rt1);
+        tcg_temp_free(t1);
+        break;
+    case OPC_GSLQC1:
+        check_cp1_enabled(ctx);
+        /* Same ordering as gslq: both loads first, then the write-back. */
+        t1 = tcg_temp_new();
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        gen_store_fpr64(ctx, t1, rt);
+        gen_store_fpr64(ctx, t0, lsq_rt1);
+        tcg_temp_free(t1);
+        break;
+    case OPC_GSSQ:
+        t1 = tcg_temp_new();
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
+        gen_load_gpr(t1, lsq_rt1);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+    case OPC_GSSQC1:
+        check_cp1_enabled(ctx);
+        t1 = tcg_temp_new();
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset);
+        gen_load_fpr64(ctx, t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        gen_base_offset_addr(ctx, t0, rs, lsq_offset + 8);
+        gen_load_fpr64(ctx, t1, lsq_rt1);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+#endif
+    default:
+        MIPS_INVAL("loongson_gslsq");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+    tcg_temp_free(t0);
+}
+
 /* Traps */
 static void gen_trap(DisasContext *ctx, uint32_t opc,
                      int rs, int rt, int16_t imm)
@@ -30774,6 +30878,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
             /* OPC_BC, OPC_BALC */
             gen_compute_compact_branch(ctx, op, 0, 0,
                                        sextract32(ctx->opcode << 2, 0, 28));
+        } else if (ctx->insn_flags & ASE_LEXT) {
+            gen_loongson_lswc2(ctx, rt, rs, rd);
         } else {
             /* OPC_LWC2, OPC_SWC2 */
             /* COP2: Not implemented. */