Message ID | 1602059975-10115-7-git-send-email-chenhc@lemote.com |
---|---|
State | New |
Headers | show |
Series | mips: Add Loongson-3 machine support | expand |
On 10/7/20 10:39 AM, Huacai Chen wrote: > From: Jiaxun Yang <jiaxun.yang@flygoat.com> > > LDC2/SDC2 opcodes have been rewritten as "load & store with offset" > group of instructions by loongson-ext ASE. > > This patch add implementation of these instructions: > gslbx: load 1 bytes to GPR > gslhx: load 2 bytes to GPR > gslwx: load 4 bytes to GPR > gsldx: load 8 bytes to GPR > gslwxc1: load 4 bytes to FPR > gsldxc1: load 8 bytes to FPR > gssbx: store 1 bytes from GPR > gsshx: store 2 bytes from GPR > gsswx: store 4 bytes from GPR > gssdx: store 8 bytes from GPR > gsswxc1: store 4 bytes from FPR > gssdxc1: store 8 bytes from FPR > > Details of Loongson-EXT is here: > https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md > > Signed-off-by: Huacai Chen <chenhc@lemote.com> > Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com> If this patch is from Jiaxun, Huacai's S-o-b should come *after*. > --- > target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 179 insertions(+) > > diff --git a/target/mips/translate.c b/target/mips/translate.c > index 916b57f..4d42cfc 100644 > --- a/target/mips/translate.c > +++ b/target/mips/translate.c > @@ -484,6 +484,24 @@ enum { > OPC_GSSDRC1 = 0x7 | OPC_GSSHFS, > }; > > +/* Loongson EXT LDC2/SDC2 opcodes */ > +#define MASK_LOONGSON_LSDC2(op) (MASK_OP_MAJOR(op) | (op & 0x7)) > + > +enum { > + OPC_GSLBX = 0x0 | OPC_LDC2, > + OPC_GSLHX = 0x1 | OPC_LDC2, > + OPC_GSLWX = 0x2 | OPC_LDC2, > + OPC_GSLDX = 0x3 | OPC_LDC2, > + OPC_GSLWXC1 = 0x6 | OPC_LDC2, > + OPC_GSLDXC1 = 0x7 | OPC_LDC2, > + OPC_GSSBX = 0x0 | OPC_SDC2, > + OPC_GSSHX = 0x1 | OPC_SDC2, > + OPC_GSSWX = 0x2 | OPC_SDC2, > + OPC_GSSDX = 0x3 | OPC_SDC2, > + OPC_GSSWXC1 = 0x6 | OPC_SDC2, > + OPC_GSSDXC1 = 0x7 | OPC_SDC2, > +}; > + > /* BSHFL opcodes */ > #define MASK_BSHFL(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6))) > > @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt, > tcg_temp_free(t0); > } > > 
+/* Loongson EXT LDC2/SDC2 */ > +static void gen_loongson_lsdc2(DisasContext *ctx, int rt, > + int rs, int rd) Alignment off (various occurences in this series). > +{ > + int offset = (int8_t)(ctx->opcode >> 3); Please use sextract32() which is easier to read: int32_t offset = sextract32(ctx->opcode, 3, 8); > + uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode); > + TCGv t0, t1; > + TCGv_i32 fp0; > + > + /* Pre-conditions */ > + switch (opc) { > + case OPC_GSLBX: > + case OPC_GSLHX: > + case OPC_GSLWX: > + case OPC_GSLDX: > + /* prefetch, implement as NOP */ > + if (rt == 0) { > + return; > + } > + break; > + case OPC_GSSBX: > + case OPC_GSSHX: > + case OPC_GSSWX: > + case OPC_GSSDX: > + break; > + case OPC_GSLWXC1: > +#if defined(TARGET_MIPS64) > + case OPC_GSLDXC1: > +#endif > + check_cp1_enabled(ctx); > + /* prefetch, implement as NOP */ > + if (rt == 0) { > + return; > + } > + break; > + case OPC_GSSWXC1: > +#if defined(TARGET_MIPS64) > + case OPC_GSSDXC1: > +#endif > + check_cp1_enabled(ctx); > + break; > + default: > + MIPS_INVAL("loongson_lsdc2"); > + generate_exception_end(ctx, EXCP_RI); > + return; > + break; > + } > + > + t0 = tcg_temp_new(); > + > + gen_base_offset_addr(ctx, t0, rs, offset); > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > + > + switch (opc) { > + case OPC_GSLBX: > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB); > + gen_store_gpr(t0, rt); > + break; > + case OPC_GSLHX: > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW | > + ctx->default_tcg_memop_mask); Do Loongson EXT plan to support unaligned accesses? 
> + gen_store_gpr(t0, rt); > + break; > + case OPC_GSLWX: > + gen_base_offset_addr(ctx, t0, rs, offset); > + if (rd) { > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > + } > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL | > + ctx->default_tcg_memop_mask); > + gen_store_gpr(t0, rt); > + break; > +#if defined(TARGET_MIPS64) > + case OPC_GSLDX: > + gen_base_offset_addr(ctx, t0, rs, offset); > + if (rd) { > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > + } > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_store_gpr(t0, rt); > + break; > +#endif > + case OPC_GSLWXC1: > + check_cp1_enabled(ctx); > + gen_base_offset_addr(ctx, t0, rs, offset); > + if (rd) { > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > + } > + fp0 = tcg_temp_new_i32(); > + tcg_gen_qemu_ld_i32(fp0, t0, ctx->mem_idx, MO_TESL | > + ctx->default_tcg_memop_mask); > + gen_store_fpr32(ctx, fp0, rt); > + tcg_temp_free_i32(fp0); > + break; > +#if defined(TARGET_MIPS64) > + case OPC_GSLDXC1: > + check_cp1_enabled(ctx); > + gen_base_offset_addr(ctx, t0, rs, offset); > + if (rd) { > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > + } > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + gen_store_fpr64(ctx, t0, rt); > + break; > +#endif > + case OPC_GSSBX: > + t1 = tcg_temp_new(); > + gen_load_gpr(t1, rt); > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_SB); > + tcg_temp_free(t1); > + break; > + case OPC_GSSHX: > + t1 = tcg_temp_new(); > + gen_load_gpr(t1, rt); > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUW | > + ctx->default_tcg_memop_mask); > + tcg_temp_free(t1); > + break; > + case OPC_GSSWX: > + t1 = tcg_temp_new(); > + gen_load_gpr(t1, rt); > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | > + ctx->default_tcg_memop_mask); > + tcg_temp_free(t1); > + break; > +#if defined(TARGET_MIPS64) > + case OPC_GSSDX: > + t1 = tcg_temp_new(); > + gen_load_gpr(t1, rt); > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > + 
ctx->default_tcg_memop_mask); > + tcg_temp_free(t1); > + break; > +#endif > + case OPC_GSSWXC1: > + fp0 = tcg_temp_new_i32(); > + gen_load_fpr32(ctx, fp0, rt); > + tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL | > + ctx->default_tcg_memop_mask); > + tcg_temp_free_i32(fp0); > + break; > +#if defined(TARGET_MIPS64) > + case OPC_GSSDXC1: > + t1 = tcg_temp_new(); > + gen_load_fpr64(ctx, t1, rt); > + tcg_gen_qemu_st_i64(t1, t0, ctx->mem_idx, MO_TEQ | > + ctx->default_tcg_memop_mask); > + tcg_temp_free(t1); > + break; > +#endif > + default: > + break; > + } > + > + tcg_temp_free(t0); > +} > + > /* Traps */ > static void gen_trap(DisasContext *ctx, uint32_t opc, > int rs, int rt, int16_t imm) > @@ -31055,6 +31232,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) > /* OPC_JIC, OPC_JIALC */ > gen_compute_compact_branch(ctx, op, 0, rt, imm); > } > + } else if (ctx->insn_flags & ASE_LEXT) { > + gen_loongson_lsdc2(ctx, rt, rs, rd); > } else { > /* OPC_LWC2, OPC_SWC2 */ > /* COP2: Not implemented. */ >
Hi, Philippe, On Sat, Oct 10, 2020 at 9:07 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > > On 10/7/20 10:39 AM, Huacai Chen wrote: > > From: Jiaxun Yang <jiaxun.yang@flygoat.com> > > > > LDC2/SDC2 opcodes have been rewritten as "load & store with offset" > > group of instructions by loongson-ext ASE. > > > > This patch add implementation of these instructions: > > gslbx: load 1 bytes to GPR > > gslhx: load 2 bytes to GPR > > gslwx: load 4 bytes to GPR > > gsldx: load 8 bytes to GPR > > gslwxc1: load 4 bytes to FPR > > gsldxc1: load 8 bytes to FPR > > gssbx: store 1 bytes from GPR > > gsshx: store 2 bytes from GPR > > gsswx: store 4 bytes from GPR > > gssdx: store 8 bytes from GPR > > gsswxc1: store 4 bytes from FPR > > gssdxc1: store 8 bytes from FPR > > > > Details of Loongson-EXT is here: > > https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md > > > > Signed-off-by: Huacai Chen <chenhc@lemote.com> > > Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com> > > If this patch is from Jiaxun, Huacai's S-o-b should come *after*. OK, I will do that. 
> > > --- > > target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 179 insertions(+) > > > > diff --git a/target/mips/translate.c b/target/mips/translate.c > > index 916b57f..4d42cfc 100644 > > --- a/target/mips/translate.c > > +++ b/target/mips/translate.c > > @@ -484,6 +484,24 @@ enum { > > OPC_GSSDRC1 = 0x7 | OPC_GSSHFS, > > }; > > > > +/* Loongson EXT LDC2/SDC2 opcodes */ > > +#define MASK_LOONGSON_LSDC2(op) (MASK_OP_MAJOR(op) | (op & 0x7)) > > + > > +enum { > > + OPC_GSLBX = 0x0 | OPC_LDC2, > > + OPC_GSLHX = 0x1 | OPC_LDC2, > > + OPC_GSLWX = 0x2 | OPC_LDC2, > > + OPC_GSLDX = 0x3 | OPC_LDC2, > > + OPC_GSLWXC1 = 0x6 | OPC_LDC2, > > + OPC_GSLDXC1 = 0x7 | OPC_LDC2, > > + OPC_GSSBX = 0x0 | OPC_SDC2, > > + OPC_GSSHX = 0x1 | OPC_SDC2, > > + OPC_GSSWX = 0x2 | OPC_SDC2, > > + OPC_GSSDX = 0x3 | OPC_SDC2, > > + OPC_GSSWXC1 = 0x6 | OPC_SDC2, > > + OPC_GSSDXC1 = 0x7 | OPC_SDC2, > > +}; > > + > > /* BSHFL opcodes */ > > #define MASK_BSHFL(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6))) > > > > @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt, > > tcg_temp_free(t0); > > } > > > > +/* Loongson EXT LDC2/SDC2 */ > > +static void gen_loongson_lsdc2(DisasContext *ctx, int rt, > > + int rs, int rd) > > Alignment off (various occurences in this series). OK, thanks. > > > +{ > > + int offset = (int8_t)(ctx->opcode >> 3); > > Please use sextract32() which is easier to read: > > int32_t offset = sextract32(ctx->opcode, 3, 8); OK, thanks. 
> > > + uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode); > > + TCGv t0, t1; > > + TCGv_i32 fp0; > > + > > + /* Pre-conditions */ > > + switch (opc) { > > + case OPC_GSLBX: > > + case OPC_GSLHX: > > + case OPC_GSLWX: > > + case OPC_GSLDX: > > + /* prefetch, implement as NOP */ > > + if (rt == 0) { > > + return; > > + } > > + break; > > + case OPC_GSSBX: > > + case OPC_GSSHX: > > + case OPC_GSSWX: > > + case OPC_GSSDX: > > + break; > > + case OPC_GSLWXC1: > > +#if defined(TARGET_MIPS64) > > + case OPC_GSLDXC1: > > +#endif > > + check_cp1_enabled(ctx); > > + /* prefetch, implement as NOP */ > > + if (rt == 0) { > > + return; > > + } > > + break; > > + case OPC_GSSWXC1: > > +#if defined(TARGET_MIPS64) > > + case OPC_GSSDXC1: > > +#endif > > + check_cp1_enabled(ctx); > > + break; > > + default: > > + MIPS_INVAL("loongson_lsdc2"); > > + generate_exception_end(ctx, EXCP_RI); > > + return; > > + break; > > + } > > + > > + t0 = tcg_temp_new(); > > + > > + gen_base_offset_addr(ctx, t0, rs, offset); > > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > > + > > + switch (opc) { > > + case OPC_GSLBX: > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB); > > + gen_store_gpr(t0, rt); > > + break; > > + case OPC_GSLHX: > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW | > > + ctx->default_tcg_memop_mask); > > Do Loongson EXT plan to support unaligned accesses? Not support in hardware, and Linux kernel emulate the unaligned cases. 
> > > + gen_store_gpr(t0, rt); > > + break; > > + case OPC_GSLWX: > > + gen_base_offset_addr(ctx, t0, rs, offset); > > + if (rd) { > > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > > + } > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL | > > + ctx->default_tcg_memop_mask); > > + gen_store_gpr(t0, rt); > > + break; > > +#if defined(TARGET_MIPS64) > > + case OPC_GSLDX: > > + gen_base_offset_addr(ctx, t0, rs, offset); > > + if (rd) { > > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > > + } > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_store_gpr(t0, rt); > > + break; > > +#endif > > + case OPC_GSLWXC1: > > + check_cp1_enabled(ctx); > > + gen_base_offset_addr(ctx, t0, rs, offset); > > + if (rd) { > > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > > + } > > + fp0 = tcg_temp_new_i32(); > > + tcg_gen_qemu_ld_i32(fp0, t0, ctx->mem_idx, MO_TESL | > > + ctx->default_tcg_memop_mask); > > + gen_store_fpr32(ctx, fp0, rt); > > + tcg_temp_free_i32(fp0); > > + break; > > +#if defined(TARGET_MIPS64) > > + case OPC_GSLDXC1: > > + check_cp1_enabled(ctx); > > + gen_base_offset_addr(ctx, t0, rs, offset); > > + if (rd) { > > + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > > + } > > + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + gen_store_fpr64(ctx, t0, rt); > > + break; > > +#endif > > + case OPC_GSSBX: > > + t1 = tcg_temp_new(); > > + gen_load_gpr(t1, rt); > > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_SB); > > + tcg_temp_free(t1); > > + break; > > + case OPC_GSSHX: > > + t1 = tcg_temp_new(); > > + gen_load_gpr(t1, rt); > > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUW | > > + ctx->default_tcg_memop_mask); > > + tcg_temp_free(t1); > > + break; > > + case OPC_GSSWX: > > + t1 = tcg_temp_new(); > > + gen_load_gpr(t1, rt); > > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | > > + ctx->default_tcg_memop_mask); > > + tcg_temp_free(t1); > > + break; > > +#if 
defined(TARGET_MIPS64) > > + case OPC_GSSDX: > > + t1 = tcg_temp_new(); > > + gen_load_gpr(t1, rt); > > + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + tcg_temp_free(t1); > > + break; > > +#endif > > + case OPC_GSSWXC1: > > + fp0 = tcg_temp_new_i32(); > > + gen_load_fpr32(ctx, fp0, rt); > > + tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL | > > + ctx->default_tcg_memop_mask); > > + tcg_temp_free_i32(fp0); > > + break; > > +#if defined(TARGET_MIPS64) > > + case OPC_GSSDXC1: > > + t1 = tcg_temp_new(); > > + gen_load_fpr64(ctx, t1, rt); > > + tcg_gen_qemu_st_i64(t1, t0, ctx->mem_idx, MO_TEQ | > > + ctx->default_tcg_memop_mask); > > + tcg_temp_free(t1); > > + break; > > +#endif > > + default: > > + break; > > + } > > + > > + tcg_temp_free(t0); > > +} > > + > > /* Traps */ > > static void gen_trap(DisasContext *ctx, uint32_t opc, > > int rs, int rt, int16_t imm) > > @@ -31055,6 +31232,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) > > /* OPC_JIC, OPC_JIALC */ > > gen_compute_compact_branch(ctx, op, 0, rt, imm); > > } > > + } else if (ctx->insn_flags & ASE_LEXT) { > > + gen_loongson_lsdc2(ctx, rt, rs, rd); > > } else { > > /* OPC_LWC2, OPC_SWC2 */ > > /* COP2: Not implemented. */ > > Huacai
On 10/11/20 5:02 AM, Huacai Chen wrote: > Hi, Philippe, > > On Sat, Oct 10, 2020 at 9:07 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: >> >> On 10/7/20 10:39 AM, Huacai Chen wrote: >>> From: Jiaxun Yang <jiaxun.yang@flygoat.com> >>> >>> LDC2/SDC2 opcodes have been rewritten as "load & store with offset" >>> group of instructions by loongson-ext ASE. >>> >>> This patch add implementation of these instructions: >>> gslbx: load 1 bytes to GPR >>> gslhx: load 2 bytes to GPR >>> gslwx: load 4 bytes to GPR >>> gsldx: load 8 bytes to GPR >>> gslwxc1: load 4 bytes to FPR >>> gsldxc1: load 8 bytes to FPR >>> gssbx: store 1 bytes from GPR >>> gsshx: store 2 bytes from GPR >>> gsswx: store 4 bytes from GPR >>> gssdx: store 8 bytes from GPR >>> gsswxc1: store 4 bytes from FPR >>> gssdxc1: store 8 bytes from FPR >>> >>> Details of Loongson-EXT is here: >>> https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md >>> >>> Signed-off-by: Huacai Chen <chenhc@lemote.com> >>> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com> >> >> If this patch is from Jiaxun, Huacai's S-o-b should come *after*. > OK, I will do that. 
> >> >>> --- >>> target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++ >>> 1 file changed, 179 insertions(+) >>> >>> diff --git a/target/mips/translate.c b/target/mips/translate.c >>> index 916b57f..4d42cfc 100644 >>> --- a/target/mips/translate.c >>> +++ b/target/mips/translate.c >>> @@ -484,6 +484,24 @@ enum { >>> OPC_GSSDRC1 = 0x7 | OPC_GSSHFS, >>> }; >>> >>> +/* Loongson EXT LDC2/SDC2 opcodes */ >>> +#define MASK_LOONGSON_LSDC2(op) (MASK_OP_MAJOR(op) | (op & 0x7)) >>> + >>> +enum { >>> + OPC_GSLBX = 0x0 | OPC_LDC2, >>> + OPC_GSLHX = 0x1 | OPC_LDC2, >>> + OPC_GSLWX = 0x2 | OPC_LDC2, >>> + OPC_GSLDX = 0x3 | OPC_LDC2, >>> + OPC_GSLWXC1 = 0x6 | OPC_LDC2, >>> + OPC_GSLDXC1 = 0x7 | OPC_LDC2, >>> + OPC_GSSBX = 0x0 | OPC_SDC2, >>> + OPC_GSSHX = 0x1 | OPC_SDC2, >>> + OPC_GSSWX = 0x2 | OPC_SDC2, >>> + OPC_GSSDX = 0x3 | OPC_SDC2, >>> + OPC_GSSWXC1 = 0x6 | OPC_SDC2, >>> + OPC_GSSDXC1 = 0x7 | OPC_SDC2, >>> +}; >>> + >>> /* BSHFL opcodes */ >>> #define MASK_BSHFL(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6))) >>> >>> @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt, >>> tcg_temp_free(t0); >>> } >>> >>> +/* Loongson EXT LDC2/SDC2 */ >>> +static void gen_loongson_lsdc2(DisasContext *ctx, int rt, >>> + int rs, int rd) >> >> Alignment off (various occurences in this series). > OK, thanks. > >> >>> +{ >>> + int offset = (int8_t)(ctx->opcode >> 3); >> >> Please use sextract32() which is easier to read: >> >> int32_t offset = sextract32(ctx->opcode, 3, 8); > OK, thanks. 
> >> >>> + uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode); >>> + TCGv t0, t1; >>> + TCGv_i32 fp0; >>> + >>> + /* Pre-conditions */ >>> + switch (opc) { >>> + case OPC_GSLBX: >>> + case OPC_GSLHX: >>> + case OPC_GSLWX: >>> + case OPC_GSLDX: >>> + /* prefetch, implement as NOP */ >>> + if (rt == 0) { >>> + return; >>> + } >>> + break; >>> + case OPC_GSSBX: >>> + case OPC_GSSHX: >>> + case OPC_GSSWX: >>> + case OPC_GSSDX: >>> + break; >>> + case OPC_GSLWXC1: >>> +#if defined(TARGET_MIPS64) >>> + case OPC_GSLDXC1: >>> +#endif >>> + check_cp1_enabled(ctx); >>> + /* prefetch, implement as NOP */ >>> + if (rt == 0) { >>> + return; >>> + } >>> + break; >>> + case OPC_GSSWXC1: >>> +#if defined(TARGET_MIPS64) >>> + case OPC_GSSDXC1: >>> +#endif >>> + check_cp1_enabled(ctx); >>> + break; >>> + default: >>> + MIPS_INVAL("loongson_lsdc2"); >>> + generate_exception_end(ctx, EXCP_RI); >>> + return; >>> + break; >>> + } >>> + >>> + t0 = tcg_temp_new(); >>> + >>> + gen_base_offset_addr(ctx, t0, rs, offset); >>> + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); >>> + >>> + switch (opc) { >>> + case OPC_GSLBX: >>> + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB); >>> + gen_store_gpr(t0, rt); >>> + break; >>> + case OPC_GSLHX: >>> + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW | >>> + ctx->default_tcg_memop_mask); >> >> Do Loongson EXT plan to support unaligned accesses? > Not support in hardware, and Linux kernel emulate the unaligned cases. OK, that was my understanding. So we don't need to use default_tcg_memop_mask, we can directly use MO_ALIGN in place instead. Regards, Phil.
Hi, Philippe, On Sun, Oct 11, 2020 at 7:13 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > > On 10/11/20 5:02 AM, Huacai Chen wrote: > > Hi, Philippe, > > > > On Sat, Oct 10, 2020 at 9:07 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > >> > >> On 10/7/20 10:39 AM, Huacai Chen wrote: > >>> From: Jiaxun Yang <jiaxun.yang@flygoat.com> > >>> > >>> LDC2/SDC2 opcodes have been rewritten as "load & store with offset" > >>> group of instructions by loongson-ext ASE. > >>> > >>> This patch add implementation of these instructions: > >>> gslbx: load 1 bytes to GPR > >>> gslhx: load 2 bytes to GPR > >>> gslwx: load 4 bytes to GPR > >>> gsldx: load 8 bytes to GPR > >>> gslwxc1: load 4 bytes to FPR > >>> gsldxc1: load 8 bytes to FPR > >>> gssbx: store 1 bytes from GPR > >>> gsshx: store 2 bytes from GPR > >>> gsswx: store 4 bytes from GPR > >>> gssdx: store 8 bytes from GPR > >>> gsswxc1: store 4 bytes from FPR > >>> gssdxc1: store 8 bytes from FPR > >>> > >>> Details of Loongson-EXT is here: > >>> https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md > >>> > >>> Signed-off-by: Huacai Chen <chenhc@lemote.com> > >>> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com> > >> > >> If this patch is from Jiaxun, Huacai's S-o-b should come *after*. > > OK, I will do that. 
> > > >> > >>> --- > >>> target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++ > >>> 1 file changed, 179 insertions(+) > >>> > >>> diff --git a/target/mips/translate.c b/target/mips/translate.c > >>> index 916b57f..4d42cfc 100644 > >>> --- a/target/mips/translate.c > >>> +++ b/target/mips/translate.c > >>> @@ -484,6 +484,24 @@ enum { > >>> OPC_GSSDRC1 = 0x7 | OPC_GSSHFS, > >>> }; > >>> > >>> +/* Loongson EXT LDC2/SDC2 opcodes */ > >>> +#define MASK_LOONGSON_LSDC2(op) (MASK_OP_MAJOR(op) | (op & 0x7)) > >>> + > >>> +enum { > >>> + OPC_GSLBX = 0x0 | OPC_LDC2, > >>> + OPC_GSLHX = 0x1 | OPC_LDC2, > >>> + OPC_GSLWX = 0x2 | OPC_LDC2, > >>> + OPC_GSLDX = 0x3 | OPC_LDC2, > >>> + OPC_GSLWXC1 = 0x6 | OPC_LDC2, > >>> + OPC_GSLDXC1 = 0x7 | OPC_LDC2, > >>> + OPC_GSSBX = 0x0 | OPC_SDC2, > >>> + OPC_GSSHX = 0x1 | OPC_SDC2, > >>> + OPC_GSSWX = 0x2 | OPC_SDC2, > >>> + OPC_GSSDX = 0x3 | OPC_SDC2, > >>> + OPC_GSSWXC1 = 0x6 | OPC_SDC2, > >>> + OPC_GSSDXC1 = 0x7 | OPC_SDC2, > >>> +}; > >>> + > >>> /* BSHFL opcodes */ > >>> #define MASK_BSHFL(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6))) > >>> > >>> @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt, > >>> tcg_temp_free(t0); > >>> } > >>> > >>> +/* Loongson EXT LDC2/SDC2 */ > >>> +static void gen_loongson_lsdc2(DisasContext *ctx, int rt, > >>> + int rs, int rd) > >> > >> Alignment off (various occurences in this series). > > OK, thanks. > > > >> > >>> +{ > >>> + int offset = (int8_t)(ctx->opcode >> 3); > >> > >> Please use sextract32() which is easier to read: > >> > >> int32_t offset = sextract32(ctx->opcode, 3, 8); > > OK, thanks. 
> > > >> > >>> + uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode); > >>> + TCGv t0, t1; > >>> + TCGv_i32 fp0; > >>> + > >>> + /* Pre-conditions */ > >>> + switch (opc) { > >>> + case OPC_GSLBX: > >>> + case OPC_GSLHX: > >>> + case OPC_GSLWX: > >>> + case OPC_GSLDX: > >>> + /* prefetch, implement as NOP */ > >>> + if (rt == 0) { > >>> + return; > >>> + } > >>> + break; > >>> + case OPC_GSSBX: > >>> + case OPC_GSSHX: > >>> + case OPC_GSSWX: > >>> + case OPC_GSSDX: > >>> + break; > >>> + case OPC_GSLWXC1: > >>> +#if defined(TARGET_MIPS64) > >>> + case OPC_GSLDXC1: > >>> +#endif > >>> + check_cp1_enabled(ctx); > >>> + /* prefetch, implement as NOP */ > >>> + if (rt == 0) { > >>> + return; > >>> + } > >>> + break; > >>> + case OPC_GSSWXC1: > >>> +#if defined(TARGET_MIPS64) > >>> + case OPC_GSSDXC1: > >>> +#endif > >>> + check_cp1_enabled(ctx); > >>> + break; > >>> + default: > >>> + MIPS_INVAL("loongson_lsdc2"); > >>> + generate_exception_end(ctx, EXCP_RI); > >>> + return; > >>> + break; > >>> + } > >>> + > >>> + t0 = tcg_temp_new(); > >>> + > >>> + gen_base_offset_addr(ctx, t0, rs, offset); > >>> + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); > >>> + > >>> + switch (opc) { > >>> + case OPC_GSLBX: > >>> + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB); > >>> + gen_store_gpr(t0, rt); > >>> + break; > >>> + case OPC_GSLHX: > >>> + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW | > >>> + ctx->default_tcg_memop_mask); > >> > >> Do Loongson EXT plan to support unaligned accesses? > > Not support in hardware, and Linux kernel emulate the unaligned cases. > > OK, that was my understanding. So we don't need to use > default_tcg_memop_mask, we can directly use MO_ALIGN in > place instead. I read the code again, and found that MIPSR6 uses MO_UNALN while others use MO_ALIGN. And I also realized that Loongson-3A4000 supports unaligned access in hardware (the same as R6). So, I think I should keep default_tcg_memop_mask here. 
And if possible, I want to set MO_UNALN for all Loongson-3 processors. Huacai > > Regards, > > Phil.
On 10/12/20 12:33 PM, Huacai Chen wrote: > Hi, Philippe, > > On Sun, Oct 11, 2020 at 7:13 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: >> >> On 10/11/20 5:02 AM, Huacai Chen wrote: >>> Hi, Philippe, >>> >>> On Sat, Oct 10, 2020 at 9:07 PM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: >>>> >>>> On 10/7/20 10:39 AM, Huacai Chen wrote: >>>>> From: Jiaxun Yang <jiaxun.yang@flygoat.com> >>>>> >>>>> LDC2/SDC2 opcodes have been rewritten as "load & store with offset" >>>>> group of instructions by loongson-ext ASE. >>>>> >>>>> This patch add implementation of these instructions: >>>>> gslbx: load 1 bytes to GPR >>>>> gslhx: load 2 bytes to GPR >>>>> gslwx: load 4 bytes to GPR >>>>> gsldx: load 8 bytes to GPR >>>>> gslwxc1: load 4 bytes to FPR >>>>> gsldxc1: load 8 bytes to FPR >>>>> gssbx: store 1 bytes from GPR >>>>> gsshx: store 2 bytes from GPR >>>>> gsswx: store 4 bytes from GPR >>>>> gssdx: store 8 bytes from GPR >>>>> gsswxc1: store 4 bytes from FPR >>>>> gssdxc1: store 8 bytes from FPR >>>>> >>>>> Details of Loongson-EXT is here: >>>>> https://github.com/FlyGoat/loongson-insn/blob/master/loongson-ext.md >>>>> >>>>> Signed-off-by: Huacai Chen <chenhc@lemote.com> >>>>> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com> >>>> >>>> If this patch is from Jiaxun, Huacai's S-o-b should come *after*. >>> OK, I will do that. 
>>> >>>> >>>>> --- >>>>> target/mips/translate.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++ >>>>> 1 file changed, 179 insertions(+) >>>>> >>>>> diff --git a/target/mips/translate.c b/target/mips/translate.c >>>>> index 916b57f..4d42cfc 100644 >>>>> --- a/target/mips/translate.c >>>>> +++ b/target/mips/translate.c >>>>> @@ -484,6 +484,24 @@ enum { >>>>> OPC_GSSDRC1 = 0x7 | OPC_GSSHFS, >>>>> }; >>>>> >>>>> +/* Loongson EXT LDC2/SDC2 opcodes */ >>>>> +#define MASK_LOONGSON_LSDC2(op) (MASK_OP_MAJOR(op) | (op & 0x7)) >>>>> + >>>>> +enum { >>>>> + OPC_GSLBX = 0x0 | OPC_LDC2, >>>>> + OPC_GSLHX = 0x1 | OPC_LDC2, >>>>> + OPC_GSLWX = 0x2 | OPC_LDC2, >>>>> + OPC_GSLDX = 0x3 | OPC_LDC2, >>>>> + OPC_GSLWXC1 = 0x6 | OPC_LDC2, >>>>> + OPC_GSLDXC1 = 0x7 | OPC_LDC2, >>>>> + OPC_GSSBX = 0x0 | OPC_SDC2, >>>>> + OPC_GSSHX = 0x1 | OPC_SDC2, >>>>> + OPC_GSSWX = 0x2 | OPC_SDC2, >>>>> + OPC_GSSDX = 0x3 | OPC_SDC2, >>>>> + OPC_GSSWXC1 = 0x6 | OPC_SDC2, >>>>> + OPC_GSSDXC1 = 0x7 | OPC_SDC2, >>>>> +}; >>>>> + >>>>> /* BSHFL opcodes */ >>>>> #define MASK_BSHFL(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6))) >>>>> >>>>> @@ -6172,6 +6190,165 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt, >>>>> tcg_temp_free(t0); >>>>> } >>>>> >>>>> +/* Loongson EXT LDC2/SDC2 */ >>>>> +static void gen_loongson_lsdc2(DisasContext *ctx, int rt, >>>>> + int rs, int rd) >>>> >>>> Alignment off (various occurences in this series). >>> OK, thanks. >>> >>>> >>>>> +{ >>>>> + int offset = (int8_t)(ctx->opcode >> 3); >>>> >>>> Please use sextract32() which is easier to read: >>>> >>>> int32_t offset = sextract32(ctx->opcode, 3, 8); >>> OK, thanks. 
>>> >>>> >>>>> + uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode); >>>>> + TCGv t0, t1; >>>>> + TCGv_i32 fp0; >>>>> + >>>>> + /* Pre-conditions */ >>>>> + switch (opc) { >>>>> + case OPC_GSLBX: >>>>> + case OPC_GSLHX: >>>>> + case OPC_GSLWX: >>>>> + case OPC_GSLDX: >>>>> + /* prefetch, implement as NOP */ >>>>> + if (rt == 0) { >>>>> + return; >>>>> + } >>>>> + break; >>>>> + case OPC_GSSBX: >>>>> + case OPC_GSSHX: >>>>> + case OPC_GSSWX: >>>>> + case OPC_GSSDX: >>>>> + break; >>>>> + case OPC_GSLWXC1: >>>>> +#if defined(TARGET_MIPS64) >>>>> + case OPC_GSLDXC1: >>>>> +#endif >>>>> + check_cp1_enabled(ctx); >>>>> + /* prefetch, implement as NOP */ >>>>> + if (rt == 0) { >>>>> + return; >>>>> + } >>>>> + break; >>>>> + case OPC_GSSWXC1: >>>>> +#if defined(TARGET_MIPS64) >>>>> + case OPC_GSSDXC1: >>>>> +#endif >>>>> + check_cp1_enabled(ctx); >>>>> + break; >>>>> + default: >>>>> + MIPS_INVAL("loongson_lsdc2"); >>>>> + generate_exception_end(ctx, EXCP_RI); >>>>> + return; >>>>> + break; >>>>> + } >>>>> + >>>>> + t0 = tcg_temp_new(); >>>>> + >>>>> + gen_base_offset_addr(ctx, t0, rs, offset); >>>>> + gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0); >>>>> + >>>>> + switch (opc) { >>>>> + case OPC_GSLBX: >>>>> + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB); >>>>> + gen_store_gpr(t0, rt); >>>>> + break; >>>>> + case OPC_GSLHX: >>>>> + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW | >>>>> + ctx->default_tcg_memop_mask); >>>> >>>> Do Loongson EXT plan to support unaligned accesses? >>> Not support in hardware, and Linux kernel emulate the unaligned cases. >> >> OK, that was my understanding. So we don't need to use >> default_tcg_memop_mask, we can directly use MO_ALIGN in >> place instead. > I read the code again, and found that MIPSR6 uses MO_UNALN while > others use MO_ALIGN. And I also realized that Loongson-3A4000 supports > unaligned access in hardware (the same as R6). So, I think I should > keep default_tcg_memop_mask here. 
And if possible, I want to set > MO_UNALN for all Loongson-3 processors. OK. > > Huacai >> >> Regards, >> >> Phil. >
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 916b57f..4d42cfc 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -484,6 +484,24 @@ enum {
     OPC_GSSDRC1     = 0x7 | OPC_GSSHFS,
 };
 
+/* Loongson EXT LDC2/SDC2 opcodes */
+#define MASK_LOONGSON_LSDC2(op)    (MASK_OP_MAJOR(op) | (op & 0x7))
+
+enum {
+    OPC_GSLBX      = 0x0 | OPC_LDC2,
+    OPC_GSLHX      = 0x1 | OPC_LDC2,
+    OPC_GSLWX      = 0x2 | OPC_LDC2,
+    OPC_GSLDX      = 0x3 | OPC_LDC2,
+    OPC_GSLWXC1    = 0x6 | OPC_LDC2,
+    OPC_GSLDXC1    = 0x7 | OPC_LDC2,
+    OPC_GSSBX      = 0x0 | OPC_SDC2,
+    OPC_GSSHX      = 0x1 | OPC_SDC2,
+    OPC_GSSWX      = 0x2 | OPC_SDC2,
+    OPC_GSSDX      = 0x3 | OPC_SDC2,
+    OPC_GSSWXC1    = 0x6 | OPC_SDC2,
+    OPC_GSSDXC1    = 0x7 | OPC_SDC2,
+};
+
 /* BSHFL opcodes */
 #define MASK_BSHFL(op)              (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
 
@@ -6172,6 +6190,162 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
     tcg_temp_free(t0);
 }
 
+/* Loongson EXT LDC2/SDC2 */
+static void gen_loongson_lsdc2(DisasContext *ctx, int rt,
+                               int rs, int rd)
+{
+    /* Signed 8-bit offset taken from opcode bits [10:3] */
+    int32_t offset = sextract32(ctx->opcode, 3, 8);
+    uint32_t opc = MASK_LOONGSON_LSDC2(ctx->opcode);
+    TCGv t0, t1;
+    TCGv_i32 fp0;
+
+    /* Pre-conditions */
+    switch (opc) {
+    case OPC_GSLBX:
+    case OPC_GSLHX:
+    case OPC_GSLWX:
+#if defined(TARGET_MIPS64)
+    case OPC_GSLDX:
+#endif
+        /* prefetch, implement as NOP */
+        if (rt == 0) {
+            return;
+        }
+        break;
+    case OPC_GSSBX:
+    case OPC_GSSHX:
+    case OPC_GSSWX:
+#if defined(TARGET_MIPS64)
+    case OPC_GSSDX:
+#endif
+        break;
+    case OPC_GSLWXC1:
+#if defined(TARGET_MIPS64)
+    case OPC_GSLDXC1:
+#endif
+        check_cp1_enabled(ctx);
+        /*
+         * prefetch, implement as NOP
+         * NOTE(review): rt selects an FPR in this group; confirm that
+         * $f0 as destination really is the prefetch encoding.
+         */
+        if (rt == 0) {
+            return;
+        }
+        break;
+    case OPC_GSSWXC1:
+#if defined(TARGET_MIPS64)
+    case OPC_GSSDXC1:
+#endif
+        check_cp1_enabled(ctx);
+        break;
+    default:
+        MIPS_INVAL("loongson_lsdc2");
+        generate_exception_end(ctx, EXCP_RI);
+        return;
+    }
+
+    t0 = tcg_temp_new();
+
+    /*
+     * Effective address = GPR[rs] + offset + GPR[rd], generated once for
+     * all cases.  Skip rd == 0: $zero adds nothing, and cpu_gpr[0] must
+     * not be handed to gen_op_addr_add().
+     */
+    gen_base_offset_addr(ctx, t0, rs, offset);
+    if (rd) {
+        gen_op_addr_add(ctx, t0, cpu_gpr[rd], t0);
+    }
+
+    switch (opc) {
+    case OPC_GSLBX:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
+        gen_store_gpr(t0, rt);
+        break;
+    case OPC_GSLHX:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESW |
+                           ctx->default_tcg_memop_mask);
+        gen_store_gpr(t0, rt);
+        break;
+    case OPC_GSLWX:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TESL |
+                           ctx->default_tcg_memop_mask);
+        gen_store_gpr(t0, rt);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_GSLDX:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        gen_store_gpr(t0, rt);
+        break;
+#endif
+    case OPC_GSLWXC1:
+        fp0 = tcg_temp_new_i32();
+        tcg_gen_qemu_ld_i32(fp0, t0, ctx->mem_idx, MO_TESL |
+                            ctx->default_tcg_memop_mask);
+        gen_store_fpr32(ctx, fp0, rt);
+        tcg_temp_free_i32(fp0);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_GSLDXC1:
+        tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        gen_store_fpr64(ctx, t0, rt);
+        break;
+#endif
+    case OPC_GSSBX:
+        t1 = tcg_temp_new();
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_SB);
+        tcg_temp_free(t1);
+        break;
+    case OPC_GSSHX:
+        t1 = tcg_temp_new();
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUW |
+                           ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+    case OPC_GSSWX:
+        t1 = tcg_temp_new();
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
+                           ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_GSSDX:
+        t1 = tcg_temp_new();
+        gen_load_gpr(t1, rt);
+        tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEQ |
+                           ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+#endif
+    case OPC_GSSWXC1:
+        fp0 = tcg_temp_new_i32();
+        gen_load_fpr32(ctx, fp0, rt);
+        tcg_gen_qemu_st_i32(fp0, t0, ctx->mem_idx, MO_TEUL |
+                            ctx->default_tcg_memop_mask);
+        tcg_temp_free_i32(fp0);
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_GSSDXC1:
+        t1 = tcg_temp_new();
+        gen_load_fpr64(ctx, t1, rt);
+        tcg_gen_qemu_st_i64(t1, t0, ctx->mem_idx, MO_TEQ |
+                            ctx->default_tcg_memop_mask);
+        tcg_temp_free(t1);
+        break;
+#endif
+    default:
+        break;
+    }
+
+    tcg_temp_free(t0);
+}
+
 /* Traps */
 static void gen_trap(DisasContext *ctx, uint32_t opc,
                      int rs, int rt, int16_t imm)
@@ -31055,6 +31229,8 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
                 /* OPC_JIC, OPC_JIALC */
                 gen_compute_compact_branch(ctx, op, 0, rt, imm);
             }
+        } else if (ctx->insn_flags & ASE_LEXT) {
+            gen_loongson_lsdc2(ctx, rt, rs, rd);
         } else {
             /* OPC_LWC2, OPC_SWC2 */
             /* COP2: Not implemented. */