@@ -479,6 +479,14 @@ static TCGv cpu_dspctrl, btarget, bcond;
static TCGv_i32 hflags;
static TCGv_i32 fpu_fcr0, fpu_fcr31;
+/* FPU registers. These alias, but we'll only use one or the other in any
+ one TB based on MIPS_HFLAG_F64. */
+#if TCG_TARGET_REG_BITS == 64
+static TCGv_i32 fpu_f32[32];
+static TCGv_i32 fpu_fh32[32];
+#endif
+static TCGv_i64 fpu_f64[32];
+
static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
#include "gen-icount.h"
@@ -527,26 +535,40 @@ enum {
BS_EXCP = 3, /* We reached an exception condition */
};
-static const char *regnames[] =
- { "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
- "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
- "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra", };
+static const char * const regnames[] = {
+ "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+ "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra",
+};
-static const char *regnames_HI[] =
- { "HI0", "HI1", "HI2", "HI3", };
+static const char * const regnames_HI[] = {
+ "HI0", "HI1", "HI2", "HI3",
+};
-static const char *regnames_LO[] =
- { "LO0", "LO1", "LO2", "LO3", };
+static const char * const regnames_LO[] = {
+ "LO0", "LO1", "LO2", "LO3",
+};
-static const char *regnames_ACX[] =
- { "ACX0", "ACX1", "ACX2", "ACX3", };
+static const char * const regnames_ACX[] = {
+ "ACX0", "ACX1", "ACX2", "ACX3",
+};
-static const char *fregnames[] =
- { "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
- "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
- "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
- "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", };
+static const char * const fregnames[] = {
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
+};
+
+#if TCG_TARGET_REG_BITS == 64
+static const char * const fhregnames[] = {
+ "fh0", "fh1", "fh2", "fh3", "fh4", "fh5", "fh6", "fh7",
+ "fh8", "fh9", "fh10", "fh11", "fh12", "fh13", "fh14", "fh15",
+ "fh16", "fh17", "fh18", "fh19", "fh20", "fh21", "fh22", "fh23",
+ "fh24", "fh25", "fh26", "fh27", "fh28", "fh29", "fh30", "fh31",
+};
+#endif
#ifdef MIPS_DEBUG_DISAS
#define MIPS_DEBUG(fmt, ...) \
@@ -640,55 +662,91 @@ static inline void gen_store_srsgpr (int from, int to)
}
/* Floating point register moves. */
-static inline void gen_load_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
+static void gen_load_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
{
- tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(t, TCGV_LOW(fpu_f64[reg]));
+#else
+ if (ctx->hflags & MIPS_HFLAG_F64) {
+ tcg_gen_trunc_i64_i32(t, fpu_f64[reg]);
+ } else {
+ tcg_gen_mov_i32(t, fpu_f32[reg]);
+ }
+#endif
}
-static inline void gen_store_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
+static void gen_store_fpr32(DisasContext *ctx, TCGv_i32 t, int reg)
{
- tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg]), t);
+#else
+ if (ctx->hflags & MIPS_HFLAG_F64) {
+ TCGv_i64 t64 = MAKE_TCGV_I64(GET_TCGV_I32(t));
+ tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32);
+ } else {
+ tcg_gen_mov_i32(fpu_f32[reg], t);
+ }
+#endif
}
-static inline void gen_load_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
+static void gen_load_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
{
- tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(t, TCGV_HIGH(fpu_f64[reg]));
+#else
+ if (ctx->hflags & MIPS_HFLAG_F64) {
+ TCGv_i64 t64 = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t64, fpu_f64[reg], 32);
+ tcg_gen_trunc_i64_i32(t, t64);
+ tcg_temp_free_i64(t64);
+ } else {
+ tcg_gen_mov_i32(t, fpu_fh32[reg]);
+ }
+#endif
}
-static inline void gen_store_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
+static void gen_store_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg)
{
- tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(TCGV_HIGH(fpu_f64[reg]), t);
+#else
+ if (ctx->hflags & MIPS_HFLAG_F64) {
+ TCGv_i64 t64 = MAKE_TCGV_I64(GET_TCGV_I32(t));
+ tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32);
+ } else {
+ tcg_gen_mov_i32(fpu_fh32[reg], t);
+ }
+#endif
}
-static inline void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
+static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
{
if (ctx->hflags & MIPS_HFLAG_F64) {
- tcg_gen_ld_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
+ tcg_gen_mov_i64(t, fpu_f64[reg]);
} else {
- TCGv_i32 t0 = tcg_temp_new_i32();
- TCGv_i32 t1 = tcg_temp_new_i32();
- gen_load_fpr32(ctx, t0, reg & ~1);
- gen_load_fpr32(ctx, t1, reg | 1);
- tcg_gen_concat_i32_i64(t, t0, t1);
- tcg_temp_free_i32(t0);
- tcg_temp_free_i32(t1);
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_concat32_i64(t, fpu_f64[reg & ~1], fpu_f64[reg | 1]);
+#else
+ tcg_gen_concat_i32_i64(t, fpu_f32[reg & ~1], fpu_f32[reg | 1]);
+#endif
}
}
-static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
+static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
{
if (ctx->hflags & MIPS_HFLAG_F64) {
- tcg_gen_st_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
+ tcg_gen_mov_i64(fpu_f64[reg], t);
} else {
- TCGv_i64 t0 = tcg_temp_new_i64();
- TCGv_i32 t1 = tcg_temp_new_i32();
- tcg_gen_trunc_i64_i32(t1, t);
- gen_store_fpr32(ctx, t1, reg & ~1);
- tcg_gen_shri_i64(t0, t, 32);
- tcg_gen_trunc_i64_i32(t1, t0);
- gen_store_fpr32(ctx, t1, reg | 1);
- tcg_temp_free_i32(t1);
- tcg_temp_free_i64(t0);
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg & ~1]), TCGV_LOW(t));
+ tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg | 1]), TCGV_HIGH(t));
+#else
+ TCGv_i64 t64 = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t64, t, 32);
+ tcg_gen_trunc_i64_i32(fpu_f32[reg | 1], t64);
+ tcg_temp_free_i64(t64);
+ tcg_gen_trunc_i64_i32(fpu_f32[reg & ~1], t);
+#endif
}
}
@@ -12682,6 +12740,21 @@ static void mips_tcg_init(void)
offsetof(CPUMIPSState, active_fpu.fcr31),
"fcr31");
+#if TCG_TARGET_REG_BITS == 64
+ for (i = 0; i < 32; i++) {
+ int off = offsetof(CPUMIPSState, active_fpu.fpr[i].w[FP_ENDIAN_IDX]);
+ fpu_f32[i] = tcg_global_mem_new_i32(TCG_AREG0, off, fregnames[i]);
+ }
+ for (i = 0; i < 32; i++) {
+ int off = offsetof(CPUMIPSState, active_fpu.fpr[i].w[!FP_ENDIAN_IDX]);
+ fpu_fh32[i] = tcg_global_mem_new_i32(TCG_AREG0, off, fhregnames[i]);
+ }
+#endif
+ for (i = 0; i < 32; i++) {
+ int off = offsetof(CPUMIPSState, active_fpu.fpr[i].d); /* whole 64-bit reg; .w[FP_ENDIAN_IDX] is only its low word */
+ fpu_f64[i] = tcg_global_mem_new_i64(TCG_AREG0, off, fregnames[i]);
+ }
+
/* register helpers */
#define GEN_HELPER 2
#include "helper.h"
With normal FP, this doesn't have much effect on the generated code, because most of the FP operations are not CONST/PURE, and so we spill registers at about the same frequency as the explicit load/stores. But with Loongson multimedia instructions, which are all integral and whose helpers are in fact CONST+PURE, this greatly improves the code. On a 64-bit host, rather than over-use the deposit operation, we create TCG registers for both the 64-bit FPU register as a whole and the two 32-bit halves. We only ever reference the whole register or the two half registers in any one TB, so there's no problem with aliasing. On a 32-bit host, we only create the 64-bit FPU registers, and then directly reference the internal 32-bit TCG register halves as needed. Signed-off-by: Richard Henderson <rth@twiddle.net> --- target-mips/translate.c | 161 ++++++++++++++++++++++++++++++++++------------- 1 files changed, 117 insertions(+), 44 deletions(-)