@@ -761,22 +761,16 @@ static const void * const qemu_st_helpers[4] = {
WHICH is the offset into the CPUTLBEntry structure of the slot to read.
This should be offsetof addr_read or addr_write.
- Outputs:
- LABEL_PTRS is filled with the position of the forward jumps to the
- TLB miss case. This will always be a ,PN insn, so a 19-bit offset.
-
- Returns a register loaded with the low part of the address, adjusted
- as indicated by the TLB and so is a host address. Undefined in the
- TLB miss case. */
+ The result of the TLB comparison is in %[ix]cc. The sanitized address
+ is in the returned register, maybe %o0. The TLB addend is in %o1. */
static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index,
- int s_bits, const TCGArg *args,
- uint32_t **label_ptr, int which)
+ int s_bits, const TCGArg *args, int which)
{
const int addrlo = args[addrlo_idx];
- const int r0 = tcg_target_call_iarg_regs[0];
- const int r1 = tcg_target_call_iarg_regs[1];
- const int r2 = tcg_target_call_iarg_regs[2];
+ const int r0 = TCG_REG_O0;
+ const int r1 = TCG_REG_O1;
+ const int r2 = TCG_REG_O2;
int addr = addrlo;
int tlb_ofs;
@@ -807,60 +801,39 @@ static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index,
tlb_ofs = 0;
}
- /* ld [arg1 + which], arg2 */
+ /* Load the tlb comparator and the addend. */
tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
+ tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
/* subcc arg0, arg2, %g0 */
tcg_out_cmp(s, r0, r2, 0);
- /* bne,pn %[ix]cc, label0 */
- *label_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) |
- ((TARGET_LONG_BITS == 64) << 21)));
-
- /* TLB Hit. Compute the host address into r1. The ld is in the
- branch delay slot; harmless for the TLB miss case. */
- tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
-
+ /* If the guest address must be zero-extended, do so now. */
if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
- tcg_out_arith(s, r1, r0, r1, ARITH_ADD);
- } else {
- tcg_out_arith(s, r1, addrlo, r1, ARITH_ADD);
+ return r0;
}
-
- return r1;
+ return addrlo;
}
#endif /* CONFIG_SOFTMMU */
-static void tcg_out_qemu_ld_direct(TCGContext *s, int addr, int addend,
- int datalo, int datahi, int sizeop)
-{
+static const int qemu_ld_opc[8] = {
#ifdef TARGET_WORDS_BIGENDIAN
- static const int ld_opc[8] = {
- LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX
- };
+ LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX
#else
- static const int ld_opc[8] = {
- LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE
- };
+ LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE
#endif
+};
- if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
- /* Load all 64-bits into an O/G register. */
- int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
- tcg_out_ldst_rr(s, reg64, addr, addend, ld_opc[sizeop]);
- /* Move the two 32-bit pieces into the destination registers. */
- tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
- if (reg64 != datalo) {
- tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
- }
- } else {
- tcg_out_ldst_rr(s, datalo, addr, addend, ld_opc[sizeop]);
- }
-}
+static const int qemu_st_opc[4] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ STB, STH, STW, STX
+#else
+ STB, STH_LE, STW_LE, STX_LE
+#endif
+};
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
{
int addrlo_idx = 1, datalo, datahi, addr_reg;
#if defined(CONFIG_SOFTMMU)
@@ -869,7 +842,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
#endif
datahi = datalo = args[0];
- if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
datahi = args[1];
addrlo_idx = 2;
}
@@ -877,27 +850,59 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
#if defined(CONFIG_SOFTMMU)
memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
memi = args[memi_idx];
- s_bits = opc & 3;
+ s_bits = sizeop & 3;
addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args,
- label_ptr, offsetof(CPUTLBEntry, addr_read));
+ offsetof(CPUTLBEntry, addr_read));
- /* TLB Hit. */
- tcg_out_qemu_ld_direct(s, addr_reg, TCG_REG_G0, datalo, datahi, opc);
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ int reg64;
- /* b,pt,n label1 */
- label_ptr[1] = (uint32_t *)s->code_ptr;
- tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
- | (1 << 29) | (1 << 19)));
+ /* bne,pn %[xi]cc, label0 */
+ label_ptr[0] = (uint32_t *)s->code_ptr;
+ tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
+ | ((TARGET_LONG_BITS == 64) << 21)));
+
+ /* TLB Hit. */
+ /* Load all 64-bits into an O/G register. */
+ reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
+ tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+
+ /* Move the two 32-bit pieces into the destination registers. */
+ tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+ if (reg64 != datalo) {
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+ }
+
+ /* b,pt,n label1 */
+ label_ptr[1] = (uint32_t *)s->code_ptr;
+ tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
+ | (1 << 29) | (1 << 19)));
+ } else {
+ /* The fast path is exactly one insn. Thus we can perform the
+ entire TLB Hit in the (annulled) delay slot of the branch
+ over the TLB Miss case. */
+
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr[0] = NULL;
+ label_ptr[1] = (uint32_t *)s->code_ptr;
+ tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
+ | ((TARGET_LONG_BITS == 64) << 21)
+ | (1 << 29) | (1 << 19)));
+ /* delay slot */
+ tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+ }
/* TLB Miss. */
- *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
- (unsigned long)label_ptr[0]);
- n = 0;
-#ifdef CONFIG_TCG_PASS_AREG0
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
-#endif
+ if (label_ptr[0]) {
+ *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
+ (unsigned long)label_ptr[0]);
+ }
+ n = ARG_OFFSET;
+ if (ARG_OFFSET) {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ }
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
args[addrlo_idx + 1]);
@@ -925,7 +930,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
n = tcg_target_call_oarg_regs[0];
/* datalo = sign_extend(arg0) */
- switch(opc) {
+ switch (sizeop) {
case 0 | 4:
/* Recall that SRA sign extends from bit 31 through bit 63. */
tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL);
@@ -962,40 +967,35 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
addr_reg = TCG_REG_I5;
}
- tcg_out_qemu_ld_direct(s, addr_reg,
- (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
- datalo, datahi, opc);
-#endif /* CONFIG_SOFTMMU */
-}
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
-static void tcg_out_qemu_st_direct(TCGContext *s, int addr, int addend,
- int datalo, int datahi, int sizeop)
-{
-#ifdef TARGET_WORDS_BIGENDIAN
- static const int st_opc[4] = { STB, STH, STW, STX };
-#else
- static const int st_opc[4] = { STB, STH_LE, STW_LE, STX_LE };
-#endif
+ tcg_out_ldst_rr(s, reg64, addr_reg,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_ld_opc[sizeop]);
- if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
- tcg_out_arithi(s, TCG_REG_O0, datalo, 0, SHIFT_SRL);
- tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
- tcg_out_arith(s, TCG_REG_O0, TCG_REG_O0, TCG_REG_O2, ARITH_OR);
- datalo = TCG_REG_O0;
+ tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+ if (reg64 != datalo) {
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+ }
+ } else {
+ tcg_out_ldst_rr(s, datalo, addr_reg,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_ld_opc[sizeop]);
}
- tcg_out_ldst_rr(s, datalo, addr, addend, st_opc[sizeop]);
+#endif /* CONFIG_SOFTMMU */
}
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
{
int addrlo_idx = 1, datalo, datahi, addr_reg;
#if defined(CONFIG_SOFTMMU)
int memi_idx, memi, n;
- uint32_t *label_ptr[2];
+ uint32_t *label_ptr;
#endif
datahi = datalo = args[0];
- if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
datahi = args[1];
addrlo_idx = 2;
}
@@ -1004,33 +1004,40 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
memi = args[memi_idx];
- addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, opc, args,
- label_ptr, offsetof(CPUTLBEntry, addr_write));
+ addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
+ offsetof(CPUTLBEntry, addr_write));
- /* TLB Hit. */
- tcg_out_qemu_st_direct(s, addr_reg, TCG_REG_G0, datalo, datahi, opc);
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */
+ /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
+ tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
+ tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+ tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
+ datalo = TCG_REG_G1;
+ }
- /* b,pt,n label1 */
- label_ptr[1] = (uint32_t *)s->code_ptr;
- tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
+ /* The fast path is exactly one insn. Thus we can perform the entire
+ TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr = (uint32_t *)s->code_ptr;
+ tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
+ | ((TARGET_LONG_BITS == 64) << 21)
| (1 << 29) | (1 << 19)));
+ /* delay slot */
+ tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
/* TLB Miss. */
-
- *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
- (unsigned long)label_ptr[0]);
-
- n = 0;
-#ifdef CONFIG_TCG_PASS_AREG0
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
-#endif
+ n = ARG_OFFSET;
+ if (ARG_OFFSET) {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ }
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
args[addrlo_idx + 1]);
}
tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
args[addrlo_idx]);
- if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi);
}
tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo);
@@ -1042,7 +1049,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
sizeof(long));
/* qemu_st_helper[s_bits](arg0, arg1, arg2) */
- tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[opc]
+ tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop]
- (tcg_target_ulong)s->code_ptr) >> 2)
& 0x3fffffff));
/* delay slot */
@@ -1053,17 +1060,25 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
sizeof(long));
- *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr -
- (unsigned long)label_ptr[1]);
+ *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
+ (unsigned long)label_ptr);
#else
addr_reg = args[addrlo_idx];
if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
addr_reg = TCG_REG_I5;
}
- tcg_out_qemu_st_direct(s, addr_reg,
- (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
- datalo, datahi, opc);
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */
+ /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
+ tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
+ tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+ tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
+ datalo = TCG_REG_G1;
+ }
+ tcg_out_ldst_rr(s, datalo, addr_reg,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_st_opc[sizeop]);
#endif /* CONFIG_SOFTMMU */
}
Signed-off-by: Richard Henderson <rth@twiddle.net> --- tcg/sparc/tcg-target.c | 235 +++++++++++++++++++++++++---------------------- 1 files changed, 125 insertions(+), 110 deletions(-)