Message ID | 1332894743-27418-6-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
On Wed, Mar 28, 2012 at 00:32, Richard Henderson <rth@twiddle.net> wrote: > Given that we have an opcode for all sizes, all endianness, > turn the functions into a simple table lookup. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/sparc/tcg-target.c | 384 +++++++++++++++++++----------------------------- > 1 files changed, 150 insertions(+), 234 deletions(-) > > diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c > index c74fc2c..5cea5a8 100644 > --- a/tcg/sparc/tcg-target.c > +++ b/tcg/sparc/tcg-target.c > @@ -294,6 +294,16 @@ static inline int tcg_target_const_match(tcg_target_long val, > #define ASI_PRIMARY_LITTLE 0x88 > #endif > > +#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) > +#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) > +#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) > +#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) > +#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) > + > +#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) > +#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) > +#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) > + > static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2, > int op) > { > @@ -366,66 +376,46 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, > } > } > > -static inline void tcg_out_ld_raw(TCGContext *s, int ret, > - tcg_target_long arg) > +static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1, > + int a2, int op) > { > - tcg_out_sethi(s, ret, arg); > - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) | > - INSN_IMM13(arg & 0x3ff)); > + tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); > } > > -static inline void tcg_out_ld_ptr(TCGContext *s, int ret, > - tcg_target_long arg) > +static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, > + int offset, int op) > { > - if (!check_fit_tl(arg, 10)) > - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL); > - if (TCG_TARGET_REG_BITS == 64) { 
> - tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) | > - INSN_IMM13(arg & 0x3ff)); > - } else { > - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) | > - INSN_IMM13(arg & 0x3ff)); > - } > -} > - > -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op) > -{ > - if (check_fit_tl(offset, 13)) > + if (check_fit_tl(offset, 13)) { > tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | > INSN_IMM13(offset)); > - else { > + } else { > tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset); > - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) | > - INSN_RS2(addr)); > + tcg_out_ldst_rr(s, ret, addr, TCG_REG_I5, op); > } > } > > -static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr, > - int offset, int op, int asi) > -{ > - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset); > - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) | > - INSN_ASI(asi) | INSN_RS2(addr)); > -} > - > static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, > TCGReg arg1, tcg_target_long arg2) > { > - if (type == TCG_TYPE_I32) > - tcg_out_ldst(s, ret, arg1, arg2, LDUW); > - else > - tcg_out_ldst(s, ret, arg1, arg2, LDX); > + tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); > } > > static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, > TCGReg arg1, tcg_target_long arg2) > { > - if (type == TCG_TYPE_I32) > - tcg_out_ldst(s, arg, arg1, arg2, STW); > - else > - tcg_out_ldst(s, arg, arg1, arg2, STX); > + tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? 
STW : STX)); > +} > + > +static inline void tcg_out_ld_ptr(TCGContext *s, int ret, > + tcg_target_long arg) > +{ > + if (!check_fit_tl(arg, 10)) { > + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); > + } > + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); > } > > + > static inline void tcg_out_sety(TCGContext *s, int rs) > { > tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); > @@ -757,22 +747,16 @@ static const void * const qemu_st_helpers[4] = { > WHICH is the offset into the CPUTLBEntry structure of the slot to read. > This should be offsetof addr_read or addr_write. > > - Outputs: > - LABEL_PTRS is filled with the position of the forward jumps to the > - TLB miss case. This will always be a ,PN insn, so a 19-bit offset. > - > - Returns a register loaded with the low part of the address, adjusted > - as indicated by the TLB and so is a host address. Undefined in the > - TLB miss case. */ > + The result of the TLB comparison is in %[ix]cc. The sanitized address > + is in the returned register, maybe %o0. The TLB addend is in %o1. */ > > static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index, > - int s_bits, const TCGArg *args, > - uint32_t **label_ptr, int which) > + int s_bits, const TCGArg *args, int which) > { > const int addrlo = args[addrlo_idx]; > - const int r0 = tcg_target_call_iarg_regs[0]; > - const int r1 = tcg_target_call_iarg_regs[1]; > - const int r2 = tcg_target_call_iarg_regs[2]; > + const int r0 = TCG_REG_O0; > + const int r1 = TCG_REG_O1; > + const int r2 = TCG_REG_O2; > int addr = addrlo; > int tlb_ofs; > > @@ -803,110 +787,39 @@ static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index, > tlb_ofs = 0; > } > > - /* ld [arg1 + which], arg2 */ > + /* Load the tlb comparator and the addend. 
*/ > tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which); > + tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); > > /* subcc arg0, arg2, %g0 */ > tcg_out_cmp(s, r0, r2, 0); > > - /* bne,pn %[ix]cc, label0 */ > - *label_ptr = (uint32_t *)s->code_ptr; > - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) | > - ((TARGET_LONG_BITS == 64) << 21))); > - > - /* TLB Hit. Compute the host address into r1. The ld is in the > - branch delay slot; harmless for the TLB miss case. */ > - tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); > - > + /* If the guest address must be zero-extended, do so now. */ > if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { > tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); > - tcg_out_arith(s, r1, r0, r1, ARITH_ADD); > - } else { > - tcg_out_arith(s, r1, addrlo, r1, ARITH_ADD); > + return r0; > } > - > - return r1; > + return addrlo; > } > #endif /* CONFIG_SOFTMMU */ > > -static void tcg_out_qemu_ld_direct(TCGContext *s, int addr, int datalo, > - int datahi, int sizeop) > -{ > +static const int qemu_ld_opc[8] = { > #ifdef TARGET_WORDS_BIGENDIAN > - const int bigendian = 1; > + LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX > #else > - const int bigendian = 0; > + LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE > #endif > - switch (sizeop) { > - case 0: > - /* ldub [addr], datalo */ > - tcg_out_ldst(s, datalo, addr, 0, LDUB); > - break; > - case 0 | 4: > - /* ldsb [addr], datalo */ > - tcg_out_ldst(s, datalo, addr, 0, LDSB); > - break; > - case 1: > - if (bigendian) { > - /* lduh [addr], datalo */ > - tcg_out_ldst(s, datalo, addr, 0, LDUH); > - } else { > - /* lduha [addr] ASI_PRIMARY_LITTLE, datalo */ > - tcg_out_ldst_asi(s, datalo, addr, 0, LDUHA, ASI_PRIMARY_LITTLE); > - } > - break; > - case 1 | 4: > - if (bigendian) { > - /* ldsh [addr], datalo */ > - tcg_out_ldst(s, datalo, addr, 0, LDSH); > - } else { > - /* ldsha [addr] ASI_PRIMARY_LITTLE, datalo */ > - 
tcg_out_ldst_asi(s, datalo, addr, 0, LDSHA, ASI_PRIMARY_LITTLE); > - } > - break; > - case 2: > - if (bigendian) { > - /* lduw [addr], datalo */ > - tcg_out_ldst(s, datalo, addr, 0, LDUW); > - } else { > - /* lduwa [addr] ASI_PRIMARY_LITTLE, datalo */ > - tcg_out_ldst_asi(s, datalo, addr, 0, LDUWA, ASI_PRIMARY_LITTLE); > - } > - break; > - case 2 | 4: > - if (bigendian) { > - /* ldsw [addr], datalo */ > - tcg_out_ldst(s, datalo, addr, 0, LDSW); > - } else { > - /* ldswa [addr] ASI_PRIMARY_LITTLE, datalo */ > - tcg_out_ldst_asi(s, datalo, addr, 0, LDSWA, ASI_PRIMARY_LITTLE); > - } > - break; > - case 3: > - if (TCG_TARGET_REG_BITS == 64) { > - if (bigendian) { > - /* ldx [addr], datalo */ > - tcg_out_ldst(s, datalo, addr, 0, LDX); > - } else { > - /* ldxa [addr] ASI_PRIMARY_LITTLE, datalo */ > - tcg_out_ldst_asi(s, datalo, addr, 0, LDXA, ASI_PRIMARY_LITTLE); > - } > - } else { > - if (bigendian) { > - tcg_out_ldst(s, datahi, addr, 0, LDUW); > - tcg_out_ldst(s, datalo, addr, 4, LDUW); > - } else { > - tcg_out_ldst_asi(s, datalo, addr, 0, LDUWA, ASI_PRIMARY_LITTLE); > - tcg_out_ldst_asi(s, datahi, addr, 4, LDUWA, ASI_PRIMARY_LITTLE); > - } > - } > - break; > - default: > - tcg_abort(); > - } > -} > +}; > > -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) > +static const int qemu_st_opc[4] = { > +#ifdef TARGET_WORDS_BIGENDIAN > + STB, STH, STW, STX > +#else > + STB, STH_LE, STW_LE, STX_LE > +#endif > +}; > + > +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop) > { > int addrlo_idx = 1, datalo, datahi, addr_reg; > #if defined(CONFIG_SOFTMMU) > @@ -915,7 +828,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) > #endif > > datahi = datalo = args[0]; > - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { > + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { > datahi = args[1]; > addrlo_idx = 2; > } > @@ -923,27 +836,59 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) > #if 
defined(CONFIG_SOFTMMU) > memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); > memi = args[memi_idx]; > - s_bits = opc & 3; > + s_bits = sizeop & 3; > > addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args, > - label_ptr, offsetof(CPUTLBEntry, addr_read)); > + offsetof(CPUTLBEntry, addr_read)); > > - /* TLB Hit. */ > - tcg_out_qemu_ld_direct(s, addr_reg, datalo, datahi, opc); > + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { > + int reg64; > > - /* b,pt,n label1 */ > - label_ptr[1] = (uint32_t *)s->code_ptr; > - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) > - | (1 << 29) | (1 << 19))); > + /* bne,pn %[xi]cc, label0 */ > + label_ptr[0] = (uint32_t *)s->code_ptr; > + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) > + | ((TARGET_LONG_BITS == 64) << 21))); > + > + /* TLB Hit. */ > + /* Load all 64-bits into an O/G register. */ > + reg64 = (datalo < 16 ? datalo : TCG_REG_O0); > + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]); > + > + /* Move the two 32-bit pieces into the destination registers. */ > + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); > + if (reg64 != datalo) { > + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); > + } > + > + /* b,pt,n label1 */ > + label_ptr[1] = (uint32_t *)s->code_ptr; > + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) > + | (1 << 29) | (1 << 19))); > + } else { > + /* The fast path is exactly one insn. Thus we can perform the > + entire TLB Hit in the (annulled) delay slot of the branch > + over the TLB Miss case. */ > + > + /* beq,a,pt %[xi]cc, label0 */ > + label_ptr[0] = NULL; > + label_ptr[1] = (uint32_t *)s->code_ptr; > + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) > + | ((TARGET_LONG_BITS == 64) << 21) > + | (1 << 29) | (1 << 19))); > + /* delay slot */ > + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]); > + } > > /* TLB Miss. 
*/ > > - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - > - (unsigned long)label_ptr[0]); > - n = 0; > -#ifdef CONFIG_TCG_PASS_AREG0 > - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0); > -#endif > + if (label_ptr[0]) { > + *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - > + (unsigned long)label_ptr[0]); > + } > + n = ARG_OFFSET; > + if (ARG_OFFSET) { > + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);

WARNING: suspect code indent for conditional statements (4, 7)
#395: FILE: tcg/sparc/tcg-target.c:889:
+    if (ARG_OFFSET) {
+       tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);

WARNING: suspect code indent for conditional statements (4, 9)
#542: FILE: tcg/sparc/tcg-target.c:1013:
+    if (ARG_OFFSET) {
+         tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);

total: 0 errors, 2 warnings, 525 lines checked

> + } > if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { > tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], > args[addrlo_idx + 1]); > @@ -971,7 +916,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) > > n = tcg_target_call_oarg_regs[0]; > /* datalo = sign_extend(arg0) */ > - switch(opc) { > + switch (sizeop) { > case 0 | 4: > /* Recall that SRA sign extends from bit 31 through bit 63. */ > tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL); > @@ -1008,75 +953,31 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) > tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL); > addr_reg = TCG_REG_I5; > } > - tcg_out_qemu_ld_direct(s, addr_reg, datalo, datahi, opc); > -#endif /* CONFIG_SOFTMMU */ > -} > + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { > + int reg64 = (datalo < 16 ?
datalo : TCG_REG_O0); > > -static void tcg_out_qemu_st_direct(TCGContext *s, int addr, int datalo, > - int datahi, int sizeop) > -{ > -#ifdef TARGET_WORDS_BIGENDIAN > - const int bigendian = 1; > -#else > - const int bigendian = 0; > -#endif > - switch (sizeop) { > - case 0: > - /* stb datalo, [addr] */ > - tcg_out_ldst(s, datalo, addr, 0, STB); > - break; > - case 1: > - if (bigendian) { > - /* sth datalo, [addr] */ > - tcg_out_ldst(s, datalo, addr, 0, STH); > - } else { > - /* stha datalo, [addr] ASI_PRIMARY_LITTLE */ > - tcg_out_ldst_asi(s, datalo, addr, 0, STHA, ASI_PRIMARY_LITTLE); > - } > - break; > - case 2: > - if (bigendian) { > - /* stw datalo, [addr] */ > - tcg_out_ldst(s, datalo, addr, 0, STW); > - } else { > - /* stwa datalo, [addr] ASI_PRIMARY_LITTLE */ > - tcg_out_ldst_asi(s, datalo, addr, 0, STWA, ASI_PRIMARY_LITTLE); > - } > - break; > - case 3: > - if (TCG_TARGET_REG_BITS == 64) { > - if (bigendian) { > - /* stx datalo, [addr] */ > - tcg_out_ldst(s, datalo, addr, 0, STX); > - } else { > - /* stxa datalo, [addr] ASI_PRIMARY_LITTLE */ > - tcg_out_ldst_asi(s, datalo, addr, 0, STXA, ASI_PRIMARY_LITTLE); > - } > - } else { > - if (bigendian) { > - tcg_out_ldst(s, datahi, addr, 0, STW); > - tcg_out_ldst(s, datalo, addr, 4, STW); > - } else { > - tcg_out_ldst_asi(s, datalo, addr, 0, STWA, ASI_PRIMARY_LITTLE); > - tcg_out_ldst_asi(s, datahi, addr, 4, STWA, ASI_PRIMARY_LITTLE); > - } > + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]); > + > + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); > + if (reg64 != datalo) { > + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); > } > - break; > - default: > - tcg_abort(); > + } else { > + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]); > } > +#endif /* CONFIG_SOFTMMU */ > } > > -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) > +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop) > { > int addrlo_idx = 1, datalo, datahi, 
addr_reg; > #if defined(CONFIG_SOFTMMU) > int memi_idx, memi, n; > - uint32_t *label_ptr[2]; > + uint32_t *label_ptr; > #endif > > datahi = datalo = args[0]; > - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { > + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { > datahi = args[1]; > addrlo_idx = 2; > } > @@ -1085,33 +986,40 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) > memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); > memi = args[memi_idx]; > > - addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, opc, args, > - label_ptr, offsetof(CPUTLBEntry, addr_write)); > + addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args, > + offsetof(CPUTLBEntry, addr_write)); > > - /* TLB Hit. */ > - tcg_out_qemu_st_direct(s, addr_reg, datalo, datahi, opc); > + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { > + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */ > + /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */ > + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL); > + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); > + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR); > + datalo = TCG_REG_G1; > + } > > - /* b,pt,n label1 */ > - label_ptr[1] = (uint32_t *)s->code_ptr; > - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) > + /* The fast path is exactly one insn. Thus we can perform the entire > + TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ > + /* beq,a,pt %[xi]cc, label0 */ > + label_ptr = (uint32_t *)s->code_ptr; > + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) > + | ((TARGET_LONG_BITS == 64) << 21) > | (1 << 29) | (1 << 19))); > + /* delay slot */ > + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]); > > /* TLB Miss. 
*/ > - > - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - > - (unsigned long)label_ptr[0]); > - > - n = 0; > -#ifdef CONFIG_TCG_PASS_AREG0 > - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0); > -#endif > + n = ARG_OFFSET; > + if (ARG_OFFSET) { > + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); > + } > if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { > tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], > args[addrlo_idx + 1]); > } > tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], > args[addrlo_idx]); > - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { > + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { > tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi); > } > tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo); > @@ -1123,7 +1031,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) > sizeof(long)); > > /* qemu_st_helper[s_bits](arg0, arg1, arg2) */ > - tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[opc] > + tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop] > - (tcg_target_ulong)s->code_ptr) >> 2) > & 0x3fffffff)); > /* delay slot */ > @@ -1134,15 +1042,23 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) > TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE - > sizeof(long)); > > - *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr - > - (unsigned long)label_ptr[1]); > + *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr - > + (unsigned long)label_ptr); > #else > addr_reg = args[addrlo_idx]; > if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { > tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL); > addr_reg = TCG_REG_I5; > } > - tcg_out_qemu_st_direct(s, addr_reg, datalo, datahi, opc); > + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { > + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */ > + /* ??? 
Redefine the temps from %i4/%i5 so that we have a o/g temp. */ > + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL); > + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); > + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR); > + datalo = TCG_REG_G1; > + } > + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_st_opc[sizeop]); > #endif /* CONFIG_SOFTMMU */ > } > > -- > 1.7.7.6 >
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index c74fc2c..5cea5a8 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -294,6 +294,16 @@ static inline int tcg_target_const_match(tcg_target_long val, #define ASI_PRIMARY_LITTLE 0x88 #endif +#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2, int op) { @@ -366,66 +376,46 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, } } -static inline void tcg_out_ld_raw(TCGContext *s, int ret, - tcg_target_long arg) +static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1, + int a2, int op) { - tcg_out_sethi(s, ret, arg); - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) | - INSN_IMM13(arg & 0x3ff)); + tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); } -static inline void tcg_out_ld_ptr(TCGContext *s, int ret, - tcg_target_long arg) +static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, + int offset, int op) { - if (!check_fit_tl(arg, 10)) - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL); - if (TCG_TARGET_REG_BITS == 64) { - tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) | - INSN_IMM13(arg & 0x3ff)); - } else { - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) | - INSN_IMM13(arg & 0x3ff)); - } -} - -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op) -{ - if (check_fit_tl(offset, 13)) + if (check_fit_tl(offset, 13)) { tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | INSN_IMM13(offset)); - else { + } else { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, 
offset); - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) | - INSN_RS2(addr)); + tcg_out_ldst_rr(s, ret, addr, TCG_REG_I5, op); } } -static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr, - int offset, int op, int asi) -{ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset); - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) | - INSN_ASI(asi) | INSN_RS2(addr)); -} - static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, tcg_target_long arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst(s, ret, arg1, arg2, LDUW); - else - tcg_out_ldst(s, ret, arg1, arg2, LDX); + tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, tcg_target_long arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst(s, arg, arg1, arg2, STW); - else - tcg_out_ldst(s, arg, arg1, arg2, STX); + tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); +} + +static inline void tcg_out_ld_ptr(TCGContext *s, int ret, + tcg_target_long arg) +{ + if (!check_fit_tl(arg, 10)) { + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); + } + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); } + static inline void tcg_out_sety(TCGContext *s, int rs) { tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); @@ -757,22 +747,16 @@ static const void * const qemu_st_helpers[4] = { WHICH is the offset into the CPUTLBEntry structure of the slot to read. This should be offsetof addr_read or addr_write. - Outputs: - LABEL_PTRS is filled with the position of the forward jumps to the - TLB miss case. This will always be a ,PN insn, so a 19-bit offset. - - Returns a register loaded with the low part of the address, adjusted - as indicated by the TLB and so is a host address. Undefined in the - TLB miss case. */ + The result of the TLB comparison is in %[ix]cc. The sanitized address + is in the returned register, maybe %o0. The TLB addend is in %o1. 
*/ static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index, - int s_bits, const TCGArg *args, - uint32_t **label_ptr, int which) + int s_bits, const TCGArg *args, int which) { const int addrlo = args[addrlo_idx]; - const int r0 = tcg_target_call_iarg_regs[0]; - const int r1 = tcg_target_call_iarg_regs[1]; - const int r2 = tcg_target_call_iarg_regs[2]; + const int r0 = TCG_REG_O0; + const int r1 = TCG_REG_O1; + const int r2 = TCG_REG_O2; int addr = addrlo; int tlb_ofs; @@ -803,110 +787,39 @@ static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index, tlb_ofs = 0; } - /* ld [arg1 + which], arg2 */ + /* Load the tlb comparator and the addend. */ tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which); + tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); /* subcc arg0, arg2, %g0 */ tcg_out_cmp(s, r0, r2, 0); - /* bne,pn %[ix]cc, label0 */ - *label_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) | - ((TARGET_LONG_BITS == 64) << 21))); - - /* TLB Hit. Compute the host address into r1. The ld is in the - branch delay slot; harmless for the TLB miss case. */ - tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); - + /* If the guest address must be zero-extended, do so now. 
*/ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); - tcg_out_arith(s, r1, r0, r1, ARITH_ADD); - } else { - tcg_out_arith(s, r1, addrlo, r1, ARITH_ADD); + return r0; } - - return r1; + return addrlo; } #endif /* CONFIG_SOFTMMU */ -static void tcg_out_qemu_ld_direct(TCGContext *s, int addr, int datalo, - int datahi, int sizeop) -{ +static const int qemu_ld_opc[8] = { #ifdef TARGET_WORDS_BIGENDIAN - const int bigendian = 1; + LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX #else - const int bigendian = 0; + LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE #endif - switch (sizeop) { - case 0: - /* ldub [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDUB); - break; - case 0 | 4: - /* ldsb [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDSB); - break; - case 1: - if (bigendian) { - /* lduh [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDUH); - } else { - /* lduha [addr] ASI_PRIMARY_LITTLE, datalo */ - tcg_out_ldst_asi(s, datalo, addr, 0, LDUHA, ASI_PRIMARY_LITTLE); - } - break; - case 1 | 4: - if (bigendian) { - /* ldsh [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDSH); - } else { - /* ldsha [addr] ASI_PRIMARY_LITTLE, datalo */ - tcg_out_ldst_asi(s, datalo, addr, 0, LDSHA, ASI_PRIMARY_LITTLE); - } - break; - case 2: - if (bigendian) { - /* lduw [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDUW); - } else { - /* lduwa [addr] ASI_PRIMARY_LITTLE, datalo */ - tcg_out_ldst_asi(s, datalo, addr, 0, LDUWA, ASI_PRIMARY_LITTLE); - } - break; - case 2 | 4: - if (bigendian) { - /* ldsw [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDSW); - } else { - /* ldswa [addr] ASI_PRIMARY_LITTLE, datalo */ - tcg_out_ldst_asi(s, datalo, addr, 0, LDSWA, ASI_PRIMARY_LITTLE); - } - break; - case 3: - if (TCG_TARGET_REG_BITS == 64) { - if (bigendian) { - /* ldx [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDX); - } else { - /* ldxa [addr] ASI_PRIMARY_LITTLE, datalo */ - 
tcg_out_ldst_asi(s, datalo, addr, 0, LDXA, ASI_PRIMARY_LITTLE); - } - } else { - if (bigendian) { - tcg_out_ldst(s, datahi, addr, 0, LDUW); - tcg_out_ldst(s, datalo, addr, 4, LDUW); - } else { - tcg_out_ldst_asi(s, datalo, addr, 0, LDUWA, ASI_PRIMARY_LITTLE); - tcg_out_ldst_asi(s, datahi, addr, 4, LDUWA, ASI_PRIMARY_LITTLE); - } - } - break; - default: - tcg_abort(); - } -} +}; -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +static const int qemu_st_opc[4] = { +#ifdef TARGET_WORDS_BIGENDIAN + STB, STH, STW, STX +#else + STB, STH_LE, STW_LE, STX_LE +#endif +}; + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop) { int addrlo_idx = 1, datalo, datahi, addr_reg; #if defined(CONFIG_SOFTMMU) @@ -915,7 +828,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #endif datahi = datalo = args[0]; - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { datahi = args[1]; addrlo_idx = 2; } @@ -923,27 +836,59 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #if defined(CONFIG_SOFTMMU) memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); memi = args[memi_idx]; - s_bits = opc & 3; + s_bits = sizeop & 3; addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args, - label_ptr, offsetof(CPUTLBEntry, addr_read)); + offsetof(CPUTLBEntry, addr_read)); - /* TLB Hit. */ - tcg_out_qemu_ld_direct(s, addr_reg, datalo, datahi, opc); + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + int reg64; - /* b,pt,n label1 */ - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) - | (1 << 29) | (1 << 19))); + /* bne,pn %[xi]cc, label0 */ + label_ptr[0] = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) + | ((TARGET_LONG_BITS == 64) << 21))); + + /* TLB Hit. */ + /* Load all 64-bits into an O/G register. */ + reg64 = (datalo < 16 ? 
datalo : TCG_REG_O0); + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]); + + /* Move the two 32-bit pieces into the destination registers. */ + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); + if (reg64 != datalo) { + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); + } + + /* b,pt,n label1 */ + label_ptr[1] = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) + | (1 << 29) | (1 << 19))); + } else { + /* The fast path is exactly one insn. Thus we can perform the + entire TLB Hit in the (annulled) delay slot of the branch + over the TLB Miss case. */ + + /* beq,a,pt %[xi]cc, label0 */ + label_ptr[0] = NULL; + label_ptr[1] = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) + | ((TARGET_LONG_BITS == 64) << 21) + | (1 << 29) | (1 << 19))); + /* delay slot */ + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]); + } /* TLB Miss. */ - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[0]); - n = 0; -#ifdef CONFIG_TCG_PASS_AREG0 - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0); -#endif + if (label_ptr[0]) { + *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - + (unsigned long)label_ptr[0]); + } + n = ARG_OFFSET; + if (ARG_OFFSET) { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + } if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], args[addrlo_idx + 1]); @@ -971,7 +916,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) n = tcg_target_call_oarg_regs[0]; /* datalo = sign_extend(arg0) */ - switch(opc) { + switch (sizeop) { case 0 | 4: /* Recall that SRA sign extends from bit 31 through bit 63. 
*/ tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL); @@ -1008,75 +953,31 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL); addr_reg = TCG_REG_I5; } - tcg_out_qemu_ld_direct(s, addr_reg, datalo, datahi, opc); -#endif /* CONFIG_SOFTMMU */ -} + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + int reg64 = (datalo < 16 ? datalo : TCG_REG_O0); -static void tcg_out_qemu_st_direct(TCGContext *s, int addr, int datalo, - int datahi, int sizeop) -{ -#ifdef TARGET_WORDS_BIGENDIAN - const int bigendian = 1; -#else - const int bigendian = 0; -#endif - switch (sizeop) { - case 0: - /* stb datalo, [addr] */ - tcg_out_ldst(s, datalo, addr, 0, STB); - break; - case 1: - if (bigendian) { - /* sth datalo, [addr] */ - tcg_out_ldst(s, datalo, addr, 0, STH); - } else { - /* stha datalo, [addr] ASI_PRIMARY_LITTLE */ - tcg_out_ldst_asi(s, datalo, addr, 0, STHA, ASI_PRIMARY_LITTLE); - } - break; - case 2: - if (bigendian) { - /* stw datalo, [addr] */ - tcg_out_ldst(s, datalo, addr, 0, STW); - } else { - /* stwa datalo, [addr] ASI_PRIMARY_LITTLE */ - tcg_out_ldst_asi(s, datalo, addr, 0, STWA, ASI_PRIMARY_LITTLE); - } - break; - case 3: - if (TCG_TARGET_REG_BITS == 64) { - if (bigendian) { - /* stx datalo, [addr] */ - tcg_out_ldst(s, datalo, addr, 0, STX); - } else { - /* stxa datalo, [addr] ASI_PRIMARY_LITTLE */ - tcg_out_ldst_asi(s, datalo, addr, 0, STXA, ASI_PRIMARY_LITTLE); - } - } else { - if (bigendian) { - tcg_out_ldst(s, datahi, addr, 0, STW); - tcg_out_ldst(s, datalo, addr, 4, STW); - } else { - tcg_out_ldst_asi(s, datalo, addr, 0, STWA, ASI_PRIMARY_LITTLE); - tcg_out_ldst_asi(s, datahi, addr, 4, STWA, ASI_PRIMARY_LITTLE); - } + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]); + + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); + if (reg64 != datalo) { + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); } - break; - default: - tcg_abort(); + } else { + tcg_out_ldst_rr(s, datalo, addr_reg, 
TCG_REG_G0, qemu_ld_opc[sizeop]); } +#endif /* CONFIG_SOFTMMU */ } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop) { int addrlo_idx = 1, datalo, datahi, addr_reg; #if defined(CONFIG_SOFTMMU) int memi_idx, memi, n; - uint32_t *label_ptr[2]; + uint32_t *label_ptr; #endif datahi = datalo = args[0]; - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { datahi = args[1]; addrlo_idx = 2; } @@ -1085,33 +986,40 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); memi = args[memi_idx]; - addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, opc, args, - label_ptr, offsetof(CPUTLBEntry, addr_write)); + addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args, + offsetof(CPUTLBEntry, addr_write)); - /* TLB Hit. */ - tcg_out_qemu_st_direct(s, addr_reg, datalo, datahi, opc); + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */ + /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */ + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL); + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR); + datalo = TCG_REG_G1; + } - /* b,pt,n label1 */ - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) + /* The fast path is exactly one insn. Thus we can perform the entire + TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) + | ((TARGET_LONG_BITS == 64) << 21) | (1 << 29) | (1 << 19))); + /* delay slot */ + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]); /* TLB Miss. 
*/ - - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[0]); - - n = 0; -#ifdef CONFIG_TCG_PASS_AREG0 - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0); -#endif + n = ARG_OFFSET; + if (ARG_OFFSET) { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + } if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], args[addrlo_idx + 1]); } tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], args[addrlo_idx]); - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi); } tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo); @@ -1123,7 +1031,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) sizeof(long)); /* qemu_st_helper[s_bits](arg0, arg1, arg2) */ - tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[opc] + tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop] - (tcg_target_ulong)s->code_ptr) >> 2) & 0x3fffffff)); /* delay slot */ @@ -1134,15 +1042,23 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE - sizeof(long)); - *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[1]); + *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr - + (unsigned long)label_ptr); #else addr_reg = args[addrlo_idx]; if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL); addr_reg = TCG_REG_I5; } - tcg_out_qemu_st_direct(s, addr_reg, datalo, datahi, opc); + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */ + /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. 
*/ + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL); + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR); + datalo = TCG_REG_G1; + } + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_st_opc[sizeop]); #endif /* CONFIG_SOFTMMU */ }
Given that we have an opcode for every size and every endianness, turn the
qemu_ld/qemu_st direct-emission functions into a simple table lookup.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/sparc/tcg-target.c | 384 +++++++++++++++++++-----------------------------
 1 files changed, 150 insertions(+), 234 deletions(-)