From patchwork Wed Mar 28 00:32:14 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 149057 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 2D8A0B6F98 for ; Wed, 28 Mar 2012 11:43:00 +1100 (EST) Received: from localhost ([::1]:50895 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1SCgpa-00080c-Go for incoming@patchwork.ozlabs.org; Tue, 27 Mar 2012 20:33:42 -0400 Received: from eggs.gnu.org ([208.118.235.92]:32981) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1SCgoc-0006If-VM for qemu-devel@nongnu.org; Tue, 27 Mar 2012 20:32:45 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1SCgoY-0008Sq-OY for qemu-devel@nongnu.org; Tue, 27 Mar 2012 20:32:42 -0400 Received: from mail-gx0-f173.google.com ([209.85.161.173]:64818) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1SCgoY-0008SX-Fx for qemu-devel@nongnu.org; Tue, 27 Mar 2012 20:32:38 -0400 Received: by ggnp2 with SMTP id p2so520341ggn.4 for ; Tue, 27 Mar 2012 17:32:36 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=sender:from:to:cc:subject:date:message-id:x-mailer:in-reply-to :references; bh=02BNuemoLD8renuN5dd8ShA90olKpYr293MDTDx5E4w=; b=wX67zdYv9zbJ+h4jzZqfqRyEPVNipPL7JI0ovtxnVYd/HbyvBARxvwyjjmXAFD65ld mXQuhKRDEnVSQEu0H85+2P2vjl+bHgX1P54XooA6XkoXOKQt0p6/aV7hVj4J/6yP3jBr eEvKjgCaHWdFR+PODJnTfQhyHLeN4kE5vl7VWCeoAJWqt0cRyr9k7wypvYniclrFNS+v YJpikMg+8j5ReE0N2VV8HwwQN+GmKxXedwfMNWz37Xx46r0CouTDrtitEQTrU6bWcCSy MkQ0XwNrsnT0NPQ7ulrZmwbW+92NR1sUoDQTEVOo+GfxCcUXN2AkHtnB7UrflLJkNpT9 hHhw== Received: by 10.68.237.1 with SMTP id 
uy1mr2885771pbc.99.1332894756385; Tue, 27 Mar 2012 17:32:36 -0700 (PDT) Received: from anchor.twiddle.home.com ([173.160.232.49]) by mx.google.com with ESMTPS id f5sm1418749pbe.26.2012.03.27.17.32.35 (version=TLSv1/SSLv3 cipher=OTHER); Tue, 27 Mar 2012 17:32:35 -0700 (PDT) From: Richard Henderson To: qemu-devel@nongnu.org Date: Tue, 27 Mar 2012 17:32:14 -0700 Message-Id: <1332894743-27418-6-git-send-email-rth@twiddle.net> X-Mailer: git-send-email 1.7.7.6 In-Reply-To: <1332894743-27418-1-git-send-email-rth@twiddle.net> References: <1332894743-27418-1-git-send-email-rth@twiddle.net> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 209.85.161.173 Cc: blauwirbel@gmail.com Subject: [Qemu-devel] [PATCH 05/14] tcg-sparc: Simplify qemu_ld/st direct memory paths. X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Given that we have an opcode for every access size in both endiannesses, turn the direct load/store functions into a simple table lookup. 
Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c | 384 +++++++++++++++++++----------------------------- 1 files changed, 150 insertions(+), 234 deletions(-) diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index c74fc2c..5cea5a8 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -294,6 +294,16 @@ static inline int tcg_target_const_match(tcg_target_long val, #define ASI_PRIMARY_LITTLE 0x88 #endif +#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2, int op) { @@ -366,66 +376,46 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, } } -static inline void tcg_out_ld_raw(TCGContext *s, int ret, - tcg_target_long arg) +static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1, + int a2, int op) { - tcg_out_sethi(s, ret, arg); - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) | - INSN_IMM13(arg & 0x3ff)); + tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); } -static inline void tcg_out_ld_ptr(TCGContext *s, int ret, - tcg_target_long arg) +static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, + int offset, int op) { - if (!check_fit_tl(arg, 10)) - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL); - if (TCG_TARGET_REG_BITS == 64) { - tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) | - INSN_IMM13(arg & 0x3ff)); - } else { - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) | - INSN_IMM13(arg & 0x3ff)); - } -} - -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op) -{ - if (check_fit_tl(offset, 13)) 
+ if (check_fit_tl(offset, 13)) { tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | INSN_IMM13(offset)); - else { + } else { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset); - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) | - INSN_RS2(addr)); + tcg_out_ldst_rr(s, ret, addr, TCG_REG_I5, op); } } -static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr, - int offset, int op, int asi) -{ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset); - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) | - INSN_ASI(asi) | INSN_RS2(addr)); -} - static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, tcg_target_long arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst(s, ret, arg1, arg2, LDUW); - else - tcg_out_ldst(s, ret, arg1, arg2, LDX); + tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, tcg_target_long arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst(s, arg, arg1, arg2, STW); - else - tcg_out_ldst(s, arg, arg1, arg2, STX); + tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); +} + +static inline void tcg_out_ld_ptr(TCGContext *s, int ret, + tcg_target_long arg) +{ + if (!check_fit_tl(arg, 10)) { + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); + } + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); } + static inline void tcg_out_sety(TCGContext *s, int rs) { tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); @@ -757,22 +747,16 @@ static const void * const qemu_st_helpers[4] = { WHICH is the offset into the CPUTLBEntry structure of the slot to read. This should be offsetof addr_read or addr_write. - Outputs: - LABEL_PTRS is filled with the position of the forward jumps to the - TLB miss case. This will always be a ,PN insn, so a 19-bit offset. - - Returns a register loaded with the low part of the address, adjusted - as indicated by the TLB and so is a host address. 
Undefined in the - TLB miss case. */ + The result of the TLB comparison is in %[ix]cc. The sanitized address + is in the returned register, maybe %o0. The TLB addend is in %o1. */ static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index, - int s_bits, const TCGArg *args, - uint32_t **label_ptr, int which) + int s_bits, const TCGArg *args, int which) { const int addrlo = args[addrlo_idx]; - const int r0 = tcg_target_call_iarg_regs[0]; - const int r1 = tcg_target_call_iarg_regs[1]; - const int r2 = tcg_target_call_iarg_regs[2]; + const int r0 = TCG_REG_O0; + const int r1 = TCG_REG_O1; + const int r2 = TCG_REG_O2; int addr = addrlo; int tlb_ofs; @@ -803,110 +787,39 @@ static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index, tlb_ofs = 0; } - /* ld [arg1 + which], arg2 */ + /* Load the tlb comparator and the addend. */ tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which); + tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); /* subcc arg0, arg2, %g0 */ tcg_out_cmp(s, r0, r2, 0); - /* bne,pn %[ix]cc, label0 */ - *label_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) | - ((TARGET_LONG_BITS == 64) << 21))); - - /* TLB Hit. Compute the host address into r1. The ld is in the - branch delay slot; harmless for the TLB miss case. */ - tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); - + /* If the guest address must be zero-extended, do so now. 
*/ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); - tcg_out_arith(s, r1, r0, r1, ARITH_ADD); - } else { - tcg_out_arith(s, r1, addrlo, r1, ARITH_ADD); + return r0; } - - return r1; + return addrlo; } #endif /* CONFIG_SOFTMMU */ -static void tcg_out_qemu_ld_direct(TCGContext *s, int addr, int datalo, - int datahi, int sizeop) -{ +static const int qemu_ld_opc[8] = { #ifdef TARGET_WORDS_BIGENDIAN - const int bigendian = 1; + LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX #else - const int bigendian = 0; + LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE #endif - switch (sizeop) { - case 0: - /* ldub [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDUB); - break; - case 0 | 4: - /* ldsb [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDSB); - break; - case 1: - if (bigendian) { - /* lduh [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDUH); - } else { - /* lduha [addr] ASI_PRIMARY_LITTLE, datalo */ - tcg_out_ldst_asi(s, datalo, addr, 0, LDUHA, ASI_PRIMARY_LITTLE); - } - break; - case 1 | 4: - if (bigendian) { - /* ldsh [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDSH); - } else { - /* ldsha [addr] ASI_PRIMARY_LITTLE, datalo */ - tcg_out_ldst_asi(s, datalo, addr, 0, LDSHA, ASI_PRIMARY_LITTLE); - } - break; - case 2: - if (bigendian) { - /* lduw [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDUW); - } else { - /* lduwa [addr] ASI_PRIMARY_LITTLE, datalo */ - tcg_out_ldst_asi(s, datalo, addr, 0, LDUWA, ASI_PRIMARY_LITTLE); - } - break; - case 2 | 4: - if (bigendian) { - /* ldsw [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDSW); - } else { - /* ldswa [addr] ASI_PRIMARY_LITTLE, datalo */ - tcg_out_ldst_asi(s, datalo, addr, 0, LDSWA, ASI_PRIMARY_LITTLE); - } - break; - case 3: - if (TCG_TARGET_REG_BITS == 64) { - if (bigendian) { - /* ldx [addr], datalo */ - tcg_out_ldst(s, datalo, addr, 0, LDX); - } else { - /* ldxa [addr] ASI_PRIMARY_LITTLE, datalo */ - 
tcg_out_ldst_asi(s, datalo, addr, 0, LDXA, ASI_PRIMARY_LITTLE); - } - } else { - if (bigendian) { - tcg_out_ldst(s, datahi, addr, 0, LDUW); - tcg_out_ldst(s, datalo, addr, 4, LDUW); - } else { - tcg_out_ldst_asi(s, datalo, addr, 0, LDUWA, ASI_PRIMARY_LITTLE); - tcg_out_ldst_asi(s, datahi, addr, 4, LDUWA, ASI_PRIMARY_LITTLE); - } - } - break; - default: - tcg_abort(); - } -} +}; -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +static const int qemu_st_opc[4] = { +#ifdef TARGET_WORDS_BIGENDIAN + STB, STH, STW, STX +#else + STB, STH_LE, STW_LE, STX_LE +#endif +}; + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop) { int addrlo_idx = 1, datalo, datahi, addr_reg; #if defined(CONFIG_SOFTMMU) @@ -915,7 +828,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #endif datahi = datalo = args[0]; - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { datahi = args[1]; addrlo_idx = 2; } @@ -923,27 +836,59 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #if defined(CONFIG_SOFTMMU) memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); memi = args[memi_idx]; - s_bits = opc & 3; + s_bits = sizeop & 3; addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args, - label_ptr, offsetof(CPUTLBEntry, addr_read)); + offsetof(CPUTLBEntry, addr_read)); - /* TLB Hit. */ - tcg_out_qemu_ld_direct(s, addr_reg, datalo, datahi, opc); + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + int reg64; - /* b,pt,n label1 */ - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) - | (1 << 29) | (1 << 19))); + /* bne,pn %[xi]cc, label0 */ + label_ptr[0] = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) + | ((TARGET_LONG_BITS == 64) << 21))); + + /* TLB Hit. */ + /* Load all 64-bits into an O/G register. */ + reg64 = (datalo < 16 ? 
datalo : TCG_REG_O0); + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]); + + /* Move the two 32-bit pieces into the destination registers. */ + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); + if (reg64 != datalo) { + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); + } + + /* b,pt,n label1 */ + label_ptr[1] = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) + | (1 << 29) | (1 << 19))); + } else { + /* The fast path is exactly one insn. Thus we can perform the + entire TLB Hit in the (annulled) delay slot of the branch + over the TLB Miss case. */ + + /* beq,a,pt %[xi]cc, label0 */ + label_ptr[0] = NULL; + label_ptr[1] = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) + | ((TARGET_LONG_BITS == 64) << 21) + | (1 << 29) | (1 << 19))); + /* delay slot */ + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]); + } /* TLB Miss. */ - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[0]); - n = 0; -#ifdef CONFIG_TCG_PASS_AREG0 - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0); -#endif + if (label_ptr[0]) { + *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - + (unsigned long)label_ptr[0]); + } + n = ARG_OFFSET; + if (ARG_OFFSET) { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + } if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], args[addrlo_idx + 1]); @@ -971,7 +916,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) n = tcg_target_call_oarg_regs[0]; /* datalo = sign_extend(arg0) */ - switch(opc) { + switch (sizeop) { case 0 | 4: /* Recall that SRA sign extends from bit 31 through bit 63. 
*/ tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL); @@ -1008,75 +953,31 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL); addr_reg = TCG_REG_I5; } - tcg_out_qemu_ld_direct(s, addr_reg, datalo, datahi, opc); -#endif /* CONFIG_SOFTMMU */ -} + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + int reg64 = (datalo < 16 ? datalo : TCG_REG_O0); -static void tcg_out_qemu_st_direct(TCGContext *s, int addr, int datalo, - int datahi, int sizeop) -{ -#ifdef TARGET_WORDS_BIGENDIAN - const int bigendian = 1; -#else - const int bigendian = 0; -#endif - switch (sizeop) { - case 0: - /* stb datalo, [addr] */ - tcg_out_ldst(s, datalo, addr, 0, STB); - break; - case 1: - if (bigendian) { - /* sth datalo, [addr] */ - tcg_out_ldst(s, datalo, addr, 0, STH); - } else { - /* stha datalo, [addr] ASI_PRIMARY_LITTLE */ - tcg_out_ldst_asi(s, datalo, addr, 0, STHA, ASI_PRIMARY_LITTLE); - } - break; - case 2: - if (bigendian) { - /* stw datalo, [addr] */ - tcg_out_ldst(s, datalo, addr, 0, STW); - } else { - /* stwa datalo, [addr] ASI_PRIMARY_LITTLE */ - tcg_out_ldst_asi(s, datalo, addr, 0, STWA, ASI_PRIMARY_LITTLE); - } - break; - case 3: - if (TCG_TARGET_REG_BITS == 64) { - if (bigendian) { - /* stx datalo, [addr] */ - tcg_out_ldst(s, datalo, addr, 0, STX); - } else { - /* stxa datalo, [addr] ASI_PRIMARY_LITTLE */ - tcg_out_ldst_asi(s, datalo, addr, 0, STXA, ASI_PRIMARY_LITTLE); - } - } else { - if (bigendian) { - tcg_out_ldst(s, datahi, addr, 0, STW); - tcg_out_ldst(s, datalo, addr, 4, STW); - } else { - tcg_out_ldst_asi(s, datalo, addr, 0, STWA, ASI_PRIMARY_LITTLE); - tcg_out_ldst_asi(s, datahi, addr, 4, STWA, ASI_PRIMARY_LITTLE); - } + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]); + + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); + if (reg64 != datalo) { + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); } - break; - default: - tcg_abort(); + } else { + tcg_out_ldst_rr(s, datalo, addr_reg, 
TCG_REG_G0, qemu_ld_opc[sizeop]); } +#endif /* CONFIG_SOFTMMU */ } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop) { int addrlo_idx = 1, datalo, datahi, addr_reg; #if defined(CONFIG_SOFTMMU) int memi_idx, memi, n; - uint32_t *label_ptr[2]; + uint32_t *label_ptr; #endif datahi = datalo = args[0]; - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { datahi = args[1]; addrlo_idx = 2; } @@ -1085,33 +986,40 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); memi = args[memi_idx]; - addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, opc, args, - label_ptr, offsetof(CPUTLBEntry, addr_write)); + addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args, + offsetof(CPUTLBEntry, addr_write)); - /* TLB Hit. */ - tcg_out_qemu_st_direct(s, addr_reg, datalo, datahi, opc); + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */ + /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */ + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL); + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR); + datalo = TCG_REG_G1; + } - /* b,pt,n label1 */ - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) + /* The fast path is exactly one insn. Thus we can perform the entire + TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = (uint32_t *)s->code_ptr; + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) + | ((TARGET_LONG_BITS == 64) << 21) | (1 << 29) | (1 << 19))); + /* delay slot */ + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]); /* TLB Miss. 
*/ - - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[0]); - - n = 0; -#ifdef CONFIG_TCG_PASS_AREG0 - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0); -#endif + n = ARG_OFFSET; + if (ARG_OFFSET) { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + } if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], args[addrlo_idx + 1]); } tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], args[addrlo_idx]); - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi); } tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo); @@ -1123,7 +1031,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) sizeof(long)); /* qemu_st_helper[s_bits](arg0, arg1, arg2) */ - tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[opc] + tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop] - (tcg_target_ulong)s->code_ptr) >> 2) & 0x3fffffff)); /* delay slot */ @@ -1134,15 +1042,23 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE - sizeof(long)); - *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[1]); + *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr - + (unsigned long)label_ptr); #else addr_reg = args[addrlo_idx]; if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL); addr_reg = TCG_REG_I5; } - tcg_out_qemu_st_direct(s, addr_reg, datalo, datahi, opc); + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */ + /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. 
*/ + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL); + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR); + datalo = TCG_REG_G1; + } + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_st_opc[sizeop]); #endif /* CONFIG_SOFTMMU */ }