From patchwork Tue Oct 9 20:32:27 2012
X-Patchwork-Submitter: Aurelien Jarno
X-Patchwork-Id: 190447
From: Aurelien Jarno <aurelien@aurel32.net>
To: qemu-devel@nongnu.org
Cc: Aurelien Jarno <aurelien@aurel32.net>
Date: Tue, 9 Oct 2012 22:32:27 +0200
Message-Id: <1349814748-22552-4-git-send-email-aurelien@aurel32.net>
In-Reply-To: <1349814748-22552-1-git-send-email-aurelien@aurel32.net>
References: <1349814748-22552-1-git-send-email-aurelien@aurel32.net>
Subject: [Qemu-devel] [PATCH 3/4] tcg/ia64: remove suboptimal register shifting in qemu_ld/st ops

Remove the suboptimal register shifting in the qemu_ld/st ops that was
introduced when CONFIG_TCG_PASS_AREG0 support was added. As mem_idx is
now loaded into register R58/R59 for the slow path, we have to make
sure to load it last, so as not to add additional register constraints.

Signed-off-by: Aurelien Jarno
Reviewed-by: Richard Henderson
---
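To illustrate the point about constraints, here is a sketch, not part of
the patch itself, of the call tail this produces for stores. It reuses
the emitters already defined in tcg/ia64/tcg-target.c; the wrapper
function name is made up for the example, the real code lives inline in
tcg_out_qemu_st(). The slow-path helper takes its arguments in
consecutive registers (R56 = env, R57 = addr, R58 = data,
R59 = mem_idx). Since mem_idx is an immediate, its load depends on
nothing else and can share the final bundle with the call, which is
what makes the old mII bundle of back-to-back register moves
unnecessary:

/* Sketch only, assuming the surrounding definitions of
 * tcg/ia64/tcg-target.c; sketch_st_call_tail is a hypothetical name. */
static void sketch_st_call_tail(TCGContext *s, int opc, int data_reg,
                                int mem_index)
{
    tcg_out_bundle(s, miB,
                   /* P6, TLB hit: perform the store directly */
                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                               data_reg, TCG_REG_R3),
                   /* P7, TLB miss: R59 = mem_index + R0 (R0 reads as 0) */
                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
                               mem_index, TCG_REG_R0),
                   /* P7: call the store helper through B6 */
                   tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                               TCG_REG_B0, TCG_REG_B6));
}

The load path works the same way with one argument fewer: mem_idx goes
into R58, loaded in the final miB bundle next to the helper call.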
 tcg/ia64/tcg-target.c |   76 ++++++++++++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 39 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index d4d350f..16edc1b 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1438,7 +1438,7 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
 
 /* Load and compare a TLB entry, and return the result in (p6, p7).
    R2 is loaded with the address of the addend TLB entry.
-   R56 is loaded with the address, zero extented on 32-bit targets.  */
+   R57 is loaded with the address, zero extended on 32-bit targets.  */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                     int s_bits, uint64_t offset_rw,
                                     uint64_t offset_addend)
@@ -1456,9 +1456,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                    tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
                                offset_rw, TCG_REG_R2),
 #if TARGET_LONG_BITS == 32
-                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R56, addr_reg),
+                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
 #else
-                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R56,
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57,
                               0, addr_reg),
 #endif
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
@@ -1466,12 +1466,12 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
     tcg_out_bundle(s, mII,
                    tcg_opc_m3 (TCG_REG_P0,
                                (TARGET_LONG_BITS == 32
-                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R57,
+                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56,
                                TCG_REG_R2, offset_addend - offset_rw),
                    tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
-                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
+                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R56));
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@@ -1508,8 +1508,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R57,
-                               mem_index, TCG_REG_R0),
+                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+                               TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_ld_helpers[s_bits]));
@@ -1517,7 +1517,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
     if (bswap && s_bits == 1) {
@@ -1541,23 +1541,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
-    /* XXX/FIXME: suboptimal */
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R57, 0, TCG_REG_R56),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0));
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+                                   mem_index, TCG_REG_R0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     } else {
         tcg_out_bundle(s, miB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+                                   mem_index, TCG_REG_R0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, TCG_REG_R8,
                                    TCG_REG_R8, 0xb),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
@@ -1609,8 +1603,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R57,
-                              0, data_reg),
+                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+                               TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_st_helpers[opc]));
@@ -1618,31 +1612,42 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
 
     if (!bswap || opc == 0) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, mii,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     } else if (opc == 1) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 15, 15),
+                                   TCG_REG_R2, data_reg, 15, 15));
+        tcg_out_bundle(s, miI,
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
     } else if (opc == 2) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 31, 31),
+                                   TCG_REG_R2, data_reg, 31, 31));
+        tcg_out_bundle(s, miI,
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
@@ -1650,25 +1655,18 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, data_reg, 0xb));
         data_reg = TCG_REG_R2;
     }
 
-    /* XXX/FIXME: suboptimal */
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R58, 0, TCG_REG_R57),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R57, 0, TCG_REG_R56));
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0),
+                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
+                               mem_index, TCG_REG_R0),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
 }