From patchwork Thu May 27 20:46:33 2010
From: Richard Henderson
X-Patchwork-Id: 53830
Message-Id: <1274993204-30766-52-git-send-email-rth@twiddle.net>
In-Reply-To: <1274993204-30766-1-git-send-email-rth@twiddle.net>
References: <1274993204-30766-1-git-send-email-rth@twiddle.net>
X-Mailer: git-send-email 1.7.0.1
To: qemu-devel@nongnu.org
Cc: agraf@suse.de, aurelien@aurel32.net
Date: Thu, 27 May 2010 13:46:33 -0700
Subject: [Qemu-devel] [PATCH 51/62] tcg-s390: Conditionalize AND IMMEDIATE
 instructions.

The 32-bit immediate AND instructions are part of the extended-immediate
facility; use them only when that facility is present.  At the same time,
pull the logic that decides whether an immediate should instead be loaded
into a register into a new TCG constraint letter.
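[Editorial note, not part of the patch: the decomposition the patch relies on
can be modeled in plain C.  A 64-bit AND with any immediate can always be
applied as four independent 16-bit half-word ANDs, which is what the s390
NILL/NILH/NIHL/NIHH instructions provide, and any all-ones half-word of the
mask is a no-op that can be skipped.  A minimal standalone sketch of that
idea (and_by_halfwords is a hypothetical name, not QEMU code):

    #include <stdint.h>
    #include <stdio.h>

    /* Apply a 64-bit AND immediate as up to four independent 16-bit
       half-word ANDs, mirroring the s390 NILL/NILH/NIHL/NIHH insns.
       An all-ones half-word of the mask is a no-op and is skipped,
       which is where the size savings in the patch come from.  */
    static uint64_t and_by_halfwords(uint64_t reg, uint64_t mask)
    {
        int i;
        for (i = 0; i < 4; i++) {
            uint16_t m = (uint16_t)(mask >> (i * 16));
            if (m != 0xffff) {
                /* Clear bits only within half-word i; leave the rest.  */
                reg &= ((uint64_t)m << (i * 16)) | ~(0xffffull << (i * 16));
            }
        }
        return reg;
    }

    int main(void)
    {
        uint64_t r = 0x123456789abcdef0ull;
        uint64_t m = 0xffffffff0000ffffull;
        /* Equivalent to a single 64-bit AND; prints "ok".  */
        printf("%s\n", and_by_halfwords(r, m) == (r & m) ? "ok" : "bad");
        return 0;
    }]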
Signed-off-by: Richard Henderson
---
 tcg/s390/tcg-target.c |  209 ++++++++++++++++++++++++++++--------------------
 1 files changed, 122 insertions(+), 87 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 8a7c9ae..359f6d1 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -33,10 +33,11 @@
 do { } while (0)
 #endif
 
-#define TCG_CT_CONST_32    0x100
-#define TCG_CT_CONST_NEG   0x200
-#define TCG_CT_CONST_ADDI  0x400
-#define TCG_CT_CONST_MULI  0x800
+#define TCG_CT_CONST_32    0x0100
+#define TCG_CT_CONST_NEG   0x0200
+#define TCG_CT_CONST_ADDI  0x0400
+#define TCG_CT_CONST_MULI  0x0800
+#define TCG_CT_CONST_ANDI  0x1000
 
 #define TCG_TMP0 TCG_REG_R14
 
@@ -353,6 +354,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct &= ~TCG_CT_REG;
         ct->ct |= TCG_CT_CONST_MULI;
         break;
+    case 'A':
+        ct->ct &= ~TCG_CT_REG;
+        ct->ct |= TCG_CT_CONST_ANDI;
+        break;
     default:
         break;
     }
@@ -362,9 +367,66 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     return 0;
 }
 
+/* Immediates to be used with logical AND.  This is an optimization only,
+   since a full 64-bit immediate AND can always be performed with 4
+   sequential NI[LH][LH] instructions.  What we're looking for is
+   immediates that we can load efficiently, and the immediate load plus
+   the reg-reg AND is smaller than the sequential NI's.  */
+
+static int tcg_match_andi(int ct, tcg_target_ulong val)
+{
+    int i;
+
+    if (facilities & FACILITY_EXT_IMM) {
+        if (ct & TCG_CT_CONST_32) {
+            /* All 32-bit ANDs can be performed with 1 48-bit insn.  */
+            return 1;
+        }
+
+        /* Zero-extensions.  */
+        if (val == 0xff || val == 0xffff || val == 0xffffffff) {
+            return 1;
+        }
+    } else {
+        if (ct & TCG_CT_CONST_32) {
+            val = (uint32_t)val;
+        } else if (val == 0xffffffff) {
+            return 1;
+        }
+    }
+
+    /* Try all 32-bit insns that can perform it in one go.  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = ~(0xffffull << i*16);
+        if ((val & mask) == mask) {
+            return 1;
+        }
+    }
+
+    /* Look for 16-bit values performing the mask.  These are better
+       to load with LLI[LH][LH].  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = 0xffffull << i*16;
+        if ((val & mask) == val) {
+            return 0;
+        }
+    }
+
+    /* Look for 32-bit values performing the 64-bit mask.  These
+       are better to load with LLI[LH]F, or if extended immediates
+       not available, with a pair of LLI insns.  */
+    if ((ct & TCG_CT_CONST_32) == 0) {
+        if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
 /* Test if a constant matches the constraint.  */
-static inline int tcg_target_const_match(tcg_target_long val,
-                                         const TCGArgConstraint *arg_ct)
+static int tcg_target_const_match(tcg_target_long val,
+                                  const TCGArgConstraint *arg_ct)
 {
     int ct = arg_ct->ct;
 
@@ -401,6 +463,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
         } else {
             return val == (int16_t)val;
         }
+    } else if (ct & TCG_CT_CONST_ANDI) {
+        return tcg_match_andi(ct, val);
     }
 
     return 0;
@@ -764,37 +828,6 @@ static void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
 }
 
-static void tgen32_andi(TCGContext *s, TCGReg dest, uint32_t val)
-{
-    /* Zero-th, look for no-op.  */
-    if (val == -1) {
-        return;
-    }
-
-    /* First, look for the zero-extensions.  */
-    if (val == 0xff) {
-        tgen_ext8u(s, dest, dest);
-        return;
-    }
-    if (val == 0xffff) {
-        tgen_ext16u(s, dest, dest);
-        return;
-    }
-
-    /* Second, try all 32-bit insns that can perform it in one go.  */
-    if ((val & 0xffff0000) == 0xffff0000) {
-        tcg_out_insn(s, RI, NILL, dest, val);
-        return;
-    }
-    if ((val & 0x0000ffff) == 0x0000ffff) {
-        tcg_out_insn(s, RI, NILH, dest, val >> 16);
-        return;
-    }
-
-    /* Lastly, perform the entire operation with a 48-bit insn.  */
-    tcg_out_insn(s, RIL, NILF, dest, val);
-}
-
 static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
 {
     static const S390Opcode ni_insns[4] = {
@@ -806,69 +839,61 @@ static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
     int i;
 
-    /* Zero-th, look for no-op.  */
+    /* Look for no-op.  */
     if (val == -1) {
         return;
     }
 
-    /* First, look for the zero-extensions.  */
-    if (val == 0xff) {
-        tgen_ext8u(s, dest, dest);
-        return;
-    }
-    if (val == 0xffff) {
-        tgen_ext16u(s, dest, dest);
-        return;
-    }
+    /* Look for the zero-extensions.  */
     if (val == 0xffffffff) {
         tgen_ext32u(s, dest, dest);
         return;
     }
 
-    /* Second, try all 32-bit insns that can perform it in one go.  */
-    for (i = 0; i < 4; i++) {
-        tcg_target_ulong mask = ~(0xffffull << i*16);
-        if ((val & mask) == mask) {
-            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+    if (facilities & FACILITY_EXT_IMM) {
+        if (val == 0xff) {
+            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
             return;
         }
-    }
-
-    /* Third, try all 48-bit insns that can perform it in one go.  */
-    for (i = 0; i < 2; i++) {
-        tcg_target_ulong mask = ~(0xffffffffull << i*32);
-        if ((val & mask) == mask) {
-            tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+        if (val == 0xffff) {
+            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
             return;
         }
-    }
 
-    /* Fourth, look for masks that can be loaded with one instruction
-       into a register.  This is slightly smaller than using two 48-bit
-       masks, as below.  */
-    for (i = 0; i < 4; i++) {
-        tcg_target_ulong mask = ~(0xffffull << i*16);
-        if ((val & mask) == 0) {
-            tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
-            tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
-            return;
+        /* Try all 32-bit insns that can perform it in one go.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = ~(0xffffull << i*16);
+            if ((val & mask) == mask) {
+                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+                return;
+            }
         }
-    }
 
-    for (i = 0; i < 2; i++) {
-        tcg_target_ulong mask = ~(0xffffffffull << i*32);
-        if ((val & mask) == 0) {
-            tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
-            tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
-            return;
+        /* Try all 48-bit insns that can perform it in one go.  */
+        if (facilities & FACILITY_EXT_IMM) {
+            for (i = 0; i < 2; i++) {
+                tcg_target_ulong mask = ~(0xffffffffull << i*32);
+                if ((val & mask) == mask) {
+                    tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+                    return;
+                }
+            }
        }
-        }
-    }
 
-    /* Last, perform the AND via sequential modifications to the
-       high and low parts.  Do this via recursion to handle 16-bit
-       vs 32-bit masks in each half.  */
-    tgen64_andi(s, dest, val | 0xffffffff00000000ull);
-    tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+        /* Perform the AND via sequential modifications to the high and low
+           parts.  Do this via recursion to handle 16-bit vs 32-bit masks in
+           each half.  */
+        tgen64_andi(s, dest, val | 0xffffffff00000000ull);
+        tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+    } else {
+        /* With no extended-immediate facility, just emit the sequence.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = 0xffffull << i*16;
+            if ((val & mask) != mask) {
+                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+            }
+        }
+    }
 }
 
 static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
@@ -1121,6 +1146,16 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
 }
 
 #if defined(CONFIG_SOFTMMU)
+static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
+    if (tcg_match_andi(0, val)) {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
+        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
+    } else {
+        tgen64_andi(s, dest, val);
+    }
+}
+
 static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
                                   TCGReg addr_reg, int mem_index, int opc,
                                   uint16_t **label2_ptr_p, int is_store)
@@ -1140,8 +1175,8 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, SH64_REG_NONE,
                  TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tgen64_andi(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    tgen64_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+    tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+    tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     if (is_store) {
         ofs = offsetof(CPUState, tlb_table[mem_index][0].addr_write);
@@ -1413,7 +1448,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_and_i32:
         if (const_args[2]) {
-            tgen32_andi(s, args[0], args[2]);
+            tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
         } else {
            tcg_out_insn(s, RR, NR, args[0], args[2]);
         }
@@ -1728,7 +1763,7 @@ static const TCGTargetOpDef s390_op_defs[] = {
 
     { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
-    { INDEX_op_and_i32, { "r", "0", "ri" } },
+    { INDEX_op_and_i32, { "r", "0", "rWA" } },
     { INDEX_op_or_i32, { "r", "0", "ri" } },
     { INDEX_op_xor_i32, { "r", "0", "ri" } },
     { INDEX_op_neg_i32, { "r", "r" } },
@@ -1789,7 +1824,7 @@ static const TCGTargetOpDef s390_op_defs[] = {
 
     { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
-    { INDEX_op_and_i64, { "r", "0", "ri" } },
+    { INDEX_op_and_i64, { "r", "0", "rA" } },
     { INDEX_op_or_i64, { "r", "0", "ri" } },
     { INDEX_op_xor_i64, { "r", "0", "ri" } },
     { INDEX_op_neg_i64, { "r", "r" } },
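[Editorial note, not part of the patch: the cost decision encoded in
tcg_match_andi above is easier to see in isolation.  Below is a rough
standalone restatement of its no-extended-immediate subset
(prefer_and_immediate is a hypothetical name, not QEMU code; the EXT_IMM
and 32-bit-value clauses are omitted).  A mask qualifies as an immediate
operand when a single NI instruction covers it, while a value confined to
one aligned half-word is cheaper to load with LLI[LH][LH] and combine
with NGR.

    #include <stdint.h>
    #include <stdio.h>

    /* Returns 1 when the AND should stay an immediate operand (a single
       RI-format NI insn covers it), 0 when loading the mask into a
       scratch register and using NGR is smaller than the three or four
       sequential NIs that would otherwise be needed.  */
    static int prefer_and_immediate(uint64_t val)
    {
        int i;

        /* Only one half-word has zero bits: one NI insn suffices.  */
        for (i = 0; i < 4; i++) {
            uint64_t keep = ~(0xffffull << (i * 16));
            if ((val & keep) == keep) {
                return 1;
            }
        }

        /* Value fits one aligned half-word: one LLI load + NGR wins.  */
        for (i = 0; i < 4; i++) {
            uint64_t field = 0xffffull << (i * 16);
            if ((val & field) == val) {
                return 0;
            }
        }

        return 1;
    }

    int main(void)
    {
        /* A single NILL covers this mask: prints 1.  */
        printf("%d\n", prefer_and_immediate(0xffffffffffff00ffull));
        /* A small mask confined to the low half-word, in the spirit of
           the TLB index mask in the patch: prints 0.  */
        printf("%d\n", prefer_and_immediate(0xff0));
        return 0;
    }]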