From patchwork Sun Dec 22 11:50:36 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Aurelien Jarno X-Patchwork-Id: 304463 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id F21312C0078 for ; Sun, 22 Dec 2013 23:21:39 +1100 (EST) Received: from localhost ([::1]:57504 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Vui2G-0007Y0-68 for incoming@patchwork.ozlabs.org; Sun, 22 Dec 2013 07:21:32 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:42436) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Vui1w-0007Xr-W9 for qemu-devel@nongnu.org; Sun, 22 Dec 2013 07:21:18 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Vui1r-0005Jq-BT for qemu-devel@nongnu.org; Sun, 22 Dec 2013 07:21:12 -0500 Received: from hall.aurel32.net ([2001:bc8:30d7:101::1]:50536) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Vui1r-0005Jm-1n for qemu-devel@nongnu.org; Sun, 22 Dec 2013 07:21:07 -0500 Received: from [37.162.107.224] (helo=ohm.rr44.fr) by hall.aurel32.net with esmtpsa (TLS1.2:DHE_RSA_AES_128_CBC_SHA1:128) (Exim 4.80) (envelope-from ) id 1Vui1o-0004E2-05; Sun, 22 Dec 2013 13:21:04 +0100 Received: from aurel32 by ohm.rr44.fr with local (Exim 4.80) (envelope-from ) id 1VuhYO-0002XQ-3e; Sun, 22 Dec 2013 12:50:40 +0100 From: Aurelien Jarno To: qemu-devel@nongnu.org Date: Sun, 22 Dec 2013 12:50:36 +0100 Message-Id: <1387713039-9584-7-git-send-email-aurelien@aurel32.net> X-Mailer: git-send-email 1.7.10.4 In-Reply-To: <1387713039-9584-1-git-send-email-aurelien@aurel32.net> References: <1387713039-9584-1-git-send-email-aurelien@aurel32.net> 
X-detected-operating-system: by eggs.gnu.org: Error: Malformed IPv6 address (bad octet value). X-Received-From: 2001:bc8:30d7:101::1 Cc: Aurelien Jarno Subject: [Qemu-devel] [PATCH v2 6/9] target-sh4: split out Q and M from SR and optimize div1 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org By splitting Q and M out of SR, it's possible to optimize div1 by using TCG code instead of a helper. Signed-off-by: Aurelien Jarno --- target-sh4/cpu.h | 13 ++++-- target-sh4/helper.h | 1 - target-sh4/op_helper.c | 118 ------------------------------------------------ target-sh4/translate.c | 67 +++++++++++++++++++++++---- 4 files changed, 68 insertions(+), 131 deletions(-) diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h index b7dd7ab..82221c8 100644 --- a/target-sh4/cpu.h +++ b/target-sh4/cpu.h @@ -140,6 +140,8 @@ typedef struct CPUSH4State { uint32_t gregs[24]; /* general registers */ float32 fregs[32]; /* floating point registers */ uint32_t sr; /* status register (with T split out) */ + uint32_t sr_m; /* M bit of status register */ + uint32_t sr_q; /* Q bit of status register */ uint32_t sr_t; /* T bit of status register */ uint32_t ssr; /* saved status register */ uint32_t spc; /* saved program counter */ @@ -342,13 +344,18 @@ static inline int cpu_ptel_pr (uint32_t ptel) static inline target_ulong cpu_read_sr(CPUSH4State *env) { - return (env->sr & ~(1u << SR_T)) | (env->sr_t << SR_T); + return (env->sr & ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T))) | + (env->sr_m << SR_M) | + (env->sr_q << SR_Q) | + (env->sr_t << SR_T); } static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr) { - env->sr_t = sr & (1u << SR_T); - env->sr = sr & ~(1u << SR_T); + env->sr_m = (sr >> SR_M) & 1; + env->sr_q = (sr >> SR_Q) & 1; + 
env->sr_t = (sr >> SR_T) & 1; + env->sr = sr & ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T)); } static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, diff --git a/target-sh4/helper.h b/target-sh4/helper.h index 7162448..fbbe264 100644 --- a/target-sh4/helper.h +++ b/target-sh4/helper.h @@ -13,7 +13,6 @@ DEF_HELPER_3(movcal, void, env, i32, i32) DEF_HELPER_1(discard_movcal_backup, void, env) DEF_HELPER_2(ocbi, void, env, i32) -DEF_HELPER_3(div1, i32, env, i32, i32) DEF_HELPER_3(macl, void, env, i32, i32) DEF_HELPER_3(macw, void, env, i32, i32) diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c index 0e881a8..3ed0e2d 100644 --- a/target-sh4/op_helper.c +++ b/target-sh4/op_helper.c @@ -166,124 +166,6 @@ void helper_ocbi(CPUSH4State *env, uint32_t address) } } -#define T (env->sr_t) -#define Q (env->sr & (1u << SR_Q) ? 1 : 0) -#define M (env->sr & (1u << SR_M) ? 1 : 0) -#define SETT (env->sr_t = 1) -#define CLRT (env->sr_t = 0) -#define SETQ (env->sr |= (1u << SR_Q)) -#define CLRQ (env->sr &= ~(1u << SR_Q)) -#define SETM (env->sr |= (1u << SR_M)) -#define CLRM (env->sr &= ~(1u << SR_M)) - -uint32_t helper_div1(CPUSH4State *env, uint32_t arg0, uint32_t arg1) -{ - uint32_t tmp0, tmp2; - uint8_t old_q, tmp1 = 0xff; - - //printf("div1 arg0=0x%08x arg1=0x%08x M=%d Q=%d T=%d\n", arg0, arg1, M, Q, T); - old_q = Q; - if ((0x80000000 & arg1) != 0) - SETQ; - else - CLRQ; - tmp2 = arg0; - arg1 <<= 1; - arg1 |= T; - switch (old_q) { - case 0: - switch (M) { - case 0: - tmp0 = arg1; - arg1 -= tmp2; - tmp1 = arg1 > tmp0; - switch (Q) { - case 0: - if (tmp1) - SETQ; - else - CLRQ; - break; - case 1: - if (tmp1 == 0) - SETQ; - else - CLRQ; - break; - } - break; - case 1: - tmp0 = arg1; - arg1 += tmp2; - tmp1 = arg1 < tmp0; - switch (Q) { - case 0: - if (tmp1 == 0) - SETQ; - else - CLRQ; - break; - case 1: - if (tmp1) - SETQ; - else - CLRQ; - break; - } - break; - } - break; - case 1: - switch (M) { - case 0: - tmp0 = arg1; - arg1 += tmp2; - tmp1 = arg1 
< tmp0; - switch (Q) { - case 0: - if (tmp1) - SETQ; - else - CLRQ; - break; - case 1: - if (tmp1 == 0) - SETQ; - else - CLRQ; - break; - } - break; - case 1: - tmp0 = arg1; - arg1 -= tmp2; - tmp1 = arg1 > tmp0; - switch (Q) { - case 0: - if (tmp1 == 0) - SETQ; - else - CLRQ; - break; - case 1: - if (tmp1) - SETQ; - else - CLRQ; - break; - } - break; - } - break; - } - if (Q == M) - SETT; - else - CLRT; - //printf("Output: arg1=0x%08x M=%d Q=%d T=%d\n", arg1, M, Q, T); - return arg1; -} - void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1) { int64_t res; diff --git a/target-sh4/translate.c b/target-sh4/translate.c index 4ef0398..d4046f8 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -59,7 +59,8 @@ enum { /* global register indexes */ static TCGv_ptr cpu_env; static TCGv cpu_gregs[24]; -static TCGv cpu_pc, cpu_sr, cpu_sr_t, cpu_ssr, cpu_spc, cpu_gbr; +static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t; +static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr; static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl; static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst; static TCGv cpu_fregs[32]; @@ -107,6 +108,10 @@ void sh4_translate_init(void) offsetof(CPUSH4State, pc), "PC"); cpu_sr = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUSH4State, sr), "SR"); + cpu_sr_m = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUSH4State, sr_m), "SR_M"); + cpu_sr_q = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUSH4State, sr_q), "SR_Q"); cpu_sr_t = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUSH4State, sr_t), "SR_T"); cpu_ssr = tcg_global_mem_new_i32(TCG_AREG0, @@ -175,14 +180,28 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, } static void gen_read_sr(TCGv dst) { - tcg_gen_andi_i32(dst, cpu_sr, ~(1u << SR_T)); - tcg_gen_or_i32(dst, dst, cpu_sr_t); + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_i32(dst, cpu_sr, + ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T))); + tcg_gen_shli_i32(t0, cpu_sr_q, SR_Q); + tcg_gen_or_i32(dst, dst, t0); + tcg_gen_shli_i32(t0, 
cpu_sr_m, SR_M); + tcg_gen_or_i32(dst, dst, t0); + tcg_gen_shli_i32(t0, cpu_sr_t, SR_T); + tcg_gen_or_i32(dst, dst, t0); + tcg_temp_free_i32(t0); } static void gen_write_sr(TCGv src) { - tcg_gen_andi_i32(cpu_sr, src, ~(1u << SR_T)); - tcg_gen_andi_i32(cpu_sr_t, src, (1u << SR_T)); + tcg_gen_andi_i32(cpu_sr, src, + ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T))); + tcg_gen_shri_i32(cpu_sr_q, src, SR_Q); + tcg_gen_andi_i32(cpu_sr_q, cpu_sr_q, 1); + tcg_gen_shri_i32(cpu_sr_m, src, SR_M); + tcg_gen_andi_i32(cpu_sr_m, cpu_sr_m, 1); + tcg_gen_shri_i32(cpu_sr_t, src, SR_T); + tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1); } static void gen_goto_tb(DisasContext * ctx, int n, target_ulong dest) @@ -389,7 +408,8 @@ static void _decode_opc(DisasContext * ctx) switch (ctx->opcode) { case 0x0019: /* div0u */ - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~((1u << SR_M) | (1u << SR_Q))); + tcg_gen_movi_i32(cpu_sr_m, 0); + tcg_gen_movi_i32(cpu_sr_q, 0); tcg_gen_movi_i32(cpu_sr_t, 0); return; case 0x000b: /* rts */ @@ -708,8 +728,8 @@ static void _decode_opc(DisasContext * ctx) return; case 0x2007: /* div0s Rm,Rn */ { - gen_copy_bit_i32(cpu_sr, SR_Q, REG(B11_8), 31); /* SR_Q */ - gen_copy_bit_i32(cpu_sr, SR_M, REG(B7_4), 31); /* SR_M */ + tcg_gen_shri_i32(cpu_sr_q, REG(B11_8), 31); /* SR_Q */ + tcg_gen_shri_i32(cpu_sr_m, REG(B7_4), 31); /* SR_M */ TCGv val = tcg_temp_new(); tcg_gen_xor_i32(cpu_sr_t, REG(B7_4), REG(B11_8)); tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31); /* SR_T */ @@ -717,7 +737,36 @@ static void _decode_opc(DisasContext * ctx) } return; case 0x3004: /* div1 Rm,Rn */ - gen_helper_div1(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8)); + { + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv zero = tcg_const_i32(0); + + /* shift left arg1, saving the bit being pushed out and inserting + T on the right */ + tcg_gen_shri_i32(t0, REG(B11_8), 31); + tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1); + tcg_gen_or_i32(REG(B11_8), REG(B11_8), cpu_sr_t); + + /* add or subtract 
arg0 from arg1 depending if Q == M */ + tcg_gen_xor_i32(t1, cpu_sr_q, cpu_sr_m); + tcg_gen_subi_i32(t1, t1, 1); + tcg_gen_neg_i32(t2, REG(B7_4)); + tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, zero, REG(B7_4), t2); + tcg_gen_add2_i32(REG(B11_8), t1, REG(B11_8), zero, t2, t1); + + /* compute T and Q depending on carry */ + tcg_gen_andi_i32(t1, t1, 1); + tcg_gen_xor_i32(t1, t1, t0); + tcg_gen_xori_i32(cpu_sr_t, t1, 1); + tcg_gen_xor_i32(cpu_sr_q, cpu_sr_m, t1); + + tcg_temp_free(zero); + tcg_temp_free(t2); + tcg_temp_free(t1); + tcg_temp_free(t0); + } return; case 0x300d: /* dmuls.l Rm,Rn */ tcg_gen_muls2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));