From patchwork Fri Apr 1 14:30:34 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Maydell X-Patchwork-Id: 89267 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id AE122B6F93 for ; Sat, 2 Apr 2011 01:38:43 +1100 (EST) Received: from localhost ([127.0.0.1]:39774 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Q5fUW-0005Yk-1I for incoming@patchwork.ozlabs.org; Fri, 01 Apr 2011 10:38:24 -0400 Received: from [140.186.70.92] (port=57212 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Q5fNK-0002mO-7b for qemu-devel@nongnu.org; Fri, 01 Apr 2011 10:30:59 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Q5fNH-0004sr-EZ for qemu-devel@nongnu.org; Fri, 01 Apr 2011 10:30:58 -0400 Received: from mnementh.archaic.org.uk ([81.2.115.146]:45963) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Q5fNG-0004or-W1 for qemu-devel@nongnu.org; Fri, 01 Apr 2011 10:30:55 -0400 Received: from pm215 by mnementh.archaic.org.uk with local (Exim 4.72) (envelope-from ) id 1Q5fN5-0007mV-4Q; Fri, 01 Apr 2011 15:30:43 +0100 From: Peter Maydell To: Anthony Liguori , qemu-devel@nongnu.org Date: Fri, 1 Apr 2011 15:30:34 +0100 Message-Id: <1301668243-29886-2-git-send-email-peter.maydell@linaro.org> X-Mailer: git-send-email 1.7.2.5 In-Reply-To: <1301668243-29886-1-git-send-email-peter.maydell@linaro.org> References: <1301668243-29886-1-git-send-email-peter.maydell@linaro.org> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6 (newer, 2) X-Received-From: 81.2.115.146 Cc: Aurelien Jarno Subject: [Qemu-devel] [PATCH 01/10] target-arm: Make Neon helper routines use correct FP status X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Make the Neon helper routines use the correct FP status from the CPUEnv rather than using a dummy static one. This means they will correctly handle denormals and NaNs and will set FPSCR exception bits properly. Signed-off-by: Peter Maydell --- target-arm/helpers.h | 22 +++++++++++----------- target-arm/neon_helper.c | 21 ++++++++++----------- target-arm/translate.c | 42 ++++++++++++++++++++++-------------------- 3 files changed, 43 insertions(+), 42 deletions(-) diff --git a/target-arm/helpers.h b/target-arm/helpers.h index bd6977c..e2260b6 100644 --- a/target-arm/helpers.h +++ b/target-arm/helpers.h @@ -350,17 +350,17 @@ DEF_HELPER_2(neon_qneg_s8, i32, env, i32) DEF_HELPER_2(neon_qneg_s16, i32, env, i32) DEF_HELPER_2(neon_qneg_s32, i32, env, i32) -DEF_HELPER_2(neon_min_f32, i32, i32, i32) -DEF_HELPER_2(neon_max_f32, i32, i32, i32) -DEF_HELPER_2(neon_abd_f32, i32, i32, i32) -DEF_HELPER_2(neon_add_f32, i32, i32, i32) -DEF_HELPER_2(neon_sub_f32, i32, i32, i32) -DEF_HELPER_2(neon_mul_f32, i32, i32, i32) -DEF_HELPER_2(neon_ceq_f32, i32, i32, i32) -DEF_HELPER_2(neon_cge_f32, i32, i32, i32) -DEF_HELPER_2(neon_cgt_f32, i32, i32, i32) -DEF_HELPER_2(neon_acge_f32, i32, i32, i32) -DEF_HELPER_2(neon_acgt_f32, i32, i32, i32) +DEF_HELPER_3(neon_min_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_max_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_abd_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_add_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_sub_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_mul_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_ceq_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_cge_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_cgt_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_acge_f32, i32, env, i32, i32) +DEF_HELPER_3(neon_acgt_f32, i32, env, i32, i32) /* iwmmxt_helper.c */ DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index 002a9c1..97bc1e6 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -18,8 +18,7 @@ #define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Q -static float_status neon_float_status; -#define NFS &neon_float_status +#define NFS (&env->vfp.standard_fp_status) /* Helper routines to perform bitwise copies between float and int. */ static inline float32 vfp_itos(uint32_t i) @@ -1794,21 +1793,21 @@ uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) } /* NEON Float helpers. */ -uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) +uint32_t HELPER(neon_min_f32)(CPUState *env, uint32_t a, uint32_t b) { float32 f0 = vfp_itos(a); float32 f1 = vfp_itos(b); return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b; } -uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) +uint32_t HELPER(neon_max_f32)(CPUState *env, uint32_t a, uint32_t b) { float32 f0 = vfp_itos(a); float32 f1 = vfp_itos(b); return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b; } -uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) +uint32_t HELPER(neon_abd_f32)(CPUState *env, uint32_t a, uint32_t b) { float32 f0 = vfp_itos(a); float32 f1 = vfp_itos(b); @@ -1817,24 +1816,24 @@ uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) : float32_sub(f1, f0, NFS)); } -uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) +uint32_t HELPER(neon_add_f32)(CPUState *env, uint32_t a, uint32_t b) { return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS)); } -uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) +uint32_t HELPER(neon_sub_f32)(CPUState *env, uint32_t a, uint32_t b) { return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS)); } -uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) +uint32_t HELPER(neon_mul_f32)(CPUState *env, uint32_t a, uint32_t b) { return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS)); } /* Floating point comparisons produce an integer result. */ #define NEON_VOP_FCMP(name, cmp) \ -uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \ +uint32_t HELPER(neon_##name)(CPUState *env, uint32_t a, uint32_t b) \ { \ if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \ return ~0; \ @@ -1846,14 +1845,14 @@ NEON_VOP_FCMP(ceq_f32, ==) NEON_VOP_FCMP(cge_f32, >=) NEON_VOP_FCMP(cgt_f32, >) -uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) +uint32_t HELPER(neon_acge_f32)(CPUState *env, uint32_t a, uint32_t b) { float32 f0 = float32_abs(vfp_itos(a)); float32 f1 = float32_abs(vfp_itos(b)); return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0; } -uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) +uint32_t HELPER(neon_acgt_f32)(CPUState *env, uint32_t a, uint32_t b) { float32 f0 = float32_abs(vfp_itos(a)); float32 f1 = float32_abs(vfp_itos(b)); diff --git a/target-arm/translate.c b/target-arm/translate.c index f69912f..cf2440e 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -4519,56 +4519,56 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 26: /* Floating point arithnetic. */ switch ((u << 2) | size) { case 0: /* VADD */ - gen_helper_neon_add_f32(tmp, tmp, tmp2); + gen_helper_neon_add_f32(tmp, cpu_env, tmp, tmp2); break; case 2: /* VSUB */ - gen_helper_neon_sub_f32(tmp, tmp, tmp2); + gen_helper_neon_sub_f32(tmp, cpu_env, tmp, tmp2); break; case 4: /* VPADD */ - gen_helper_neon_add_f32(tmp, tmp, tmp2); + gen_helper_neon_add_f32(tmp, cpu_env, tmp, tmp2); break; case 6: /* VABD */ - gen_helper_neon_abd_f32(tmp, tmp, tmp2); + gen_helper_neon_abd_f32(tmp, cpu_env, tmp, tmp2); break; default: return 1; } break; case 27: /* Float multiply. */ - gen_helper_neon_mul_f32(tmp, tmp, tmp2); + gen_helper_neon_mul_f32(tmp, cpu_env, tmp, tmp2); if (!u) { tcg_temp_free_i32(tmp2); tmp2 = neon_load_reg(rd, pass); if (size == 0) { - gen_helper_neon_add_f32(tmp, tmp, tmp2); + gen_helper_neon_add_f32(tmp, cpu_env, tmp, tmp2); } else { - gen_helper_neon_sub_f32(tmp, tmp2, tmp); + gen_helper_neon_sub_f32(tmp, cpu_env, tmp2, tmp); } } break; case 28: /* Float compare. */ if (!u) { - gen_helper_neon_ceq_f32(tmp, tmp, tmp2); + gen_helper_neon_ceq_f32(tmp, cpu_env, tmp, tmp2); } else { if (size == 0) - gen_helper_neon_cge_f32(tmp, tmp, tmp2); + gen_helper_neon_cge_f32(tmp, cpu_env, tmp, tmp2); else - gen_helper_neon_cgt_f32(tmp, tmp, tmp2); + gen_helper_neon_cgt_f32(tmp, cpu_env, tmp, tmp2); } break; case 29: /* Float compare absolute. */ if (!u) return 1; if (size == 0) - gen_helper_neon_acge_f32(tmp, tmp, tmp2); + gen_helper_neon_acge_f32(tmp, cpu_env, tmp, tmp2); else - gen_helper_neon_acgt_f32(tmp, tmp, tmp2); + gen_helper_neon_acgt_f32(tmp, cpu_env, tmp, tmp2); break; case 30: /* Float min/max. */ if (size == 0) - gen_helper_neon_max_f32(tmp, tmp, tmp2); + gen_helper_neon_max_f32(tmp, cpu_env, tmp, tmp2); else - gen_helper_neon_min_f32(tmp, tmp, tmp2); + gen_helper_neon_min_f32(tmp, cpu_env, tmp, tmp2); break; case 31: if (size == 0) @@ -5232,7 +5232,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); } } else if (op & 1) { - gen_helper_neon_mul_f32(tmp, tmp, tmp2); + gen_helper_neon_mul_f32(tmp, cpu_env, tmp, tmp2); } else { switch (size) { case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; @@ -5250,13 +5250,15 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_neon_add(size, tmp, tmp2); break; case 1: - gen_helper_neon_add_f32(tmp, tmp, tmp2); + gen_helper_neon_add_f32(tmp, cpu_env, + tmp, tmp2); break; case 4: gen_neon_rsb(size, tmp, tmp2); break; case 5: - gen_helper_neon_sub_f32(tmp, tmp2, tmp); + gen_helper_neon_sub_f32(tmp, cpu_env, + tmp2, tmp); break; default: abort(); @@ -5641,21 +5643,21 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) break; case 24: case 27: /* Float VCGT #0, Float VCLE #0 */ tmp2 = tcg_const_i32(0); - gen_helper_neon_cgt_f32(tmp, tmp, tmp2); + gen_helper_neon_cgt_f32(tmp, cpu_env, tmp, tmp2); tcg_temp_free(tmp2); if (op == 27) tcg_gen_not_i32(tmp, tmp); break; case 25: case 28: /* Float VCGE #0, Float VCLT #0 */ tmp2 = tcg_const_i32(0); - gen_helper_neon_cge_f32(tmp, tmp, tmp2); + gen_helper_neon_cge_f32(tmp, cpu_env, tmp, tmp2); tcg_temp_free(tmp2); if (op == 28) tcg_gen_not_i32(tmp, tmp); break; case 26: /* Float VCEQ #0 */ tmp2 = tcg_const_i32(0); - gen_helper_neon_ceq_f32(tmp, tmp, tmp2); + gen_helper_neon_ceq_f32(tmp, cpu_env, tmp, tmp2); tcg_temp_free(tmp2); break; case 30: /* Float VABS */