target-alpha: An approach to fp insn qualifiers

Message ID	4B26D8EF.10801@twiddle.net
State	New
Headers	show Return-Path: <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org> Message-ID: <4B26D8EF.10801@twiddle.net> Date: Mon, 14 Dec 2009 16:31:43 -0800 From: Richard Henderson <rth@twiddle.net> User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.4pre) Gecko/20090922 Fedora/3.0-3.9.b4.fc12 Thunderbird/3.0b4 MIME-Version: 1.0 To: Laurent Desnogues <laurent.desnogues@gmail.com> Subject: Re: [Qemu-devel] target-alpha: An approach to fp insn qualifiers References: <4B267DBC.8050909@twiddle.net> <761ea48b0912141211keb5bbben584d7fe76f44d78c@mail.gmail.com> In-Reply-To: <761ea48b0912141211keb5bbben584d7fe76f44d78c@mail.gmail.com> Content-Type: multipart/mixed; boundary="------------050606010501030204080507" Cc: qemu-devel@nongnu.org Precedence: list Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org

diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h index c0dff4b..c1c0470 100644 --- a/target-alpha/cpu.h +++ b/target-alpha/cpu.h @@ -430,9 +430,13 @@ enum { }; /* Arithmetic exception */ -enum { - EXCP_ARITH_OVERFLOW, -}; +#define EXC_M_IOV (1<<16) /* Integer Overflow */ +#define EXC_M_INE (1<<15) /* Inexact result */ +#define EXC_M_UNF (1<<14) /* Underflow */ +#define EXC_M_FOV (1<<13) /* Overflow */ +#define EXC_M_DZE (1<<12) /* Division by zero */ +#define EXC_M_INV (1<<11) /* Invalid operation */ +#define EXC_M_SWC (1<<10) /* Software completion */ enum { IR_V0 = 0, diff --git a/target-alpha/helper.c b/target-alpha/helper.c index a658f97..a29f785 100644 --- a/target-alpha/helper.c +++ b/target-alpha/helper.c @@ -27,41 +27,13 @@ uint64_t cpu_alpha_load_fpcr (CPUState *env) { - uint64_t ret = 0; - int flags, mask; - - flags = env->fp_status.float_exception_flags; - ret |= (uint64_t) flags << 52; - if (flags) - ret |= FPCR_SUM; - env->ipr[IPR_EXC_SUM] &= ~0x3E; - env->ipr[IPR_EXC_SUM] |= flags << 1; - - mask = env->fp_status.float_exception_mask; - if (mask & float_flag_invalid) - ret |= FPCR_INVD; - if (mask & float_flag_divbyzero) - ret |= FPCR_DZED; - if (mask & float_flag_overflow) - ret |= FPCR_OVFD; - if (mask & float_flag_underflow) - ret |= FPCR_UNFD; - if (mask & float_flag_inexact) - ret |= FPCR_INED; - - switch (env->fp_status.float_rounding_mode) { - case float_round_nearest_even: - ret |= 2ULL << FPCR_DYN_SHIFT; - break; - case float_round_down: - ret |= 1ULL << FPCR_DYN_SHIFT; - break; - case float_round_up: - ret |= 3ULL << FPCR_DYN_SHIFT; - break; - case float_round_to_zero: - break; - } + uint64_t ret = env->fp_status.float_exception_flags; + + if (ret) + ret = FPCR_SUM | (ret << 52); + + ret |= env->fpcr & ~(FPCR_SUM | FPCR_STATUS_MASK); + return ret; } @@ -69,6 +41,8 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val) { int round_mode, mask; + env->fpcr = val; + set_float_exception_flags((val >> 52) & 0x3F, &env->fp_status); mask = 0; @@ -86,6 +60,7 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val) switch ((val >> FPCR_DYN_SHIFT) & 3) { case 0: + default: round_mode = float_round_to_zero; break; case 1: @@ -99,6 +74,11 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val) break; } set_float_rounding_mode(round_mode, &env->fp_status); + + mask = 0; + if ((val & (FPCR_UNDZ|FPCR_UNFD)) == (FPCR_UNDZ|FPCR_UNFD)) + mask = 1; + set_flush_to_zero(mask, &env->fp_status); } #if defined(CONFIG_USER_ONLY) diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c index 3bb0020..d031f56 100644 --- a/target-alpha/op_helper.c +++ b/target-alpha/op_helper.c @@ -78,7 +78,7 @@ uint64_t helper_addqv (uint64_t op1, uint64_t op2) uint64_t tmp = op1; op1 += op2; if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return op1; } @@ -88,7 +88,7 @@ uint64_t helper_addlv (uint64_t op1, uint64_t op2) uint64_t tmp = op1; op1 = (uint32_t)(op1 + op2); if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return op1; } @@ -98,7 +98,7 @@ uint64_t helper_subqv (uint64_t op1, uint64_t op2) uint64_t res; res = op1 - op2; if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return res; } @@ -108,7 +108,7 @@ uint64_t helper_sublv (uint64_t op1, uint64_t op2) uint32_t res; res = op1 - op2; if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return res; } @@ -118,7 +118,7 @@ uint64_t helper_mullv (uint64_t op1, uint64_t op2) int64_t res = (int64_t)op1 * (int64_t)op2; if (unlikely((int32_t)res != res)) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return (int64_t)((int32_t)res); } @@ -130,7 +130,7 @@ uint64_t helper_mulqv (uint64_t op1, uint64_t op2) muls64(&tl, &th, op1, op2); /* If th != 0 && th != -1, then we had an overflow */ if (unlikely((th + 1) > 1)) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return tl; } @@ -370,87 +370,175 @@ uint64_t helper_unpkbw (uint64_t op1) /* Floating point helpers */ +/* ??? Not implemented is setting EXC_MASK, containing a bitmask of + destination registers of instructions that have caused arithmetic + traps. Not needed for userspace emulation, or for complete + emulation of the entire fpu stack within qemu. But we would need + it to invoke a guest kernel's entArith trap handler properly. + + It would be possible to encode the FP destination register in the + QUAL parameter for the FPU helpers below; additional changes would + be required for ADD/V et al above. */ + +#define QUAL_RM_N 0x080 /* Round mode nearest even */ +#define QUAL_RM_C 0x000 /* Round mode chopped */ +#define QUAL_RM_M 0x040 /* Round mode minus infinity */ +#define QUAL_RM_D 0x0c0 /* Round mode dynamic */ +#define QUAL_RM_MASK 0x0c0 + +#define QUAL_U 0x100 /* Underflow enable (fp output) */ +#define QUAL_V 0x100 /* Overflow enable (int output) */ +#define QUAL_S 0x400 /* Software completion enable */ +#define QUAL_I 0x200 /* Inexact detection enable */ + /* If the floating-point qualifiers specified a rounding mode, set that rounding mode and remember the original mode for resetting at the end of the instruction. */ -static inline uint32_t begin_quals_roundmode(uint32_t qual) +static inline uint32_t begin_fp_roundmode(uint32_t qual) { uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = rm; - switch (qual & 0xc0) { - case 0x80: + switch (qual & QUAL_RM_MASK) { + default: + case QUAL_RM_N: rm = float_round_nearest_even; break; - case 0x00: + case QUAL_RM_C: rm = float_round_to_zero; break; - case 0x40: + case QUAL_RM_M: rm = float_round_down; break; - case 0xc0: + case QUAL_RM_D: return old_rm; } - set_float_rounding_mode(rm, &FP_STATUS); + if (old_rm != rm) + set_float_rounding_mode(rm, &FP_STATUS); return old_rm; } -/* If the floating-point qualifiers specified extra exception processing - (i.e. /u or /su), zero the exception flags so that we can determine if - the current instruction raises any exceptions. Save the old acrued - exception status so that we can restore them at the end of the insn. */ -static inline uint32_t begin_quals_exception(uint32_t qual) +/* Zero the exception flags so that we can determine if the current + instruction raises any exceptions. Save the old acrued exception + status so that we can restore them at the end of the insn. */ +static inline uint32_t begin_fp_exception(void) { - uint32_t old_exc = 0; - if (qual & 0x500) { - old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8; - set_float_exception_flags(0, &FP_STATUS); - } + uint32_t old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8; + set_float_exception_flags(0, &FP_STATUS); return old_exc; } +static inline uint32_t begin_fp_flush_to_zero(uint32_t quals) +{ + /* If underflow detection is disabled, silently flush to zero. + Note that flush-to-zero mode may already be enabled via the FPCR. */ + if ((quals & QUAL_U) == 0 && !FP_STATUS.flush_to_zero) { + set_flush_to_zero(1, &FP_STATUS); + return 0x10000; + } + return 0; +} + /* Begin processing an fp operation. Return a token that should be passed when completing the fp operation. */ -static inline uint32_t begin_fp(uint32_t quals) +static uint32_t begin_fp(uint32_t quals) { uint32_t ret = 0; - ret |= begin_quals_roundmode(quals); - ret |= begin_quals_exception(quals); + ret |= begin_fp_roundmode(quals); + ret |= begin_fp_flush_to_zero(quals); + ret |= begin_fp_exception(); return ret; } /* End processing an fp operation. */ -static inline void end_fp(uint32_t quals, uint32_t orig) -{ - uint8_t exc = FP_STATUS.float_exception_flags; - set_float_exception_flags(exc | (orig >> 8), &FP_STATUS); - set_float_rounding_mode(orig & 0xff, &FP_STATUS); +static inline void end_fp_roundmode(uint32_t orig) +{ + uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = orig & 0xff; + if (unlikely(rm != old_rm)) + set_float_rounding_mode(old_rm, &FP_STATUS); +} - /* TODO: check quals and exc and raise any exceptions needed. */ +static inline void end_fp_flush_to_zero(uint32_t orig) +{ + if (orig & 0x10000) + set_flush_to_zero(0, &FP_STATUS); } -/* Raise any exceptions needed for using F, given the insn qualifiers. */ -static inline void float32_input(uint32_t quals, float32 f) +static void end_fp_exception(uint32_t quals, uint32_t orig) { - /* If /s is used, no exceptions are raised immediately. */ - /* ??? This for userspace only. If we are emulating the real hw, then - we may well need to trap to the kernel for software emulation. */ - /* ??? Shouldn't we raise an exception for SNAN? */ - if (quals & 0x500) - return; - /* TODO: Check for inf, nan, denormal and trap. */ + uint8_t exc = FP_STATUS.float_exception_flags; + + /* If inexact detection is disabled, silently clear it. */ + if ((quals & QUAL_I) == 0) + exc &= ~float_flag_inexact; + + orig = (orig >> 8) & 0xff; + set_float_exception_flags(exc | orig, &FP_STATUS); + + /* Raise an exception as required. */ + if (unlikely(exc)) { + if (quals & QUAL_S) + exc &= ~FP_STATUS.float_exception_mask; + if (exc) { + uint32_t hw_exc = 0; + + if (exc & float_flag_invalid) + hw_exc |= EXC_M_INV; + if (exc & float_flag_divbyzero) + hw_exc |= EXC_M_DZE; + if (exc & float_flag_overflow) + hw_exc |= EXC_M_FOV; + if (exc & float_flag_underflow) + hw_exc |= EXC_M_UNF; + if (exc & float_flag_inexact) + hw_exc |= EXC_M_INE; + + helper_excp(EXCP_ARITH, hw_exc); + } + } } -static inline void float64_input(uint32_t quals, float64 f) +static void end_fp(uint32_t quals, uint32_t orig) { - /* TODO: Exactly like above, except for float64. */ + end_fp_roundmode(orig); + end_fp_flush_to_zero(orig); + end_fp_exception(quals, orig); } +static uint64_t remap_ieee_input(uint32_t quals, uint64_t a) +{ + uint64_t frac; + uint32_t exp; + + exp = (uint32_t)(a >> 52) & 0x7ff; + frac = a & 0xfffffffffffffull; + + if (exp == 0) { + if (frac != 0) { + /* If DNZ is set, flush denormals to zero on input. */ + if (env->fpcr & FPCR_DNZ) + a = a & (1ull << 63); + /* If software completion not enabled, trap. */ + else if ((quals & QUAL_S) == 0) + helper_excp(EXCP_ARITH, EXC_M_UNF); + } + } else if (exp == 0x7ff) { + /* Infinity or NaN. If software completion is not enabled, trap. + If /s is enabled, we'll properly signal for SNaN on output. */ + /* ??? I'm not sure these exception bit flags are correct. I do + know that the Linux kernel, at least, doesn't rely on them and + just emulates the insn to figure out what exception to use. */ + if ((quals & QUAL_S) == 0) + helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV); + } + + return a; +} /* F floating (VAX) */ -static inline uint64_t float32_to_f(float32 fa) +static uint64_t float32_to_f(float32 fa) { uint64_t r, exp, mant, sig; CPU_FloatU a; @@ -483,7 +571,7 @@ static inline uint64_t float32_to_f(float32 fa) return r; } -static inline float32 f_to_float32(uint64_t a) +static float32 f_to_float32(uint64_t a) { uint32_t exp, mant_sig; CPU_FloatU r; @@ -535,8 +623,6 @@ uint64_t helper_addf (uint64_t a, uint64_t b, uint32_t quals) fb = f_to_float32(b); token = begin_fp(quals); - float32_input(quals, fa); - float32_input(quals, fb); fr = float32_add(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -552,8 +638,6 @@ uint64_t helper_subf (uint64_t a, uint64_t b, uint32_t quals) fb = f_to_float32(b); token = begin_fp(quals); - float32_input(quals, fa); - float32_input(quals, fb); fr = float32_sub(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -569,8 +653,6 @@ uint64_t helper_mulf (uint64_t a, uint64_t b, uint32_t quals) fb = f_to_float32(b); token = begin_fp(quals); - float32_input(quals, fa); - float32_input(quals, fb); fr = float32_mul(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -586,8 +668,6 @@ uint64_t helper_divf (uint64_t a, uint64_t b, uint32_t quals) fb = f_to_float32(b); token = begin_fp(quals); - float32_input(quals, fa); - float32_input(quals, fb); fr = float32_div(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -602,7 +682,6 @@ uint64_t helper_sqrtf (uint64_t t, uint32_t quals) ft = f_to_float32(t); token = begin_fp(quals); - float32_input(quals, ft); fr = float32_sqrt(ft, &FP_STATUS); end_fp(quals, token); @@ -611,7 +690,7 @@ uint64_t helper_sqrtf (uint64_t t, uint32_t quals) /* G floating (VAX) */ -static inline uint64_t float64_to_g(float64 fa) +static uint64_t float64_to_g(float64 fa) { uint64_t r, exp, mant, sig; CPU_DoubleU a; @@ -644,7 +723,7 @@ static inline uint64_t float64_to_g(float64 fa) return r; } -static inline float64 g_to_float64(uint64_t a) +static float64 g_to_float64(uint64_t a) { uint64_t exp, mant_sig; CPU_DoubleU r; @@ -696,8 +775,6 @@ uint64_t helper_addg (uint64_t a, uint64_t b, uint32_t quals) fb = g_to_float64(b); token = begin_fp(quals); - float64_input(quals, fa); - float64_input(quals, fb); fr = float64_add(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -713,8 +790,6 @@ uint64_t helper_subg (uint64_t a, uint64_t b, uint32_t quals) fb = g_to_float64(b); token = begin_fp(quals); - float64_input(quals, fa); - float64_input(quals, fb); fr = float64_sub(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -730,8 +805,6 @@ uint64_t helper_mulg (uint64_t a, uint64_t b, uint32_t quals) fb = g_to_float64(b); token = begin_fp(quals); - float64_input(quals, fa); - float64_input(quals, fb); fr = float64_mul(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -747,8 +820,6 @@ uint64_t helper_divg (uint64_t a, uint64_t b, uint32_t quals) fb = g_to_float64(b); token = begin_fp(quals); - float64_input(quals, fa); - float64_input(quals, fb); fr = float64_div(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -763,7 +834,6 @@ uint64_t helper_sqrtg (uint64_t a, uint32_t quals) fa = g_to_float64(a); token = begin_fp(quals); - float64_input(quals, fa); fr = float64_sqrt(fa, &FP_STATUS); end_fp(quals, token); @@ -774,7 +844,7 @@ uint64_t helper_sqrtg (uint64_t a, uint32_t quals) /* S floating (single) */ /* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg. */ -static inline uint64_t float32_to_s_int(uint32_t fi) +static uint64_t float32_to_s_int(uint32_t fi) { uint32_t frac = fi & 0x7fffff; uint32_t sign = fi >> 31; @@ -796,7 +866,7 @@ static inline uint64_t float32_to_s_int(uint32_t fi) | ((uint64_t)frac << 29)); } -static inline uint64_t float32_to_s(float32 fa) +static uint64_t float32_to_s(float32 fa) { CPU_FloatU a; a.f = fa; @@ -825,17 +895,19 @@ uint64_t helper_memory_to_s (uint32_t a) return float32_to_s_int(a); } +static float32 input_s(uint32_t quals, uint64_t a) +{ + return s_to_float32(remap_ieee_input(quals, a)); +} + uint64_t helper_adds (uint64_t a, uint64_t b, uint32_t quals) { float32 fa, fb, fr; uint32_t token; - fa = s_to_float32(a); - fb = s_to_float32(b); - token = begin_fp(quals); - float32_input(quals, fa); - float32_input(quals, fb); + fa = input_s(quals, a); + fb = input_s(quals, b); fr = float32_add(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -847,12 +919,9 @@ uint64_t helper_subs (uint64_t a, uint64_t b, uint32_t quals) float32 fa, fb, fr; uint32_t token; - fa = s_to_float32(a); - fb = s_to_float32(b); - token = begin_fp(quals); - float32_input(quals, fa); - float32_input(quals, fb); + fa = input_s(quals, a); + fb = input_s(quals, b); fr = float32_sub(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -864,12 +933,9 @@ uint64_t helper_muls (uint64_t a, uint64_t b, uint32_t quals) float32 fa, fb, fr; uint32_t token; - fa = s_to_float32(a); - fb = s_to_float32(b); - token = begin_fp(quals); - float32_input(quals, fa); - float32_input(quals, fb); + fa = input_s(quals, a); + fb = input_s(quals, b); fr = float32_mul(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -881,12 +947,9 @@ uint64_t helper_divs (uint64_t a, uint64_t b, uint32_t quals) float32 fa, fb, fr; uint32_t token; - fa = s_to_float32(a); - fb = s_to_float32(b); - token = begin_fp(quals); - float32_input(quals, fa); - float32_input(quals, fb); + fa = input_s(quals, a); + fb = input_s(quals, b); fr = float32_div(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -898,10 +961,8 @@ uint64_t helper_sqrts (uint64_t a, uint32_t quals) float32 fa, fr; uint32_t token; - fa = s_to_float32(a); - token = begin_fp(quals); - float32_input(quals, fa); + fa = input_s(quals, a); fr = float32_sqrt(fa, &FP_STATUS); end_fp(quals, token); @@ -926,17 +987,20 @@ static inline uint64_t float64_to_t(float64 fa) return r.ll; } +/* Raise any exceptions needed for using F, given the insn qualifiers. */ +static float64 input_t(uint32_t quals, uint64_t a) +{ + return t_to_float64(remap_ieee_input(quals, a)); +} + uint64_t helper_addt (uint64_t a, uint64_t b, uint32_t quals) { float64 fa, fb, fr; uint32_t token; - fa = t_to_float64(a); - fb = t_to_float64(b); - token = begin_fp(quals); - float64_input(quals, fa); - float64_input(quals, fb); + fa = input_t(quals, a); + fb = input_t(quals, b); fr = float64_add(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -948,12 +1012,9 @@ uint64_t helper_subt (uint64_t a, uint64_t b, uint32_t quals) float64 fa, fb, fr; uint32_t token; - fa = t_to_float64(a); - fb = t_to_float64(b); - token = begin_fp(quals); - float64_input(quals, fa); - float64_input(quals, fb); + fa = input_t(quals, a); + fb = input_t(quals, b); fr = float64_sub(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -965,12 +1026,9 @@ uint64_t helper_mult (uint64_t a, uint64_t b, uint32_t quals) float64 fa, fb, fr; uint32_t token; - fa = t_to_float64(a); - fb = t_to_float64(b); - token = begin_fp(quals); - float64_input(quals, fa); - float64_input(quals, fb); + fa = input_t(quals, a); + fb = input_t(quals, b); fr = float64_mul(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -982,12 +1040,9 @@ uint64_t helper_divt (uint64_t a, uint64_t b, uint32_t quals) float64 fa, fb, fr; uint32_t token; - fa = t_to_float64(a); - fb = t_to_float64(b); - token = begin_fp(quals); - float64_input(quals, fa); - float64_input(quals, fb); + fa = input_t(quals, a); + fb = input_t(quals, b); fr = float64_div(fa, fb, &FP_STATUS); end_fp(quals, token); @@ -999,10 +1054,8 @@ uint64_t helper_sqrtt (uint64_t a, uint32_t quals) float64 fa, fr; uint32_t token; - fa = t_to_float64(a); - token = begin_fp(quals); - float64_input(quals, fa); + fa = input_t(quals, a); fr = float64_sqrt(fa, &FP_STATUS); end_fp(quals, token); @@ -1028,6 +1081,8 @@ uint64_t helper_cpyse(uint64_t a, uint64_t b) /* Comparisons */ +/* ??? Software completion qualifier missing. */ + uint64_t helper_cmptun (uint64_t a, uint64_t b) { float64 fa, fb; @@ -1126,10 +1181,8 @@ uint64_t helper_cvtts (uint64_t a, uint32_t quals) float32 fr; uint32_t token; - fa = t_to_float64(a); - token = begin_fp(quals); - float64_input(quals, fa); + fa = input_t(quals, a); fr = float64_to_float32(fa, &FP_STATUS); end_fp(quals, token); @@ -1142,10 +1195,8 @@ uint64_t helper_cvtst (uint64_t a, uint32_t quals) float64 fr; uint32_t token; - fa = s_to_float32(a); - token = begin_fp(quals); - float32_input(quals, fa); + fa = input_s(quals, a); fr = float32_to_float64(fa, &FP_STATUS); end_fp(quals, token); @@ -1164,115 +1215,125 @@ uint64_t helper_cvtqs (uint64_t a, uint32_t quals) return float32_to_s(fr); } -uint64_t helper_cvttq (uint64_t a, uint32_t quals) -{ - uint64_t ret, frac; - uint32_t token, exp, sign, exc = 0; - - token = begin_fp(quals); +/* Implement float64 to uint64 conversion without overflow enabled. + In this mode we must supply the truncated result. This behaviour + is used by the compiler to get unsigned conversion for free with + the same instruction. */ - /* Alpha integer conversion does not saturate, as the generic routine - does. Instead it supplies a truncated result. This fact is relied - upon by GCC in that without overflow enabled we can get unsigned - conversion for free with the same instruction. */ +static uint64_t cvttq_noqual_internal(uint64_t a, uint32_t rounding_mode) +{ + uint64_t frac, ret = 0; + uint32_t exp, sign; + int shift; sign = (a >> 63); exp = (uint32_t)(a >> 52) & 0x7ff; frac = a & 0xfffffffffffffull; - if (exp == 0) { - ret = 0; - if (frac != 0) { - /* ??? If DNZ set, map to zero without trapping. */ - /* ??? Figure out what kind of exception signal to send. */ - if (!(quals & 0x400)) - helper_excp(EXCP_ARITH, 0); - goto do_underflow; - } - } else if (exp == 0x7ff) { - /* In keeping with the truncation result, both infinity and NaN - give result of zero. See Table B-2 in the Alpha Architecture - Handbook. */ - ret = 0; - exc = float_flag_invalid; + /* We already handled denormals in remap_ieee_input; infinities and + nans are defined to return zero as per truncation. */ + if (exp == 0 || exp == 0x7ff) + return 0; - /* Without /s qualifier, both Inf and NaN trap. SNaN always traps. */ - if (!(quals & 0x400) || (frac & 0x4000000000000ull)) - helper_excp(EXCP_ARITH, 0); + /* Restore implicit bit. */ + frac |= 0x10000000000000ull; + + /* Note that neither overflow exceptions nor inexact exceptions + are desired. This lets us streamline the checks quite a bit. */ + shift = exp - 1023 - 52; + if (shift >= 0) { + /* In this case the number is so large that we must shift + the fraction left. There is no rounding to do. */ + if (shift < 63) { + ret = frac << shift; + } } else { - int32_t shift; - - /* Restore implicit bit. */ - frac |= 0x10000000000000ull; - - shift = exp - 1023 - 52; - if (shift > 0) { - /* In this case the number is so large that we must shift - the fraction left. There is no rounding to do, but we - must still set inexact for overflow. */ - if (shift < 63) { - ret = frac << shift; - if ((ret >> shift) != frac) - exc = float_flag_inexact; - } else { - exc = float_flag_inexact; - ret = 0; - } - } else if (shift == 0) { - /* The exponent is exactly right for the 52-bit fraction. */ - ret = frac; + uint64_t round; + + /* In this case the number is smaller than the fraction as + represented by the 52 bit number. Here we must think + about rounding the result. Handle this by shifting the + fractional part of the number into the high bits of ROUND. + This will let us efficiently handle round-to-nearest. */ + shift = -shift; + if (shift < 63) { + ret = frac >> shift; + round = frac << (64 - shift); } else { - uint64_t round; - - /* In this case the number is smaller than the fraction as - represented by the 52 bit number. Here we must think - about rounding the result. Handle this by shifting the - fractional part of the number into the high bits of ROUND. - This will let us efficiently handle round-to-nearest. */ - shift = -shift; - if (shift < 63) { - ret = frac >> shift; - round = frac << (64 - shift); - } else { - do_underflow: - /* The exponent is so small we shift out everything. */ - ret = 0; - round = 1; - } + /* The exponent is so small we shift out everything. + Leave a sticky bit for proper rounding below. */ + round = 1; + } - if (round) { - exc = float_flag_inexact; - switch (FP_STATUS.float_rounding_mode) { - case float_round_nearest_even: - if (round == (1ull << 63)) { - /* The remaining fraction is exactly 0.5; - round to even. */ - ret += (ret & 1); - } else if (round > (1ull << 63)) { - ret += 1; - } - break; - case float_round_to_zero: - break; - case float_round_up: - if (!sign) - ret += 1; - break; - case float_round_down: - if (sign) - ret += 1; - break; + if (round) { + switch (rounding_mode) { + case float_round_nearest_even: + if (round == (1ull << 63)) { + /* Remaining fraction is exactly 0.5; round to even. */ + ret += (ret & 1); + } else if (round > (1ull << 63)) { + ret += 1; } + break; + case float_round_to_zero: + break; + case float_round_up: + if (!sign) + ret += 1; + break; + case float_round_down: + if (sign) + ret += 1; + break; } } - - if (sign) - ret = -ret; } - if (exc) - float_raise(exc, &FP_STATUS); - end_fp(quals, token); + if (sign) + ret = -ret; + return ret; +} + +uint64_t helper_cvttq (uint64_t a, uint32_t quals) +{ + uint64_t ret; + + a = remap_ieee_input(quals, a); + + if (quals & QUAL_V) { + float64 fa = t_to_float64(a); + uint32_t token; + + token = begin_fp_exception(); + if ((quals & QUAL_RM_MASK) == QUAL_RM_C) { + ret = float64_to_int64_round_to_zero(fa, &FP_STATUS); + } else { + token |= begin_fp_roundmode(quals); + ret = float64_to_int64(fa, &FP_STATUS); + end_fp_roundmode(token); + } + end_fp_exception(quals, token); + } else { + uint32_t round_mode; + + switch (quals & QUAL_RM_MASK) { + case QUAL_RM_N: + round_mode = float_round_nearest_even; + break; + case QUAL_RM_C: + default: + round_mode = float_round_to_zero; + break; + case QUAL_RM_M: + round_mode = float_round_down; + break; + case QUAL_RM_D: + round_mode = FP_STATUS.float_rounding_mode; + break; + } + + ret = cvttq_noqual_internal(a, round_mode); + } return ret; } @@ -1310,7 +1371,6 @@ uint64_t helper_cvtgf (uint64_t a, uint32_t quals) fa = g_to_float64(a); token = begin_fp(quals); - float64_input(quals, fa); fr = float64_to_float32(fa, &FP_STATUS); end_fp(quals, token); @@ -1326,7 +1386,6 @@ uint64_t helper_cvtgq (uint64_t a, uint32_t quals) fa = g_to_float64(a); token = begin_fp(quals); - float64_input(quals, fa); ret = float64_to_int64(fa, &FP_STATUS); end_fp(quals, token); @@ -1352,35 +1411,24 @@ uint64_t helper_cvtlq (uint64_t a) return (lo & 0x3FFFFFFF) | (hi & 0xc0000000); } -static inline uint64_t __helper_cvtql(uint64_t a, int s, int v) -{ - uint64_t r; - - r = ((uint64_t)(a & 0xC0000000)) << 32; - r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29; - - if (v && (int64_t)((int32_t)r) != (int64_t)r) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); - } - if (s) { - /* TODO */ - } - return r; -} - uint64_t helper_cvtql (uint64_t a) { - return __helper_cvtql(a, 0, 0); + return ((a & 0xC0000000) << 32) | ((a & 0x7FFFFFFF) << 29); } uint64_t helper_cvtqlv (uint64_t a) { - return __helper_cvtql(a, 0, 1); + if ((int32_t)a != (int64_t)a) + helper_excp(EXCP_ARITH, EXC_M_IOV); + return helper_cvtql(a); } uint64_t helper_cvtqlsv (uint64_t a) { - return __helper_cvtql(a, 1, 1); + /* ??? I'm pretty sure there's nothing that /sv needs to do that /v + doesn't do. The only thing I can think is that /sv is a valid + instruction merely for completeness in the ISA. */ + return helper_cvtqlv(a); } /* PALcode support special instructions */

target-alpha: An approach to fp insn qualifiers

Commit Message

Patch