Message ID | 4B2BFD85.6070702@twiddle.net |
---|---|
State | New |
Headers | show |
On Fri, Dec 18, 2009 at 02:09:09PM -0800, Richard Henderson wrote: > This is a squashed version of the 3 or 4 incremental patches that I > had sent out for implementing the alpha fpu instruction qualifiers. > > First of all, this patch has a lot of coding style issues. I have reported some of them at the beginning of the file, but stopped at some point. My main concern about this patch is that I don't really understand why the current fp exceptions, the current rounding mode or flush_to_zero mode are stored in FP_STATUS. I think it would be better to have dedicated variable(s) in the cpu state structure, as it is done in other emulated architectures. For example, instead of saving the exceptions, doing a few fp instructions, and restoring them, it is better to have a separate variable that holds the current CPU FPU state (which probably already exists as (part of) a CPU register), always clear the FP_STATUS.float_exception_flags variable before an instruction or sequence of instructions, and copy the bits that need to be copied back to the variable holding the CPU FPU state. That would save a lot of mask and shift operations that are currently done in your patch, and also a lot of save and restore operations when executing code. > commit 572164702dd83955fc8783c85811ec86c3fb6e4a > Author: Richard Henderson <rth@twiddle.net> > Date: Fri Dec 18 10:50:32 2009 -0800 > > target-alpha: Implement fp insn qualifiers. > > Adds a third constant argument to the fpu helpers, which contain the > unparsed qualifier bits. The helper functions use new begin_fp/end_fp > routines that extract the rounding mode from the qualifier bits, as > well as raise exceptions for non-finite inputs and outputs also as > directed by the qualifier bits. > > cpu_alpha_load/store_fpcr modified to load/store the majority of the > bits from env->fpcr. This because we hadn't been saving a few of the > fpcr bits in the fp_status field: in particular DNZ. 
> > Re-implement cvttq without saturation of overflow results, to match > the Alpha specification. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > > diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h > index c0dff4b..c1c0470 100644 > --- a/target-alpha/cpu.h > +++ b/target-alpha/cpu.h > @@ -430,9 +430,13 @@ enum { > }; > > /* Arithmetic exception */ > -enum { > - EXCP_ARITH_OVERFLOW, > -}; > +#define EXC_M_IOV (1<<16) /* Integer Overflow */ > +#define EXC_M_INE (1<<15) /* Inexact result */ > +#define EXC_M_UNF (1<<14) /* Underflow */ > +#define EXC_M_FOV (1<<13) /* Overflow */ > +#define EXC_M_DZE (1<<12) /* Division by zero */ > +#define EXC_M_INV (1<<11) /* Invalid operation */ > +#define EXC_M_SWC (1<<10) /* Software completion */ > > enum { > IR_V0 = 0, > diff --git a/target-alpha/helper.c b/target-alpha/helper.c > index be7d37b..94821bd 100644 > --- a/target-alpha/helper.c > +++ b/target-alpha/helper.c > @@ -27,41 +27,13 @@ > > uint64_t cpu_alpha_load_fpcr (CPUState *env) > { > - uint64_t ret = 0; > - int flags, mask; > - > - flags = env->fp_status.float_exception_flags; > - ret |= (uint64_t) flags << 52; > - if (flags) > - ret |= FPCR_SUM; > - env->ipr[IPR_EXC_SUM] &= ~0x3E; > - env->ipr[IPR_EXC_SUM] |= flags << 1; > - > - mask = env->fp_status.float_exception_mask; > - if (mask & float_flag_invalid) > - ret |= FPCR_INVD; > - if (mask & float_flag_divbyzero) > - ret |= FPCR_DZED; > - if (mask & float_flag_overflow) > - ret |= FPCR_OVFD; > - if (mask & float_flag_underflow) > - ret |= FPCR_UNFD; > - if (mask & float_flag_inexact) > - ret |= FPCR_INED; > - > - switch (env->fp_status.float_rounding_mode) { > - case float_round_nearest_even: > - ret |= 2ULL << FPCR_DYN_SHIFT; > - break; > - case float_round_down: > - ret |= 1ULL << FPCR_DYN_SHIFT; > - break; > - case float_round_up: > - ret |= 3ULL << FPCR_DYN_SHIFT; > - break; > - case float_round_to_zero: > - break; > - } > + uint64_t ret = env->fp_status.float_exception_flags; > + > + if (ret) > + 
ret = FPCR_SUM | (ret << 52); Coding style. > + > + ret |= env->fpcr & ~(FPCR_SUM | FPCR_STATUS_MASK); > + > return ret; > } > > @@ -69,6 +41,8 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val) > { > int round_mode, mask; > > + env->fpcr = val; > + > set_float_exception_flags((val >> 52) & 0x3F, &env->fp_status); > > mask = 0; > @@ -86,6 +60,7 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val) > > switch ((val >> FPCR_DYN_SHIFT) & 3) { > case 0: > + default: > round_mode = float_round_to_zero; > break; > case 1: > @@ -100,6 +75,11 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val) > break; > } > set_float_rounding_mode(round_mode, &env->fp_status); > + > + mask = 0; > + if ((val & (FPCR_UNDZ|FPCR_UNFD)) == (FPCR_UNDZ|FPCR_UNFD)) > + mask = 1; Coding style. Also the name of the variable "mask" is a bit misleading for true/false variable. > + set_flush_to_zero(mask, &env->fp_status); > } > > #if defined(CONFIG_USER_ONLY) > diff --git a/target-alpha/helper.h b/target-alpha/helper.h > index bedd3c0..1521a84 100644 > --- a/target-alpha/helper.h > +++ b/target-alpha/helper.h > @@ -41,33 +41,33 @@ DEF_HELPER_1(store_fpcr, void, i64) > > DEF_HELPER_1(f_to_memory, i32, i64) > DEF_HELPER_1(memory_to_f, i64, i32) > -DEF_HELPER_2(addf, i64, i64, i64) > -DEF_HELPER_2(subf, i64, i64, i64) > -DEF_HELPER_2(mulf, i64, i64, i64) > -DEF_HELPER_2(divf, i64, i64, i64) > -DEF_HELPER_1(sqrtf, i64, i64) > +DEF_HELPER_3(addf, i64, i64, i64, i32) > +DEF_HELPER_3(subf, i64, i64, i64, i32) > +DEF_HELPER_3(mulf, i64, i64, i64, i32) > +DEF_HELPER_3(divf, i64, i64, i64, i32) > +DEF_HELPER_2(sqrtf, i64, i64, i32) > > DEF_HELPER_1(g_to_memory, i64, i64) > DEF_HELPER_1(memory_to_g, i64, i64) > -DEF_HELPER_2(addg, i64, i64, i64) > -DEF_HELPER_2(subg, i64, i64, i64) > -DEF_HELPER_2(mulg, i64, i64, i64) > -DEF_HELPER_2(divg, i64, i64, i64) > -DEF_HELPER_1(sqrtg, i64, i64) > +DEF_HELPER_3(addg, i64, i64, i64, i32) > +DEF_HELPER_3(subg, i64, i64, i64, i32) > +DEF_HELPER_3(mulg, 
i64, i64, i64, i32) > +DEF_HELPER_3(divg, i64, i64, i64, i32) > +DEF_HELPER_2(sqrtg, i64, i64, i32) > > DEF_HELPER_1(s_to_memory, i32, i64) > DEF_HELPER_1(memory_to_s, i64, i32) > -DEF_HELPER_2(adds, i64, i64, i64) > -DEF_HELPER_2(subs, i64, i64, i64) > -DEF_HELPER_2(muls, i64, i64, i64) > -DEF_HELPER_2(divs, i64, i64, i64) > -DEF_HELPER_1(sqrts, i64, i64) > - > -DEF_HELPER_2(addt, i64, i64, i64) > -DEF_HELPER_2(subt, i64, i64, i64) > -DEF_HELPER_2(mult, i64, i64, i64) > -DEF_HELPER_2(divt, i64, i64, i64) > -DEF_HELPER_1(sqrtt, i64, i64) > +DEF_HELPER_3(adds, i64, i64, i64, i32) > +DEF_HELPER_3(subs, i64, i64, i64, i32) > +DEF_HELPER_3(muls, i64, i64, i64, i32) > +DEF_HELPER_3(divs, i64, i64, i64, i32) > +DEF_HELPER_2(sqrts, i64, i64, i32) > + > +DEF_HELPER_3(addt, i64, i64, i64, i32) > +DEF_HELPER_3(subt, i64, i64, i64, i32) > +DEF_HELPER_3(mult, i64, i64, i64, i32) > +DEF_HELPER_3(divt, i64, i64, i64, i32) > +DEF_HELPER_2(sqrtt, i64, i64, i32) > > DEF_HELPER_2(cmptun, i64, i64, i64) > DEF_HELPER_2(cmpteq, i64, i64, i64) > @@ -81,15 +81,15 @@ DEF_HELPER_2(cpys, i64, i64, i64) > DEF_HELPER_2(cpysn, i64, i64, i64) > DEF_HELPER_2(cpyse, i64, i64, i64) > > -DEF_HELPER_1(cvtts, i64, i64) > -DEF_HELPER_1(cvtst, i64, i64) > -DEF_HELPER_1(cvttq, i64, i64) > -DEF_HELPER_1(cvtqs, i64, i64) > -DEF_HELPER_1(cvtqt, i64, i64) > -DEF_HELPER_1(cvtqf, i64, i64) > -DEF_HELPER_1(cvtgf, i64, i64) > -DEF_HELPER_1(cvtgq, i64, i64) > -DEF_HELPER_1(cvtqg, i64, i64) > +DEF_HELPER_2(cvtts, i64, i64, i32) > +DEF_HELPER_2(cvtst, i64, i64, i32) > +DEF_HELPER_2(cvttq, i64, i64, i32) > +DEF_HELPER_2(cvtqs, i64, i64, i32) > +DEF_HELPER_2(cvtqt, i64, i64, i32) > +DEF_HELPER_2(cvtqf, i64, i64, i32) > +DEF_HELPER_2(cvtgf, i64, i64, i32) > +DEF_HELPER_2(cvtgq, i64, i64, i32) > +DEF_HELPER_2(cvtqg, i64, i64, i32) > DEF_HELPER_1(cvtlq, i64, i64) > DEF_HELPER_1(cvtql, i64, i64) > DEF_HELPER_1(cvtqlv, i64, i64) > diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c > index b2abf6c..2d1c3d5 
100644 > --- a/target-alpha/op_helper.c > +++ b/target-alpha/op_helper.c > @@ -24,7 +24,7 @@ > > /*****************************************************************************/ > /* Exceptions processing helpers */ > -void helper_excp (int excp, int error) > +void QEMU_NORETURN helper_excp (int excp, int error) > { > env->exception_index = excp; > env->error_code = error; > @@ -78,7 +78,7 @@ uint64_t helper_addqv (uint64_t op1, uint64_t op2) > uint64_t tmp = op1; > op1 += op2; > if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) { > - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); > + helper_excp(EXCP_ARITH, EXC_M_IOV); > } > return op1; > } > @@ -88,7 +88,7 @@ uint64_t helper_addlv (uint64_t op1, uint64_t op2) > uint64_t tmp = op1; > op1 = (uint32_t)(op1 + op2); > if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) { > - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); > + helper_excp(EXCP_ARITH, EXC_M_IOV); > } > return op1; > } > @@ -98,7 +98,7 @@ uint64_t helper_subqv (uint64_t op1, uint64_t op2) > uint64_t res; > res = op1 - op2; > if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) { > - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); > + helper_excp(EXCP_ARITH, EXC_M_IOV); > } > return res; > } > @@ -108,7 +108,7 @@ uint64_t helper_sublv (uint64_t op1, uint64_t op2) > uint32_t res; > res = op1 - op2; > if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) { > - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); > + helper_excp(EXCP_ARITH, EXC_M_IOV); > } > return res; > } > @@ -118,7 +118,7 @@ uint64_t helper_mullv (uint64_t op1, uint64_t op2) > int64_t res = (int64_t)op1 * (int64_t)op2; > > if (unlikely((int32_t)res != res)) { > - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); > + helper_excp(EXCP_ARITH, EXC_M_IOV); > } > return (int64_t)((int32_t)res); > } > @@ -130,7 +130,7 @@ uint64_t helper_mulqv (uint64_t op1, uint64_t op2) > muls64(&tl, &th, op1, op2); > /* If th != 0 && th != -1, then we had an overflow */ > if (unlikely((th + 1) 
> 1)) { > - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); > + helper_excp(EXCP_ARITH, EXC_M_IOV); > } > return tl; > } > @@ -370,8 +370,175 @@ uint64_t helper_unpkbw (uint64_t op1) > > /* Floating point helpers */ > > +/* ??? Not implemented is setting EXC_MASK, containing a bitmask of > + destination registers of instructions that have caused arithmetic > + traps. Not needed for userspace emulation, or for complete > + emulation of the entire fpu stack within qemu. But we would need > + it to invoke a guest kernel's entArith trap handler properly. > + > + It would be possible to encode the FP destination register in the > + QUAL parameter for the FPU helpers below; additional changes would > + be required for ADD/V et al above. */ > + > +#define QUAL_RM_N 0x080 /* Round mode nearest even */ > +#define QUAL_RM_C 0x000 /* Round mode chopped */ > +#define QUAL_RM_M 0x040 /* Round mode minus infinity */ > +#define QUAL_RM_D 0x0c0 /* Round mode dynamic */ > +#define QUAL_RM_MASK 0x0c0 > + > +#define QUAL_U 0x100 /* Underflow enable (fp output) */ > +#define QUAL_V 0x100 /* Overflow enable (int output) */ > +#define QUAL_S 0x400 /* Software completion enable */ > +#define QUAL_I 0x200 /* Inexact detection enable */ > + > +/* If the floating-point qualifiers specified a rounding mode, > + set that rounding mode and remember the original mode for > + resetting at the end of the instruction. */ > +static inline uint32_t begin_fp_roundmode(uint32_t qual) > +{ > + uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = rm; > + > + switch (qual & QUAL_RM_MASK) { > + default: > + case QUAL_RM_N: > + rm = float_round_nearest_even; > + break; > + case QUAL_RM_C: > + rm = float_round_to_zero; > + break; > + case QUAL_RM_M: > + rm = float_round_down; > + break; > + case QUAL_RM_D: > + return old_rm; Does it correspond to the mode described above as to be implemented? > + } > + if (old_rm != rm) > + set_float_rounding_mode(rm, &FP_STATUS); Coding style. 
> + return old_rm; > +} > + > +/* Zero the exception flags so that we can determine if the current > + instruction raises any exceptions. Save the old acrued exception > + status so that we can restore them at the end of the insn. */ > +static inline uint32_t begin_fp_exception(void) > +{ > + uint32_t old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8; > + set_float_exception_flags(0, &FP_STATUS); > + return old_exc; > +} > + > +static inline uint32_t begin_fp_flush_to_zero(uint32_t quals) > +{ > + /* If underflow detection is disabled, silently flush to zero. > + Note that flush-to-zero mode may already be enabled via the FPCR. */ > + if ((quals & QUAL_U) == 0 && !FP_STATUS.flush_to_zero) { > + set_flush_to_zero(1, &FP_STATUS); > + return 0x10000; What does this constant correspond to? > + } > + return 0; > +} > + > +/* Begin processing an fp operation. Return a token that should be passed > + when completing the fp operation. */ > +static uint32_t begin_fp(uint32_t quals) > +{ > + uint32_t ret = 0; > + > + ret |= begin_fp_roundmode(quals); > + ret |= begin_fp_flush_to_zero(quals); > + ret |= begin_fp_exception(); > + > + return ret; > +} > + > +/* End processing an fp operation. */ > + > +static inline void end_fp_roundmode(uint32_t orig) > +{ > + uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = orig & 0xff; > + if (unlikely(rm != old_rm)) > + set_float_rounding_mode(old_rm, &FP_STATUS); coding style > +} > + > +static inline void end_fp_flush_to_zero(uint32_t orig) > +{ > + if (orig & 0x10000) What does this constant correspond to? I guess it matches the previous one. > + set_flush_to_zero(0, &FP_STATUS); coding style > +} > + > +static void end_fp_exception(uint32_t quals, uint32_t orig) > +{ > + uint8_t exc = FP_STATUS.float_exception_flags; > + > + /* If inexact detection is disabled, silently clear it. */ > + if ((quals & QUAL_I) == 0) > + exc &= ~float_flag_inexact; Coding style. 
> + > + orig = (orig >> 8) & 0xff; > + set_float_exception_flags(exc | orig, &FP_STATUS); > + > + /* Raise an exception as required. */ > + if (unlikely(exc)) { > + if (quals & QUAL_S) > + exc &= ~FP_STATUS.float_exception_mask; > + if (exc) { > + uint32_t hw_exc = 0; > + > + if (exc & float_flag_invalid) > + hw_exc |= EXC_M_INV; > + if (exc & float_flag_divbyzero) > + hw_exc |= EXC_M_DZE; > + if (exc & float_flag_overflow) > + hw_exc |= EXC_M_FOV; > + if (exc & float_flag_underflow) > + hw_exc |= EXC_M_UNF; > + if (exc & float_flag_inexact) > + hw_exc |= EXC_M_INE; > + > + helper_excp(EXCP_ARITH, hw_exc); > + } > + } > +} > + > +static void end_fp(uint32_t quals, uint32_t orig) > +{ > + end_fp_roundmode(orig); > + end_fp_flush_to_zero(orig); > + end_fp_exception(quals, orig); > +} > + > +static uint64_t remap_ieee_input(uint32_t quals, uint64_t a) > +{ > + uint64_t frac; > + uint32_t exp; > + > + exp = (uint32_t)(a >> 52) & 0x7ff; > + frac = a & 0xfffffffffffffull; > + > + if (exp == 0) { > + if (frac != 0) { > + /* If DNZ is set, flush denormals to zero on input. */ > + if (env->fpcr & FPCR_DNZ) > + a = a & (1ull << 63); > + /* If software completion not enabled, trap. */ > + else if ((quals & QUAL_S) == 0) > + helper_excp(EXCP_ARITH, EXC_M_UNF); > + } > + } else if (exp == 0x7ff) { > + /* Infinity or NaN. If software completion is not enabled, trap. > + If /s is enabled, we'll properly signal for SNaN on output. */ > + /* ??? I'm not sure these exception bit flags are correct. I do > + know that the Linux kernel, at least, doesn't rely on them and > + just emulates the insn to figure out what exception to use. */ > + if ((quals & QUAL_S) == 0) > + helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV); Coding style. 
> + } > + > + return a; > +} > + > /* F floating (VAX) */ > -static inline uint64_t float32_to_f(float32 fa) > +static uint64_t float32_to_f(float32 fa) > { > uint64_t r, exp, mant, sig; > CPU_FloatU a; > @@ -404,7 +571,7 @@ static inline uint64_t float32_to_f(float32 fa) > return r; > } > > -static inline float32 f_to_float32(uint64_t a) > +static float32 f_to_float32(uint64_t a) > { > uint32_t exp, mant_sig; > CPU_FloatU r; > @@ -447,58 +614,83 @@ uint64_t helper_memory_to_f (uint32_t a) > return r; > } > > -uint64_t helper_addf (uint64_t a, uint64_t b) > +uint64_t helper_addf (uint64_t a, uint64_t b, uint32_t quals) > { > float32 fa, fb, fr; > + uint32_t token; > > fa = f_to_float32(a); > fb = f_to_float32(b); > + > + token = begin_fp(quals); > fr = float32_add(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_f(fr); > } > > -uint64_t helper_subf (uint64_t a, uint64_t b) > +uint64_t helper_subf (uint64_t a, uint64_t b, uint32_t quals) > { > float32 fa, fb, fr; > + uint32_t token; > > fa = f_to_float32(a); > fb = f_to_float32(b); > + > + token = begin_fp(quals); > fr = float32_sub(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_f(fr); > } > > -uint64_t helper_mulf (uint64_t a, uint64_t b) > +uint64_t helper_mulf (uint64_t a, uint64_t b, uint32_t quals) > { > float32 fa, fb, fr; > + uint32_t token; > > fa = f_to_float32(a); > fb = f_to_float32(b); > + > + token = begin_fp(quals); > fr = float32_mul(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_f(fr); > } > > -uint64_t helper_divf (uint64_t a, uint64_t b) > +uint64_t helper_divf (uint64_t a, uint64_t b, uint32_t quals) > { > float32 fa, fb, fr; > + uint32_t token; > > fa = f_to_float32(a); > fb = f_to_float32(b); > + > + token = begin_fp(quals); > fr = float32_div(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_f(fr); > } > > -uint64_t helper_sqrtf (uint64_t t) > +uint64_t helper_sqrtf (uint64_t t, uint32_t quals) > { > 
float32 ft, fr; > + uint32_t token; > > ft = f_to_float32(t); > + > + token = begin_fp(quals); > fr = float32_sqrt(ft, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_f(fr); > } > > > /* G floating (VAX) */ > -static inline uint64_t float64_to_g(float64 fa) > +static uint64_t float64_to_g(float64 fa) > { > uint64_t r, exp, mant, sig; > CPU_DoubleU a; > @@ -531,7 +723,7 @@ static inline uint64_t float64_to_g(float64 fa) > return r; > } > > -static inline float64 g_to_float64(uint64_t a) > +static float64 g_to_float64(uint64_t a) > { > uint64_t exp, mant_sig; > CPU_DoubleU r; > @@ -574,52 +766,77 @@ uint64_t helper_memory_to_g (uint64_t a) > return r; > } > > -uint64_t helper_addg (uint64_t a, uint64_t b) > +uint64_t helper_addg (uint64_t a, uint64_t b, uint32_t quals) > { > float64 fa, fb, fr; > + uint32_t token; > > fa = g_to_float64(a); > fb = g_to_float64(b); > + > + token = begin_fp(quals); > fr = float64_add(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_g(fr); > } > > -uint64_t helper_subg (uint64_t a, uint64_t b) > +uint64_t helper_subg (uint64_t a, uint64_t b, uint32_t quals) > { > float64 fa, fb, fr; > + uint32_t token; > > fa = g_to_float64(a); > fb = g_to_float64(b); > + > + token = begin_fp(quals); > fr = float64_sub(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_g(fr); > } > > -uint64_t helper_mulg (uint64_t a, uint64_t b) > +uint64_t helper_mulg (uint64_t a, uint64_t b, uint32_t quals) > { > float64 fa, fb, fr; > - > + uint32_t token; > + > fa = g_to_float64(a); > fb = g_to_float64(b); > + > + token = begin_fp(quals); > fr = float64_mul(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_g(fr); > } > > -uint64_t helper_divg (uint64_t a, uint64_t b) > +uint64_t helper_divg (uint64_t a, uint64_t b, uint32_t quals) > { > float64 fa, fb, fr; > + uint32_t token; > > fa = g_to_float64(a); > fb = g_to_float64(b); > + > + token = begin_fp(quals); > fr = float64_div(fa, fb, 
&FP_STATUS); > + end_fp(quals, token); > + > return float64_to_g(fr); > } > > -uint64_t helper_sqrtg (uint64_t a) > +uint64_t helper_sqrtg (uint64_t a, uint32_t quals) > { > float64 fa, fr; > + uint32_t token; > > fa = g_to_float64(a); > + > + token = begin_fp(quals); > fr = float64_sqrt(fa, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_g(fr); > } > > @@ -627,7 +844,7 @@ uint64_t helper_sqrtg (uint64_t a) > /* S floating (single) */ > > /* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg. */ > -static inline uint64_t float32_to_s_int(uint32_t fi) > +static uint64_t float32_to_s_int(uint32_t fi) > { > uint32_t frac = fi & 0x7fffff; > uint32_t sign = fi >> 31; > @@ -649,7 +866,7 @@ static inline uint64_t float32_to_s_int(uint32_t fi) > | ((uint64_t)frac << 29)); > } > > -static inline uint64_t float32_to_s(float32 fa) > +static uint64_t float32_to_s(float32 fa) > { > CPU_FloatU a; > a.f = fa; > @@ -678,52 +895,77 @@ uint64_t helper_memory_to_s (uint32_t a) > return float32_to_s_int(a); > } > > -uint64_t helper_adds (uint64_t a, uint64_t b) > +static float32 input_s(uint32_t quals, uint64_t a) > +{ > + return s_to_float32(remap_ieee_input(quals, a)); > +} > + > +uint64_t helper_adds (uint64_t a, uint64_t b, uint32_t quals) > { > float32 fa, fb, fr; > + uint32_t token; > > - fa = s_to_float32(a); > - fb = s_to_float32(b); > + token = begin_fp(quals); > + fa = input_s(quals, a); > + fb = input_s(quals, b); > fr = float32_add(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_s(fr); > } > > -uint64_t helper_subs (uint64_t a, uint64_t b) > +uint64_t helper_subs (uint64_t a, uint64_t b, uint32_t quals) > { > float32 fa, fb, fr; > + uint32_t token; > > - fa = s_to_float32(a); > - fb = s_to_float32(b); > + token = begin_fp(quals); > + fa = input_s(quals, a); > + fb = input_s(quals, b); > fr = float32_sub(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_s(fr); > } > > -uint64_t helper_muls (uint64_t a, 
uint64_t b) > +uint64_t helper_muls (uint64_t a, uint64_t b, uint32_t quals) > { > float32 fa, fb, fr; > + uint32_t token; > > - fa = s_to_float32(a); > - fb = s_to_float32(b); > + token = begin_fp(quals); > + fa = input_s(quals, a); > + fb = input_s(quals, b); > fr = float32_mul(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_s(fr); > } > > -uint64_t helper_divs (uint64_t a, uint64_t b) > +uint64_t helper_divs (uint64_t a, uint64_t b, uint32_t quals) > { > float32 fa, fb, fr; > + uint32_t token; > > - fa = s_to_float32(a); > - fb = s_to_float32(b); > + token = begin_fp(quals); > + fa = input_s(quals, a); > + fb = input_s(quals, b); > fr = float32_div(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_s(fr); > } > > -uint64_t helper_sqrts (uint64_t a) > +uint64_t helper_sqrts (uint64_t a, uint32_t quals) > { > float32 fa, fr; > + uint32_t token; > > - fa = s_to_float32(a); > + token = begin_fp(quals); > + fa = input_s(quals, a); > fr = float32_sqrt(fa, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_s(fr); > } > > @@ -745,52 +987,78 @@ static inline uint64_t float64_to_t(float64 fa) > return r.ll; > } > > -uint64_t helper_addt (uint64_t a, uint64_t b) > +/* Raise any exceptions needed for using F, given the insn qualifiers. 
*/ > +static float64 input_t(uint32_t quals, uint64_t a) > +{ > + return t_to_float64(remap_ieee_input(quals, a)); > +} > + > +uint64_t helper_addt (uint64_t a, uint64_t b, uint32_t quals) > { > float64 fa, fb, fr; > + uint32_t token; > > - fa = t_to_float64(a); > - fb = t_to_float64(b); > + token = begin_fp(quals); > + fa = input_t(quals, a); > + fb = input_t(quals, b); > fr = float64_add(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_t(fr); > } > > -uint64_t helper_subt (uint64_t a, uint64_t b) > +uint64_t helper_subt (uint64_t a, uint64_t b, uint32_t quals) > { > float64 fa, fb, fr; > + uint32_t token; > > - fa = t_to_float64(a); > - fb = t_to_float64(b); > + token = begin_fp(quals); > + fa = input_t(quals, a); > + fb = input_t(quals, b); > fr = float64_sub(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_t(fr); > } > > -uint64_t helper_mult (uint64_t a, uint64_t b) > +uint64_t helper_mult (uint64_t a, uint64_t b, uint32_t quals) > { > float64 fa, fb, fr; > + uint32_t token; > > - fa = t_to_float64(a); > - fb = t_to_float64(b); > + token = begin_fp(quals); > + fa = input_t(quals, a); > + fb = input_t(quals, b); > fr = float64_mul(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_t(fr); > } > > -uint64_t helper_divt (uint64_t a, uint64_t b) > +uint64_t helper_divt (uint64_t a, uint64_t b, uint32_t quals) > { > float64 fa, fb, fr; > + uint32_t token; > > - fa = t_to_float64(a); > - fb = t_to_float64(b); > + token = begin_fp(quals); > + fa = input_t(quals, a); > + fb = input_t(quals, b); > fr = float64_div(fa, fb, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_t(fr); > } > > -uint64_t helper_sqrtt (uint64_t a) > +uint64_t helper_sqrtt (uint64_t a, uint32_t quals) > { > float64 fa, fr; > + uint32_t token; > > - fa = t_to_float64(a); > + token = begin_fp(quals); > + fa = input_t(quals, a); > fr = float64_sqrt(fa, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_t(fr); > } 
> > @@ -813,6 +1081,8 @@ uint64_t helper_cpyse(uint64_t a, uint64_t b) > > > /* Comparisons */ > +/* ??? Software completion qualifier missing. */ > + > uint64_t helper_cmptun (uint64_t a, uint64_t b) > { > float64 fa, fb; > @@ -905,70 +1175,218 @@ uint64_t helper_cmpglt(uint64_t a, uint64_t b) > } > > /* Floating point format conversion */ > -uint64_t helper_cvtts (uint64_t a) > +uint64_t helper_cvtts (uint64_t a, uint32_t quals) > { > float64 fa; > float32 fr; > + uint32_t token; > > - fa = t_to_float64(a); > + token = begin_fp(quals); > + fa = input_t(quals, a); > fr = float64_to_float32(fa, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_s(fr); > } > > -uint64_t helper_cvtst (uint64_t a) > +uint64_t helper_cvtst (uint64_t a, uint32_t quals) > { > float32 fa; > float64 fr; > + uint32_t token; > > - fa = s_to_float32(a); > + token = begin_fp(quals); > + fa = input_s(quals, a); > fr = float32_to_float64(fa, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_t(fr); > } > > -uint64_t helper_cvtqs (uint64_t a) > +uint64_t helper_cvtqs (uint64_t a, uint32_t quals) > { > - float32 fr = int64_to_float32(a, &FP_STATUS); > + float32 fr; > + uint32_t token; > + > + token = begin_fp(quals); > + fr = int64_to_float32(a, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_s(fr); > } > > -uint64_t helper_cvttq (uint64_t a) > +/* Implement float64 to uint64 conversion without overflow enabled. > + In this mode we must supply the truncated result. This behaviour > + is used by the compiler to get unsigned conversion for free with > + the same instruction. 
*/ > + > +static uint64_t cvttq_internal(uint64_t a) > { > - float64 fa = t_to_float64(a); > - return float64_to_int64_round_to_zero(fa, &FP_STATUS); > + uint64_t frac, ret = 0; > + uint32_t exp, sign, exc = 0; > + int shift; > + > + sign = (a >> 63); > + exp = (uint32_t)(a >> 52) & 0x7ff; > + frac = a & 0xfffffffffffffull; > + > + if (exp == 0) { > + if (unlikely(frac != 0)) > + goto do_underflow; > + } else if (exp == 0x7ff) { > + if (frac == 0) > + exc = float_flag_overflow; > + else > + exc = float_flag_invalid; > + } else { > + /* Restore implicit bit. */ > + frac |= 0x10000000000000ull; > + > + /* Note that neither overflow exceptions nor inexact exceptions > + are desired. This lets us streamline the checks quite a bit. */ > + shift = exp - 1023 - 52; > + if (shift >= 0) { > + /* In this case the number is so large that we must shift > + the fraction left. There is no rounding to do. */ > + if (shift < 63) { > + ret = frac << shift; > + if ((ret >> shift) != frac) > + exc = float_flag_overflow; > + } > + } else { > + uint64_t round; > + > + /* In this case the number is smaller than the fraction as > + represented by the 52 bit number. Here we must think > + about rounding the result. Handle this by shifting the > + fractional part of the number into the high bits of ROUND. > + This will let us efficiently handle round-to-nearest. */ > + shift = -shift; > + if (shift < 63) { > + ret = frac >> shift; > + round = frac << (64 - shift); > + } else { > + /* The exponent is so small we shift out everything. > + Leave a sticky bit for proper rounding below. */ > + do_underflow: > + round = 1; > + } > + > + if (round) { > + exc = float_flag_inexact; > + switch (FP_STATUS.float_rounding_mode) { > + case float_round_nearest_even: > + if (round == (1ull << 63)) { > + /* Fraction is exactly 0.5; round to even. 
*/ > + ret += (ret & 1); > + } else if (round > (1ull << 63)) { > + ret += 1; > + } > + break; > + case float_round_to_zero: > + break; > + case float_round_up: > + if (!sign) > + ret += 1; > + break; > + case float_round_down: > + if (sign) > + ret += 1; > + break; > + } > + } > + } > + if (sign) > + ret = -ret; > + } > + if (unlikely(exc)) > + float_raise(exc, &FP_STATUS); > + > + return ret; > +} > + > +uint64_t helper_cvttq (uint64_t a, uint32_t quals) > +{ > + uint64_t ret; > + uint32_t token; > + > + /* ??? There's an arugument to be made that when /S is enabled, we > + should provide the standard IEEE saturated result, instead of > + the truncated result that we *must* provide when /V is disabled. > + However, that's not how either the Tru64 or Linux completion > + handlers actually work, and GCC knows it. */ > + > + token = begin_fp(quals); > + a = remap_ieee_input(quals, a); > + ret = cvttq_internal(a); > + end_fp(quals, token); > + > + return ret; > } > > -uint64_t helper_cvtqt (uint64_t a) > +uint64_t helper_cvtqt (uint64_t a, uint32_t quals) > { > - float64 fr = int64_to_float64(a, &FP_STATUS); > + float64 fr; > + uint32_t token; > + > + token = begin_fp(quals); > + fr = int64_to_float64(a, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_t(fr); > } > > -uint64_t helper_cvtqf (uint64_t a) > +uint64_t helper_cvtqf (uint64_t a, uint32_t quals) > { > - float32 fr = int64_to_float32(a, &FP_STATUS); > + float32 fr; > + uint32_t token; > + > + token = begin_fp(quals); > + fr = int64_to_float32(a, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_f(fr); > } > > -uint64_t helper_cvtgf (uint64_t a) > +uint64_t helper_cvtgf (uint64_t a, uint32_t quals) > { > float64 fa; > float32 fr; > + uint32_t token; > > fa = g_to_float64(a); > + > + token = begin_fp(quals); > fr = float64_to_float32(fa, &FP_STATUS); > + end_fp(quals, token); > + > return float32_to_f(fr); > } > > -uint64_t helper_cvtgq (uint64_t a) > +uint64_t helper_cvtgq (uint64_t 
a, uint32_t quals) > { > - float64 fa = g_to_float64(a); > - return float64_to_int64_round_to_zero(fa, &FP_STATUS); > + float64 fa; > + uint64_t ret; > + uint32_t token; > + > + fa = g_to_float64(a); > + > + token = begin_fp(quals); > + ret = float64_to_int64(fa, &FP_STATUS); > + end_fp(quals, token); > + > + return ret; > } > > -uint64_t helper_cvtqg (uint64_t a) > +uint64_t helper_cvtqg (uint64_t a, uint32_t quals) > { > float64 fr; > + uint32_t token; > + > + token = begin_fp(quals); > fr = int64_to_float64(a, &FP_STATUS); > + end_fp(quals, token); > + > return float64_to_g(fr); > } > > @@ -979,35 +1397,24 @@ uint64_t helper_cvtlq (uint64_t a) > return (lo & 0x3FFFFFFF) | (hi & 0xc0000000); > } > > -static inline uint64_t __helper_cvtql(uint64_t a, int s, int v) > -{ > - uint64_t r; > - > - r = ((uint64_t)(a & 0xC0000000)) << 32; > - r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29; > - > - if (v && (int64_t)((int32_t)r) != (int64_t)r) { > - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); > - } > - if (s) { > - /* TODO */ > - } > - return r; > -} > - > uint64_t helper_cvtql (uint64_t a) > { > - return __helper_cvtql(a, 0, 0); > + return ((a & 0xC0000000) << 32) | ((a & 0x7FFFFFFF) << 29); > } > > uint64_t helper_cvtqlv (uint64_t a) > { > - return __helper_cvtql(a, 0, 1); > + if ((int32_t)a != (int64_t)a) > + helper_excp(EXCP_ARITH, EXC_M_IOV); > + return helper_cvtql(a); > } > > uint64_t helper_cvtqlsv (uint64_t a) > { > - return __helper_cvtql(a, 1, 1); > + /* ??? I'm pretty sure there's nothing that /sv needs to do that /v > + doesn't do. The only thing I can think is that /sv is a valid > + instruction merely for completeness in the ISA. 
*/ > + return helper_cvtqlv(a); > } > > /* PALcode support special instructions */ > diff --git a/target-alpha/translate.c b/target-alpha/translate.c > index 45cb697..e0ca0ed 100644 > --- a/target-alpha/translate.c > +++ b/target-alpha/translate.c > @@ -442,81 +442,79 @@ static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc) > gen_set_label(l1); > } > > -#define FARITH2(name) \ > -static inline void glue(gen_f, name)(int rb, int rc) \ > -{ \ > - if (unlikely(rc == 31)) \ > - return; \ > - \ > - if (rb != 31) \ > - gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]); \ > - else { \ > - TCGv tmp = tcg_const_i64(0); \ > - gen_helper_ ## name (cpu_fir[rc], tmp); \ > - tcg_temp_free(tmp); \ > - } \ > +#define FARITH2(name) \ > +static inline void glue(gen_f, name)(int rb, int rc) \ > +{ \ > + if (unlikely(rc == 31)) \ > + return; \ > + \ > + if (rb != 31) \ > + gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]); \ > + else { \ > + TCGv tmp = tcg_const_i64(0); \ > + gen_helper_ ## name (cpu_fir[rc], tmp); \ > + tcg_temp_free(tmp); \ > + } \ > } > -FARITH2(sqrts) > -FARITH2(sqrtf) > -FARITH2(sqrtg) > -FARITH2(sqrtt) > -FARITH2(cvtgf) > -FARITH2(cvtgq) > -FARITH2(cvtqf) > -FARITH2(cvtqg) > -FARITH2(cvtst) > -FARITH2(cvtts) > -FARITH2(cvttq) > -FARITH2(cvtqs) > -FARITH2(cvtqt) > FARITH2(cvtlq) > FARITH2(cvtql) > FARITH2(cvtqlv) > FARITH2(cvtqlsv) > > -#define FARITH3(name) \ > -static inline void glue(gen_f, name)(int ra, int rb, int rc) \ > -{ \ > - if (unlikely(rc == 31)) \ > - return; \ > - \ > - if (ra != 31) { \ > - if (rb != 31) \ > - gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], cpu_fir[rb]); \ > - else { \ > - TCGv tmp = tcg_const_i64(0); \ > - gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], tmp); \ > - tcg_temp_free(tmp); \ > - } \ > - } else { \ > - TCGv tmp = tcg_const_i64(0); \ > - if (rb != 31) \ > - gen_helper_ ## name (cpu_fir[rc], tmp, cpu_fir[rb]); \ > - else \ > - gen_helper_ ## name (cpu_fir[rc], tmp, tmp); \ > - tcg_temp_free(tmp); \ > - } \ > +#define 
QFARITH2(name) \ > +static inline void glue(gen_f, name)(int rb, int rc, int opc) \ > +{ \ > + TCGv_i32 quals; \ > + if (unlikely(rc == 31)) \ > + return; \ > + quals = tcg_const_i32(opc & ~0x3f); \ > + if (rb != 31) \ > + gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb], quals); \ > + else { \ > + TCGv tmp = tcg_const_i64(0); \ > + gen_helper_ ## name (cpu_fir[rc], tmp, quals); \ > + tcg_temp_free(tmp); \ > + } \ > + tcg_temp_free_i32(quals); \ > +} > +QFARITH2(sqrts) > +QFARITH2(sqrtf) > +QFARITH2(sqrtg) > +QFARITH2(sqrtt) > +QFARITH2(cvtgf) > +QFARITH2(cvtgq) > +QFARITH2(cvtqf) > +QFARITH2(cvtqg) > +QFARITH2(cvtst) > +QFARITH2(cvtts) > +QFARITH2(cvttq) > +QFARITH2(cvtqs) > +QFARITH2(cvtqt) > + > +#define FARITH3(name) \ > +static inline void glue(gen_f, name)(int ra, int rb, int rc) \ > +{ \ > + TCGv zero, ta, tb; \ > + if (unlikely(rc == 31)) \ > + return; \ > + ta = cpu_fir[ra]; \ > + tb = cpu_fir[rb]; \ > + if (unlikely(ra == 31)) { \ > + zero = tcg_const_i64(0); \ > + ta = zero; \ > + } \ > + if (unlikely(rb == 31)) { \ > + if (ra != 31) \ > + zero = tcg_const_i64(0); \ > + tb = zero; \ > + } \ > + gen_helper_ ## name (cpu_fir[rc], ta, tb); \ > + if (ra == 31 || rb == 31) \ > + tcg_temp_free(zero); \ > } > - > -FARITH3(addf) > -FARITH3(subf) > -FARITH3(mulf) > -FARITH3(divf) > -FARITH3(addg) > -FARITH3(subg) > -FARITH3(mulg) > -FARITH3(divg) > FARITH3(cmpgeq) > FARITH3(cmpglt) > FARITH3(cmpgle) > -FARITH3(adds) > -FARITH3(subs) > -FARITH3(muls) > -FARITH3(divs) > -FARITH3(addt) > -FARITH3(subt) > -FARITH3(mult) > -FARITH3(divt) > FARITH3(cmptun) > FARITH3(cmpteq) > FARITH3(cmptlt) > @@ -525,6 +523,47 @@ FARITH3(cpys) > FARITH3(cpysn) > FARITH3(cpyse) > > +#define QFARITH3(name) \ > +static inline void glue(gen_f, name)(int ra, int rb, int rc, int opc) \ > +{ \ > + TCGv zero, ta, tb; \ > + TCGv_i32 quals; \ > + if (unlikely(rc == 31)) \ > + return; \ > + ta = cpu_fir[ra]; \ > + tb = cpu_fir[rb]; \ > + if (unlikely(ra == 31)) { \ > + zero = tcg_const_i64(0); \ > 
+ ta = zero; \ > + } \ > + if (unlikely(rb == 31)) { \ > + if (ra != 31) \ > + zero = tcg_const_i64(0); \ > + tb = zero; \ > + } \ > + quals = tcg_const_i32(opc & ~0x3f); \ > + gen_helper_ ## name (cpu_fir[rc], ta, tb, quals); \ > + tcg_temp_free_i32(quals); \ > + if (ra == 31 || rb == 31) \ > + tcg_temp_free(zero); \ > +} > +QFARITH3(addf) > +QFARITH3(subf) > +QFARITH3(mulf) > +QFARITH3(divf) > +QFARITH3(addg) > +QFARITH3(subg) > +QFARITH3(mulg) > +QFARITH3(divg) > +QFARITH3(adds) > +QFARITH3(subs) > +QFARITH3(muls) > +QFARITH3(divs) > +QFARITH3(addt) > +QFARITH3(subt) > +QFARITH3(mult) > +QFARITH3(divt) > + > static inline uint64_t zapnot_mask(uint8_t lit) > { > uint64_t mask = 0; > @@ -1607,7 +1646,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) > } > break; > case 0x14: > - switch (fpfn) { /* f11 & 0x3F */ > + switch (fpfn) { /* fn11 & 0x3F */ > case 0x04: > /* ITOFS */ > if (!(ctx->amask & AMASK_FIX)) > @@ -1626,13 +1665,13 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) > /* SQRTF */ > if (!(ctx->amask & AMASK_FIX)) > goto invalid_opc; > - gen_fsqrtf(rb, rc); > + gen_fsqrtf(rb, rc, fn11); > break; > case 0x0B: > /* SQRTS */ > if (!(ctx->amask & AMASK_FIX)) > goto invalid_opc; > - gen_fsqrts(rb, rc); > + gen_fsqrts(rb, rc, fn11); > break; > case 0x14: > /* ITOFF */ > @@ -1663,13 +1702,13 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) > /* SQRTG */ > if (!(ctx->amask & AMASK_FIX)) > goto invalid_opc; > - gen_fsqrtg(rb, rc); > + gen_fsqrtg(rb, rc, fn11); > break; > case 0x02B: > /* SQRTT */ > if (!(ctx->amask & AMASK_FIX)) > goto invalid_opc; > - gen_fsqrtt(rb, rc); > + gen_fsqrtt(rb, rc, fn11); > break; > default: > goto invalid_opc; > @@ -1677,47 +1716,42 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) > break; > case 0x15: > /* VAX floating point */ > - /* XXX: rounding mode and trap are ignored (!) 
*/ > - switch (fpfn) { /* f11 & 0x3F */ > + switch (fpfn) { /* fn11 & 0x3F */ > case 0x00: > /* ADDF */ > - gen_faddf(ra, rb, rc); > + gen_faddf(ra, rb, rc, fn11); > break; > case 0x01: > /* SUBF */ > - gen_fsubf(ra, rb, rc); > + gen_fsubf(ra, rb, rc, fn11); > break; > case 0x02: > /* MULF */ > - gen_fmulf(ra, rb, rc); > + gen_fmulf(ra, rb, rc, fn11); > break; > case 0x03: > /* DIVF */ > - gen_fdivf(ra, rb, rc); > + gen_fdivf(ra, rb, rc, fn11); > break; > case 0x1E: > /* CVTDG */ > -#if 0 // TODO > - gen_fcvtdg(rb, rc); > -#else > + /* TODO */ > goto invalid_opc; > -#endif > - break; > case 0x20: > /* ADDG */ > - gen_faddg(ra, rb, rc); > + gen_faddg(ra, rb, rc, fn11); > break; > case 0x21: > /* SUBG */ > - gen_fsubg(ra, rb, rc); > + gen_fsubg(ra, rb, rc, fn11); > break; > case 0x22: > /* MULG */ > - gen_fmulg(ra, rb, rc); > + gen_fmulg(ra, rb, rc, fn11); > break; > case 0x23: > /* DIVG */ > - gen_fdivg(ra, rb, rc); > + gen_fdivg(ra, rb, rc, fn11); > break; > case 0x25: > /* CMPGEQ */ > @@ -1733,27 +1767,23 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) > break; > case 0x2C: > /* CVTGF */ > - gen_fcvtgf(rb, rc); > + gen_fcvtgf(rb, rc, fn11); > break; > case 0x2D: > /* CVTGD */ > -#if 0 // TODO > - gen_fcvtgd(rb, rc); > -#else > + /* TODO */ > goto invalid_opc; > -#endif > - break; > case 0x2F: > /* CVTGQ */ > - gen_fcvtgq(rb, rc); > + gen_fcvtgq(rb, rc, fn11); > break; > case 0x3C: > /* CVTQF */ > - gen_fcvtqf(rb, rc); > + gen_fcvtqf(rb, rc, fn11); > break; > case 0x3E: > /* CVTQG */ > - gen_fcvtqg(rb, rc); > + gen_fcvtqg(rb, rc, fn11); > break; > default: > goto invalid_opc; > @@ -1761,39 +1791,38 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) > break; > case 0x16: > /* IEEE floating-point */ > - /* XXX: rounding mode and traps are ignored (!) 
*/ > - switch (fpfn) { /* f11 & 0x3F */ > + switch (fpfn) { /* fn11 & 0x3F */ > case 0x00: > /* ADDS */ > - gen_fadds(ra, rb, rc); > + gen_fadds(ra, rb, rc, fn11); > break; > case 0x01: > /* SUBS */ > - gen_fsubs(ra, rb, rc); > + gen_fsubs(ra, rb, rc, fn11); > break; > case 0x02: > /* MULS */ > - gen_fmuls(ra, rb, rc); > + gen_fmuls(ra, rb, rc, fn11); > break; > case 0x03: > /* DIVS */ > - gen_fdivs(ra, rb, rc); > + gen_fdivs(ra, rb, rc, fn11); > break; > case 0x20: > /* ADDT */ > - gen_faddt(ra, rb, rc); > + gen_faddt(ra, rb, rc, fn11); > break; > case 0x21: > /* SUBT */ > - gen_fsubt(ra, rb, rc); > + gen_fsubt(ra, rb, rc, fn11); > break; > case 0x22: > /* MULT */ > - gen_fmult(ra, rb, rc); > + gen_fmult(ra, rb, rc, fn11); > break; > case 0x23: > /* DIVT */ > - gen_fdivt(ra, rb, rc); > + gen_fdivt(ra, rb, rc, fn11); > break; > case 0x24: > /* CMPTUN */ > @@ -1812,26 +1841,25 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) > gen_fcmptle(ra, rb, rc); > break; > case 0x2C: > - /* XXX: incorrect */ > if (fn11 == 0x2AC || fn11 == 0x6AC) { > /* CVTST */ > - gen_fcvtst(rb, rc); > + gen_fcvtst(rb, rc, fn11); > } else { > /* CVTTS */ > - gen_fcvtts(rb, rc); > + gen_fcvtts(rb, rc, fn11); > } > break; > case 0x2F: > /* CVTTQ */ > - gen_fcvttq(rb, rc); > + gen_fcvttq(rb, rc, fn11); > break; > case 0x3C: > /* CVTQS */ > - gen_fcvtqs(rb, rc); > + gen_fcvtqs(rb, rc, fn11); > break; > case 0x3E: > /* CVTQT */ > - gen_fcvtqt(rb, rc); > + gen_fcvtqt(rb, rc, fn11); > break; > default: > goto invalid_opc;
On 12/24/2009 07:18 AM, Aurelien Jarno wrote: > My main concern about this patch is that I don't really understand why > the current fp exceptions, the current rounding mode or flush_to_zero > mode are stored in FP_STATUS. I think it would be better to have > dedicated variable(s) in the cpu state structure, as it is done in other > emulated architectures. The internal format for these features is quite different (in terms of bit ordering) than the native Alpha format. If I were to be forcing the bits back into the Alpha format after each operation, that would be far more shifting and masking than only doing the conversion to Alpha format when the FPCR register is read or written. At present I'm only saving a value and ORing it back in. However, if you're convinced I should keep things in Alpha format all the time, I can rearrange the patch along those lines. r~
On Mon, Dec 28, 2009 at 11:06:20AM -0800, Richard Henderson wrote: > On 12/24/2009 07:18 AM, Aurelien Jarno wrote: >> My main concern about this patch is that I don't really understand why >> the current fp exceptions, the current rounding mode or flush_to_zero >> mode are stored in FP_STATUS. I think it would be better to have >> dedicated variable(s) in the cpu state structure, as it is done in other >> emulated architectures. > > The internal format for these features is quite different (in terms of > bit ordering) than the native Alpha format. If I were to be forcing the > bits back into the Alpha format after each operation, that would be far > more shifting and masking than only doing the conversion to Alpha format > when the FPCR register is read or written. At present I'm only saving a > value and ORing it back in > > However, if you're convinced I should keep things in Alpha format all > the time, I can rearrange the patch along those lines. > If the format is too different, you should still use a dedicated variable in the cpu state. Thinking more, there is no guarantee that FP_STATUS is not overridden by another thread (or another CPU in system mode), if its value is set in one TB, and read back in another TB. Also there is no need to save FP_STATUS and restore it. It should be set to 0, and the result should be ORed with the dedicated variable.
On Mon, Dec 28, 2009 at 08:48:22PM +0100, Aurelien Jarno wrote: > On Mon, Dec 28, 2009 at 11:06:20AM -0800, Richard Henderson wrote: > > On 12/24/2009 07:18 AM, Aurelien Jarno wrote: > >> My main concern about this patch is that I don't really understand why > >> the current fp exceptions, the current rounding mode or flush_to_zero > >> mode are stored in FP_STATUS. I think it would be better to have > >> dedicated variable(s) in the cpu state structure, as it is done in other > >> emulated architectures. > > > > The internal format for these features is quite different (in terms of > > bit ordering) than the native Alpha format. If I were to be forcing the > > bits back into the Alpha format after each operation, that would be far > > more shifting and masking than only doing the conversion to Alpha format > > when the FPCR register is read or written. At present I'm only saving a > > value and ORing it back in > > > > However, if you're convinced I should keep things in Alpha format all > > the time, I can rearrange the patch along those lines. > > > > If the format is too different, you should still use a dedicated > variable in the cpu state. Thinking more, there is no guarantee that > FP_STATUS is not overriden by another thread (or another CPU in system > mode), if it's value is set in one TB, and read back in another TB. Forget that part, there is actually one FP_STATUS per CPU, so it should work. My grep didn't work due to the #define... > Also there is no need to save FP_STATUS and restore it. It should be set > to 0, and the result should be ORed with the dedicated variable. > > -- > Aurelien Jarno GPG: 1024D/F1BCDB73 > aurelien@aurel32.net http://www.aurel32.net
On Mon, Dec 28, 2009 at 08:52:28PM +0100, Aurelien Jarno wrote: > On Mon, Dec 28, 2009 at 08:48:22PM +0100, Aurelien Jarno wrote: > > On Mon, Dec 28, 2009 at 11:06:20AM -0800, Richard Henderson wrote: > > > On 12/24/2009 07:18 AM, Aurelien Jarno wrote: > > >> My main concern about this patch is that I don't really understand why > > >> the current fp exceptions, the current rounding mode or flush_to_zero > > >> mode are stored in FP_STATUS. I think it would be better to have > > >> dedicated variable(s) in the cpu state structure, as it is done in other > > >> emulated architectures. > > > > > > The internal format for these features is quite different (in terms of > > > bit ordering) than the native Alpha format. If I were to be forcing the > > > bits back into the Alpha format after each operation, that would be far > > > more shifting and masking than only doing the conversion to Alpha format > > > when the FPCR register is read or written. At present I'm only saving a > > > value and ORing it back in > > > > > > However, if you're convinced I should keep things in Alpha format all > > > the time, I can rearrange the patch along those lines. > > > > > > > If the format is too different, you should still use a dedicated > > variable in the cpu state. Thinking more, there is no guarantee that > > FP_STATUS is not overriden by another thread (or another CPU in system > > mode), if it's value is set in one TB, and read back in another TB. > > Forget that part, there is actually one FP_STATUS per CPU, so it should > work. My grep didn't work due to the #define... > > > Also there is no need to save FP_STATUS and restore it. It should be set > > to 0, and the result should be ORed with the dedicated variable. > > To make it more clear, what I call save FP_STATUS is all the begin_* functions. They should be one or more variable added into the CPU state to hold those values. 
For example begin_fp_exception can then be replaced by a simple: set_float_exception_flags(0, &FP_STATUS); And then in end_fp_exception, the mask and shift operation can be replaced by: env->fp_exceptions |= FP_STATUS.float_exception_flags; There is no need to call set_float_exception_flags(). When reading the FPCR register, the value should be computed from env->fp_exceptions instead. Similarly there is no need to save or switch back to the default roundmode or flush_to_zero after an FP instruction, as long as all FP instructions set them before the actual FP code. It seems to be the case with your patch anyway.
I've split up the FPCR as requested by Aurelien. We no longer set anything in FP_STATUS after the execution of the operation, only copy data from FP_STATUS to some env->fpcr field. I have totally rewritten the patch to be more along the line that Laurent was suggesting, in that the rounding mode and other qualifiers are totally parsed within the translator. I no longer pass the FN11 field to the helper functions. Unlike Laurent's prototype, I do not set the rounding mode at every FP instruction; I remember the previous setting of the rounding mode within a TB. Similarly for the flush-to-zero field. I do not handle VAX instructions at all. The existing VAX support is mostly broken, and I didn't feel like compounding the problem. r~ -- Richard Henderson (6): target-alpha: Fix gdb access to fpcr and unique. target-alpha: Split up FPCR value into separate fields. target-alpha: Reduce internal processor registers for user-mode. target-alpha: Clean up arithmetic traps. target-alpha: Mark helper_excp as NORETURN. target-alpha: Implement IEEE FP qualifiers.
Ping? r~ On 01/04/2010 02:46 PM, Richard Henderson wrote: > I've split up the FPCR as requested by Aurelien. We no longer > set anything in FP_STATUS after the execution of the operation, > only copy data from FP_STATUS to some env->fpcr field. > > I have totally rewritten the patch to be more along the line > that Laurent was suggesting, in that the rounding mode and other > qualifiers are totally parsed within the translator. I no longer > pass the FN11 field to the helper functions. > > Unlike Laurent's prototype, I do not set the rounding mode at > every FP instruction; I remember the previous setting of the > rounding mode within a TB. Similarly for the flush-to-zero field. > > I do not handle VAX instructions at all. The existing VAX support > is mostly broken, and I didn't feel like compounding the problem. > > > r~ > > > -- > Richard Henderson (6): > target-alpha: Fix gdb access to fpcr and unique. > target-alpha: Split up FPCR value into separate fields. > target-alpha: Reduce internal processor registers for user-mode. > target-alpha: Clean up arithmetic traps. > target-alpha: Mark helper_excp as NORETURN. > target-alpha: Implement IEEE FP qualifiers. > > >
Ping 2. r~ On 01/04/2010 02:46 PM, Richard Henderson wrote: > I've split up the FPCR as requested by Aurelien. We no longer > set anything in FP_STATUS after the execution of the operation, > only copy data from FP_STATUS to some env->fpcr field. > > I have totally rewritten the patch to be more along the line > that Laurent was suggesting, in that the rounding mode and other > qualifiers are totally parsed within the translator. I no longer > pass the FN11 field to the helper functions. > > Unlike Laurent's prototype, I do not set the rounding mode at > every FP instruction; I remember the previous setting of the > rounding mode within a TB. Similarly for the flush-to-zero field. > > I do not handle VAX instructions at all. The existing VAX support > is mostly broken, and I didn't feel like compounding the problem. > > > r~ > > > -- > Richard Henderson (6): > target-alpha: Fix gdb access to fpcr and unique. > target-alpha: Split up FPCR value into separate fields. > target-alpha: Reduce internal processor registers for user-mode. > target-alpha: Clean up arithmetic traps. > target-alpha: Mark helper_excp as NORETURN. > target-alpha: Implement IEEE FP qualifiers. > > >
On Mon, Jan 04, 2010 at 02:46:05PM -0800, Richard Henderson wrote: > I've split up the FPCR as requested by Aurelien. We no longer > set anything in FP_STATUS after the execution of the operation, > only copy data from FP_STATUS to some env->fpcr field. > > I have totally rewritten the patch to be more along the line > that Laurent was suggesting, in that the rounding mode and other > qualifiers are totally parsed within the translator. I no longer > pass the FN11 field to the helper functions. > What's the benefit of doing that? I don't say it's wrong, I just want to understand. Otherwise the patch looks good, so it can probably be applied without any change. In the meanwhile, I have applied patches 1 to 5.
On 02/23/2010 02:58 PM, Aurelien Jarno wrote: >> I have totally rewritten the patch to be more along the line >> that Laurent was suggesting, in that the rounding mode and other >> qualifiers are totally parsed within the translator. I no longer >> pass the FN11 field to the helper functions. > > What's the benefit of doing that? I don't say it's wrong, I just want > to understand. Otherwise the patch looks good, so it can probably be > applied without any change. I seem to recall Laurent opining that doing the interpretation of the opcode in two different places was less than clean, and in the end I agree with him. FWIW, this configuration would also be compatible with a future TCG enhancement to generate fp code, whereas the first config would not. r~
On Wed, Feb 24, 2010 at 12:24:55PM +0100, Richard Henderson wrote: > On 02/23/2010 02:58 PM, Aurelien Jarno wrote: > >>I have totally rewritten the patch to be more along the line > >>that Laurent was suggesting, in that the rounding mode and other > >>qualifiers are totally parsed within the translator. I no longer > >>pass the FN11 field to the helper functions. > > > >What's the benefit of doing that? I don't say it's wrong, I just want > >to understand. Otherwise the patch looks good, so it can probably be > >applied without any change. > > I seem to recall Laurent opining that doing the interpretation > of the opcode in two different places was less than clean, and > in the end I agree with him. > > FWIW, this configuration would also be compatible with a > future TCG enhancement to generate fp code, whereas the first > config would not. I have applied the patch, but in order to avoid doing the same for all targets, it might be a good idea to directly provide TCG functions to modify FP_STATUS instead of using the interface from softfloat.h. This would also have the advantage of clearly defining this interface, and make sure that the alpha target is not broken by a change in softfloat.h.
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h index c0dff4b..c1c0470 100644 --- a/target-alpha/cpu.h +++ b/target-alpha/cpu.h @@ -430,9 +430,13 @@ enum { }; /* Arithmetic exception */ -enum { - EXCP_ARITH_OVERFLOW, -}; +#define EXC_M_IOV (1<<16) /* Integer Overflow */ +#define EXC_M_INE (1<<15) /* Inexact result */ +#define EXC_M_UNF (1<<14) /* Underflow */ +#define EXC_M_FOV (1<<13) /* Overflow */ +#define EXC_M_DZE (1<<12) /* Division by zero */ +#define EXC_M_INV (1<<11) /* Invalid operation */ +#define EXC_M_SWC (1<<10) /* Software completion */ enum { IR_V0 = 0, diff --git a/target-alpha/helper.c b/target-alpha/helper.c index be7d37b..94821bd 100644 --- a/target-alpha/helper.c +++ b/target-alpha/helper.c @@ -27,41 +27,13 @@ uint64_t cpu_alpha_load_fpcr (CPUState *env) { - uint64_t ret = 0; - int flags, mask; - - flags = env->fp_status.float_exception_flags; - ret |= (uint64_t) flags << 52; - if (flags) - ret |= FPCR_SUM; - env->ipr[IPR_EXC_SUM] &= ~0x3E; - env->ipr[IPR_EXC_SUM] |= flags << 1; - - mask = env->fp_status.float_exception_mask; - if (mask & float_flag_invalid) - ret |= FPCR_INVD; - if (mask & float_flag_divbyzero) - ret |= FPCR_DZED; - if (mask & float_flag_overflow) - ret |= FPCR_OVFD; - if (mask & float_flag_underflow) - ret |= FPCR_UNFD; - if (mask & float_flag_inexact) - ret |= FPCR_INED; - - switch (env->fp_status.float_rounding_mode) { - case float_round_nearest_even: - ret |= 2ULL << FPCR_DYN_SHIFT; - break; - case float_round_down: - ret |= 1ULL << FPCR_DYN_SHIFT; - break; - case float_round_up: - ret |= 3ULL << FPCR_DYN_SHIFT; - break; - case float_round_to_zero: - break; - } + uint64_t ret = env->fp_status.float_exception_flags; + + if (ret) + ret = FPCR_SUM | (ret << 52); + + ret |= env->fpcr & ~(FPCR_SUM | FPCR_STATUS_MASK); + return ret; } @@ -69,6 +41,8 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val) { int round_mode, mask; + env->fpcr = val; + set_float_exception_flags((val >> 52) & 0x3F, &env->fp_status); mask = 0; 
@@ -86,6 +60,7 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val) switch ((val >> FPCR_DYN_SHIFT) & 3) { case 0: + default: round_mode = float_round_to_zero; break; case 1: @@ -100,6 +75,11 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val) break; } set_float_rounding_mode(round_mode, &env->fp_status); + + mask = 0; + if ((val & (FPCR_UNDZ|FPCR_UNFD)) == (FPCR_UNDZ|FPCR_UNFD)) + mask = 1; + set_flush_to_zero(mask, &env->fp_status); } #if defined(CONFIG_USER_ONLY) diff --git a/target-alpha/helper.h b/target-alpha/helper.h index bedd3c0..1521a84 100644 --- a/target-alpha/helper.h +++ b/target-alpha/helper.h @@ -41,33 +41,33 @@ DEF_HELPER_1(store_fpcr, void, i64) DEF_HELPER_1(f_to_memory, i32, i64) DEF_HELPER_1(memory_to_f, i64, i32) -DEF_HELPER_2(addf, i64, i64, i64) -DEF_HELPER_2(subf, i64, i64, i64) -DEF_HELPER_2(mulf, i64, i64, i64) -DEF_HELPER_2(divf, i64, i64, i64) -DEF_HELPER_1(sqrtf, i64, i64) +DEF_HELPER_3(addf, i64, i64, i64, i32) +DEF_HELPER_3(subf, i64, i64, i64, i32) +DEF_HELPER_3(mulf, i64, i64, i64, i32) +DEF_HELPER_3(divf, i64, i64, i64, i32) +DEF_HELPER_2(sqrtf, i64, i64, i32) DEF_HELPER_1(g_to_memory, i64, i64) DEF_HELPER_1(memory_to_g, i64, i64) -DEF_HELPER_2(addg, i64, i64, i64) -DEF_HELPER_2(subg, i64, i64, i64) -DEF_HELPER_2(mulg, i64, i64, i64) -DEF_HELPER_2(divg, i64, i64, i64) -DEF_HELPER_1(sqrtg, i64, i64) +DEF_HELPER_3(addg, i64, i64, i64, i32) +DEF_HELPER_3(subg, i64, i64, i64, i32) +DEF_HELPER_3(mulg, i64, i64, i64, i32) +DEF_HELPER_3(divg, i64, i64, i64, i32) +DEF_HELPER_2(sqrtg, i64, i64, i32) DEF_HELPER_1(s_to_memory, i32, i64) DEF_HELPER_1(memory_to_s, i64, i32) -DEF_HELPER_2(adds, i64, i64, i64) -DEF_HELPER_2(subs, i64, i64, i64) -DEF_HELPER_2(muls, i64, i64, i64) -DEF_HELPER_2(divs, i64, i64, i64) -DEF_HELPER_1(sqrts, i64, i64) - -DEF_HELPER_2(addt, i64, i64, i64) -DEF_HELPER_2(subt, i64, i64, i64) -DEF_HELPER_2(mult, i64, i64, i64) -DEF_HELPER_2(divt, i64, i64, i64) -DEF_HELPER_1(sqrtt, i64, i64) +DEF_HELPER_3(adds, 
i64, i64, i64, i32) +DEF_HELPER_3(subs, i64, i64, i64, i32) +DEF_HELPER_3(muls, i64, i64, i64, i32) +DEF_HELPER_3(divs, i64, i64, i64, i32) +DEF_HELPER_2(sqrts, i64, i64, i32) + +DEF_HELPER_3(addt, i64, i64, i64, i32) +DEF_HELPER_3(subt, i64, i64, i64, i32) +DEF_HELPER_3(mult, i64, i64, i64, i32) +DEF_HELPER_3(divt, i64, i64, i64, i32) +DEF_HELPER_2(sqrtt, i64, i64, i32) DEF_HELPER_2(cmptun, i64, i64, i64) DEF_HELPER_2(cmpteq, i64, i64, i64) @@ -81,15 +81,15 @@ DEF_HELPER_2(cpys, i64, i64, i64) DEF_HELPER_2(cpysn, i64, i64, i64) DEF_HELPER_2(cpyse, i64, i64, i64) -DEF_HELPER_1(cvtts, i64, i64) -DEF_HELPER_1(cvtst, i64, i64) -DEF_HELPER_1(cvttq, i64, i64) -DEF_HELPER_1(cvtqs, i64, i64) -DEF_HELPER_1(cvtqt, i64, i64) -DEF_HELPER_1(cvtqf, i64, i64) -DEF_HELPER_1(cvtgf, i64, i64) -DEF_HELPER_1(cvtgq, i64, i64) -DEF_HELPER_1(cvtqg, i64, i64) +DEF_HELPER_2(cvtts, i64, i64, i32) +DEF_HELPER_2(cvtst, i64, i64, i32) +DEF_HELPER_2(cvttq, i64, i64, i32) +DEF_HELPER_2(cvtqs, i64, i64, i32) +DEF_HELPER_2(cvtqt, i64, i64, i32) +DEF_HELPER_2(cvtqf, i64, i64, i32) +DEF_HELPER_2(cvtgf, i64, i64, i32) +DEF_HELPER_2(cvtgq, i64, i64, i32) +DEF_HELPER_2(cvtqg, i64, i64, i32) DEF_HELPER_1(cvtlq, i64, i64) DEF_HELPER_1(cvtql, i64, i64) DEF_HELPER_1(cvtqlv, i64, i64) diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c index b2abf6c..2d1c3d5 100644 --- a/target-alpha/op_helper.c +++ b/target-alpha/op_helper.c @@ -24,7 +24,7 @@ /*****************************************************************************/ /* Exceptions processing helpers */ -void helper_excp (int excp, int error) +void QEMU_NORETURN helper_excp (int excp, int error) { env->exception_index = excp; env->error_code = error; @@ -78,7 +78,7 @@ uint64_t helper_addqv (uint64_t op1, uint64_t op2) uint64_t tmp = op1; op1 += op2; if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return op1; } @@ -88,7 +88,7 @@ 
uint64_t helper_addlv (uint64_t op1, uint64_t op2) uint64_t tmp = op1; op1 = (uint32_t)(op1 + op2); if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return op1; } @@ -98,7 +98,7 @@ uint64_t helper_subqv (uint64_t op1, uint64_t op2) uint64_t res; res = op1 - op2; if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return res; } @@ -108,7 +108,7 @@ uint64_t helper_sublv (uint64_t op1, uint64_t op2) uint32_t res; res = op1 - op2; if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return res; } @@ -118,7 +118,7 @@ uint64_t helper_mullv (uint64_t op1, uint64_t op2) int64_t res = (int64_t)op1 * (int64_t)op2; if (unlikely((int32_t)res != res)) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return (int64_t)((int32_t)res); } @@ -130,7 +130,7 @@ uint64_t helper_mulqv (uint64_t op1, uint64_t op2) muls64(&tl, &th, op1, op2); /* If th != 0 && th != -1, then we had an overflow */ if (unlikely((th + 1) > 1)) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); + helper_excp(EXCP_ARITH, EXC_M_IOV); } return tl; } @@ -370,8 +370,175 @@ uint64_t helper_unpkbw (uint64_t op1) /* Floating point helpers */ +/* ??? Not implemented is setting EXC_MASK, containing a bitmask of + destination registers of instructions that have caused arithmetic + traps. Not needed for userspace emulation, or for complete + emulation of the entire fpu stack within qemu. But we would need + it to invoke a guest kernel's entArith trap handler properly. + + It would be possible to encode the FP destination register in the + QUAL parameter for the FPU helpers below; additional changes would + be required for ADD/V et al above. 
*/ + +#define QUAL_RM_N 0x080 /* Round mode nearest even */ +#define QUAL_RM_C 0x000 /* Round mode chopped */ +#define QUAL_RM_M 0x040 /* Round mode minus infinity */ +#define QUAL_RM_D 0x0c0 /* Round mode dynamic */ +#define QUAL_RM_MASK 0x0c0 + +#define QUAL_U 0x100 /* Underflow enable (fp output) */ +#define QUAL_V 0x100 /* Overflow enable (int output) */ +#define QUAL_S 0x400 /* Software completion enable */ +#define QUAL_I 0x200 /* Inexact detection enable */ + +/* If the floating-point qualifiers specified a rounding mode, + set that rounding mode and remember the original mode for + resetting at the end of the instruction. */ +static inline uint32_t begin_fp_roundmode(uint32_t qual) +{ + uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = rm; + + switch (qual & QUAL_RM_MASK) { + default: + case QUAL_RM_N: + rm = float_round_nearest_even; + break; + case QUAL_RM_C: + rm = float_round_to_zero; + break; + case QUAL_RM_M: + rm = float_round_down; + break; + case QUAL_RM_D: + return old_rm; + } + if (old_rm != rm) + set_float_rounding_mode(rm, &FP_STATUS); + return old_rm; +} + +/* Zero the exception flags so that we can determine if the current + instruction raises any exceptions. Save the old acrued exception + status so that we can restore them at the end of the insn. */ +static inline uint32_t begin_fp_exception(void) +{ + uint32_t old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8; + set_float_exception_flags(0, &FP_STATUS); + return old_exc; +} + +static inline uint32_t begin_fp_flush_to_zero(uint32_t quals) +{ + /* If underflow detection is disabled, silently flush to zero. + Note that flush-to-zero mode may already be enabled via the FPCR. */ + if ((quals & QUAL_U) == 0 && !FP_STATUS.flush_to_zero) { + set_flush_to_zero(1, &FP_STATUS); + return 0x10000; + } + return 0; +} + +/* Begin processing an fp operation. Return a token that should be passed + when completing the fp operation. 
*/ +static uint32_t begin_fp(uint32_t quals) +{ + uint32_t ret = 0; + + ret |= begin_fp_roundmode(quals); + ret |= begin_fp_flush_to_zero(quals); + ret |= begin_fp_exception(); + + return ret; +} + +/* End processing an fp operation. */ + +static inline void end_fp_roundmode(uint32_t orig) +{ + uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = orig & 0xff; + if (unlikely(rm != old_rm)) + set_float_rounding_mode(old_rm, &FP_STATUS); +} + +static inline void end_fp_flush_to_zero(uint32_t orig) +{ + if (orig & 0x10000) + set_flush_to_zero(0, &FP_STATUS); +} + +static void end_fp_exception(uint32_t quals, uint32_t orig) +{ + uint8_t exc = FP_STATUS.float_exception_flags; + + /* If inexact detection is disabled, silently clear it. */ + if ((quals & QUAL_I) == 0) + exc &= ~float_flag_inexact; + + orig = (orig >> 8) & 0xff; + set_float_exception_flags(exc | orig, &FP_STATUS); + + /* Raise an exception as required. */ + if (unlikely(exc)) { + if (quals & QUAL_S) + exc &= ~FP_STATUS.float_exception_mask; + if (exc) { + uint32_t hw_exc = 0; + + if (exc & float_flag_invalid) + hw_exc |= EXC_M_INV; + if (exc & float_flag_divbyzero) + hw_exc |= EXC_M_DZE; + if (exc & float_flag_overflow) + hw_exc |= EXC_M_FOV; + if (exc & float_flag_underflow) + hw_exc |= EXC_M_UNF; + if (exc & float_flag_inexact) + hw_exc |= EXC_M_INE; + + helper_excp(EXCP_ARITH, hw_exc); + } + } +} + +static void end_fp(uint32_t quals, uint32_t orig) +{ + end_fp_roundmode(orig); + end_fp_flush_to_zero(orig); + end_fp_exception(quals, orig); +} + +static uint64_t remap_ieee_input(uint32_t quals, uint64_t a) +{ + uint64_t frac; + uint32_t exp; + + exp = (uint32_t)(a >> 52) & 0x7ff; + frac = a & 0xfffffffffffffull; + + if (exp == 0) { + if (frac != 0) { + /* If DNZ is set, flush denormals to zero on input. */ + if (env->fpcr & FPCR_DNZ) + a = a & (1ull << 63); + /* If software completion not enabled, trap. 
*/ + else if ((quals & QUAL_S) == 0) + helper_excp(EXCP_ARITH, EXC_M_UNF); + } + } else if (exp == 0x7ff) { + /* Infinity or NaN. If software completion is not enabled, trap. + If /s is enabled, we'll properly signal for SNaN on output. */ + /* ??? I'm not sure these exception bit flags are correct. I do + know that the Linux kernel, at least, doesn't rely on them and + just emulates the insn to figure out what exception to use. */ + if ((quals & QUAL_S) == 0) + helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV); + } + + return a; +} + /* F floating (VAX) */ -static inline uint64_t float32_to_f(float32 fa) +static uint64_t float32_to_f(float32 fa) { uint64_t r, exp, mant, sig; CPU_FloatU a; @@ -404,7 +571,7 @@ static inline uint64_t float32_to_f(float32 fa) return r; } -static inline float32 f_to_float32(uint64_t a) +static float32 f_to_float32(uint64_t a) { uint32_t exp, mant_sig; CPU_FloatU r; @@ -447,58 +614,83 @@ uint64_t helper_memory_to_f (uint32_t a) return r; } -uint64_t helper_addf (uint64_t a, uint64_t b) +uint64_t helper_addf (uint64_t a, uint64_t b, uint32_t quals) { float32 fa, fb, fr; + uint32_t token; fa = f_to_float32(a); fb = f_to_float32(b); + + token = begin_fp(quals); fr = float32_add(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float32_to_f(fr); } -uint64_t helper_subf (uint64_t a, uint64_t b) +uint64_t helper_subf (uint64_t a, uint64_t b, uint32_t quals) { float32 fa, fb, fr; + uint32_t token; fa = f_to_float32(a); fb = f_to_float32(b); + + token = begin_fp(quals); fr = float32_sub(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float32_to_f(fr); } -uint64_t helper_mulf (uint64_t a, uint64_t b) +uint64_t helper_mulf (uint64_t a, uint64_t b, uint32_t quals) { float32 fa, fb, fr; + uint32_t token; fa = f_to_float32(a); fb = f_to_float32(b); + + token = begin_fp(quals); fr = float32_mul(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float32_to_f(fr); } -uint64_t helper_divf (uint64_t a, uint64_t b) +uint64_t helper_divf 
(uint64_t a, uint64_t b, uint32_t quals) { float32 fa, fb, fr; + uint32_t token; fa = f_to_float32(a); fb = f_to_float32(b); + + token = begin_fp(quals); fr = float32_div(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float32_to_f(fr); } -uint64_t helper_sqrtf (uint64_t t) +uint64_t helper_sqrtf (uint64_t t, uint32_t quals) { float32 ft, fr; + uint32_t token; ft = f_to_float32(t); + + token = begin_fp(quals); fr = float32_sqrt(ft, &FP_STATUS); + end_fp(quals, token); + return float32_to_f(fr); } /* G floating (VAX) */ -static inline uint64_t float64_to_g(float64 fa) +static uint64_t float64_to_g(float64 fa) { uint64_t r, exp, mant, sig; CPU_DoubleU a; @@ -531,7 +723,7 @@ static inline uint64_t float64_to_g(float64 fa) return r; } -static inline float64 g_to_float64(uint64_t a) +static float64 g_to_float64(uint64_t a) { uint64_t exp, mant_sig; CPU_DoubleU r; @@ -574,52 +766,77 @@ uint64_t helper_memory_to_g (uint64_t a) return r; } -uint64_t helper_addg (uint64_t a, uint64_t b) +uint64_t helper_addg (uint64_t a, uint64_t b, uint32_t quals) { float64 fa, fb, fr; + uint32_t token; fa = g_to_float64(a); fb = g_to_float64(b); + + token = begin_fp(quals); fr = float64_add(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float64_to_g(fr); } -uint64_t helper_subg (uint64_t a, uint64_t b) +uint64_t helper_subg (uint64_t a, uint64_t b, uint32_t quals) { float64 fa, fb, fr; + uint32_t token; fa = g_to_float64(a); fb = g_to_float64(b); + + token = begin_fp(quals); fr = float64_sub(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float64_to_g(fr); } -uint64_t helper_mulg (uint64_t a, uint64_t b) +uint64_t helper_mulg (uint64_t a, uint64_t b, uint32_t quals) { float64 fa, fb, fr; - + uint32_t token; + fa = g_to_float64(a); fb = g_to_float64(b); + + token = begin_fp(quals); fr = float64_mul(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float64_to_g(fr); } -uint64_t helper_divg (uint64_t a, uint64_t b) +uint64_t helper_divg (uint64_t a, uint64_t b, 
uint32_t quals) { float64 fa, fb, fr; + uint32_t token; fa = g_to_float64(a); fb = g_to_float64(b); + + token = begin_fp(quals); fr = float64_div(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float64_to_g(fr); } -uint64_t helper_sqrtg (uint64_t a) +uint64_t helper_sqrtg (uint64_t a, uint32_t quals) { float64 fa, fr; + uint32_t token; fa = g_to_float64(a); + + token = begin_fp(quals); fr = float64_sqrt(fa, &FP_STATUS); + end_fp(quals, token); + return float64_to_g(fr); } @@ -627,7 +844,7 @@ uint64_t helper_sqrtg (uint64_t a) /* S floating (single) */ /* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg. */ -static inline uint64_t float32_to_s_int(uint32_t fi) +static uint64_t float32_to_s_int(uint32_t fi) { uint32_t frac = fi & 0x7fffff; uint32_t sign = fi >> 31; @@ -649,7 +866,7 @@ static inline uint64_t float32_to_s_int(uint32_t fi) | ((uint64_t)frac << 29)); } -static inline uint64_t float32_to_s(float32 fa) +static uint64_t float32_to_s(float32 fa) { CPU_FloatU a; a.f = fa; @@ -678,52 +895,77 @@ uint64_t helper_memory_to_s (uint32_t a) return float32_to_s_int(a); } -uint64_t helper_adds (uint64_t a, uint64_t b) +static float32 input_s(uint32_t quals, uint64_t a) +{ + return s_to_float32(remap_ieee_input(quals, a)); +} + +uint64_t helper_adds (uint64_t a, uint64_t b, uint32_t quals) { float32 fa, fb, fr; + uint32_t token; - fa = s_to_float32(a); - fb = s_to_float32(b); + token = begin_fp(quals); + fa = input_s(quals, a); + fb = input_s(quals, b); fr = float32_add(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float32_to_s(fr); } -uint64_t helper_subs (uint64_t a, uint64_t b) +uint64_t helper_subs (uint64_t a, uint64_t b, uint32_t quals) { float32 fa, fb, fr; + uint32_t token; - fa = s_to_float32(a); - fb = s_to_float32(b); + token = begin_fp(quals); + fa = input_s(quals, a); + fb = input_s(quals, b); fr = float32_sub(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float32_to_s(fr); } -uint64_t helper_muls (uint64_t a, uint64_t b) 
+uint64_t helper_muls (uint64_t a, uint64_t b, uint32_t quals) { float32 fa, fb, fr; + uint32_t token; - fa = s_to_float32(a); - fb = s_to_float32(b); + token = begin_fp(quals); + fa = input_s(quals, a); + fb = input_s(quals, b); fr = float32_mul(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float32_to_s(fr); } -uint64_t helper_divs (uint64_t a, uint64_t b) +uint64_t helper_divs (uint64_t a, uint64_t b, uint32_t quals) { float32 fa, fb, fr; + uint32_t token; - fa = s_to_float32(a); - fb = s_to_float32(b); + token = begin_fp(quals); + fa = input_s(quals, a); + fb = input_s(quals, b); fr = float32_div(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float32_to_s(fr); } -uint64_t helper_sqrts (uint64_t a) +uint64_t helper_sqrts (uint64_t a, uint32_t quals) { float32 fa, fr; + uint32_t token; - fa = s_to_float32(a); + token = begin_fp(quals); + fa = input_s(quals, a); fr = float32_sqrt(fa, &FP_STATUS); + end_fp(quals, token); + return float32_to_s(fr); } @@ -745,52 +987,78 @@ static inline uint64_t float64_to_t(float64 fa) return r.ll; } -uint64_t helper_addt (uint64_t a, uint64_t b) +/* Raise any exceptions needed for using T, given the insn qualifiers.
*/ +static float64 input_t(uint32_t quals, uint64_t a) +{ + return t_to_float64(remap_ieee_input(quals, a)); +} + +uint64_t helper_addt (uint64_t a, uint64_t b, uint32_t quals) { float64 fa, fb, fr; + uint32_t token; - fa = t_to_float64(a); - fb = t_to_float64(b); + token = begin_fp(quals); + fa = input_t(quals, a); + fb = input_t(quals, b); fr = float64_add(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float64_to_t(fr); } -uint64_t helper_subt (uint64_t a, uint64_t b) +uint64_t helper_subt (uint64_t a, uint64_t b, uint32_t quals) { float64 fa, fb, fr; + uint32_t token; - fa = t_to_float64(a); - fb = t_to_float64(b); + token = begin_fp(quals); + fa = input_t(quals, a); + fb = input_t(quals, b); fr = float64_sub(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float64_to_t(fr); } -uint64_t helper_mult (uint64_t a, uint64_t b) +uint64_t helper_mult (uint64_t a, uint64_t b, uint32_t quals) { float64 fa, fb, fr; + uint32_t token; - fa = t_to_float64(a); - fb = t_to_float64(b); + token = begin_fp(quals); + fa = input_t(quals, a); + fb = input_t(quals, b); fr = float64_mul(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float64_to_t(fr); } -uint64_t helper_divt (uint64_t a, uint64_t b) +uint64_t helper_divt (uint64_t a, uint64_t b, uint32_t quals) { float64 fa, fb, fr; + uint32_t token; - fa = t_to_float64(a); - fb = t_to_float64(b); + token = begin_fp(quals); + fa = input_t(quals, a); + fb = input_t(quals, b); fr = float64_div(fa, fb, &FP_STATUS); + end_fp(quals, token); + return float64_to_t(fr); } -uint64_t helper_sqrtt (uint64_t a) +uint64_t helper_sqrtt (uint64_t a, uint32_t quals) { float64 fa, fr; + uint32_t token; - fa = t_to_float64(a); + token = begin_fp(quals); + fa = input_t(quals, a); fr = float64_sqrt(fa, &FP_STATUS); + end_fp(quals, token); + return float64_to_t(fr); } @@ -813,6 +1081,8 @@ uint64_t helper_cpyse(uint64_t a, uint64_t b) /* Comparisons */ +/* ??? Software completion qualifier missing. 
*/ + uint64_t helper_cmptun (uint64_t a, uint64_t b) { float64 fa, fb; @@ -905,70 +1175,218 @@ uint64_t helper_cmpglt(uint64_t a, uint64_t b) } /* Floating point format conversion */ -uint64_t helper_cvtts (uint64_t a) +uint64_t helper_cvtts (uint64_t a, uint32_t quals) { float64 fa; float32 fr; + uint32_t token; - fa = t_to_float64(a); + token = begin_fp(quals); + fa = input_t(quals, a); fr = float64_to_float32(fa, &FP_STATUS); + end_fp(quals, token); + return float32_to_s(fr); } -uint64_t helper_cvtst (uint64_t a) +uint64_t helper_cvtst (uint64_t a, uint32_t quals) { float32 fa; float64 fr; + uint32_t token; - fa = s_to_float32(a); + token = begin_fp(quals); + fa = input_s(quals, a); fr = float32_to_float64(fa, &FP_STATUS); + end_fp(quals, token); + return float64_to_t(fr); } -uint64_t helper_cvtqs (uint64_t a) +uint64_t helper_cvtqs (uint64_t a, uint32_t quals) { - float32 fr = int64_to_float32(a, &FP_STATUS); + float32 fr; + uint32_t token; + + token = begin_fp(quals); + fr = int64_to_float32(a, &FP_STATUS); + end_fp(quals, token); + return float32_to_s(fr); } -uint64_t helper_cvttq (uint64_t a) +/* Implement float64 to uint64 conversion without overflow enabled. + In this mode we must supply the truncated result. This behaviour + is used by the compiler to get unsigned conversion for free with + the same instruction. */ + +static uint64_t cvttq_internal(uint64_t a) { - float64 fa = t_to_float64(a); - return float64_to_int64_round_to_zero(fa, &FP_STATUS); + uint64_t frac, ret = 0; + uint32_t exp, sign, exc = 0; + int shift; + + sign = (a >> 63); + exp = (uint32_t)(a >> 52) & 0x7ff; + frac = a & 0xfffffffffffffull; + + if (exp == 0) { + if (unlikely(frac != 0)) + goto do_underflow; + } else if (exp == 0x7ff) { + if (frac == 0) + exc = float_flag_overflow; + else + exc = float_flag_invalid; + } else { + /* Restore implicit bit. */ + frac |= 0x10000000000000ull; + + /* Note that neither overflow exceptions nor inexact exceptions + are desired. 
This lets us streamline the checks quite a bit. */ + shift = exp - 1023 - 52; + if (shift >= 0) { + /* In this case the number is so large that we must shift + the fraction left. There is no rounding to do. */ + if (shift < 63) { + ret = frac << shift; + if ((ret >> shift) != frac) + exc = float_flag_overflow; + } + } else { + uint64_t round; + + /* In this case the number is smaller than the fraction as + represented by the 52 bit number. Here we must think + about rounding the result. Handle this by shifting the + fractional part of the number into the high bits of ROUND. + This will let us efficiently handle round-to-nearest. */ + shift = -shift; + if (shift < 63) { + ret = frac >> shift; + round = frac << (64 - shift); + } else { + /* The exponent is so small we shift out everything. + Leave a sticky bit for proper rounding below. */ + do_underflow: + round = 1; + } + + if (round) { + exc = float_flag_inexact; + switch (FP_STATUS.float_rounding_mode) { + case float_round_nearest_even: + if (round == (1ull << 63)) { + /* Fraction is exactly 0.5; round to even. */ + ret += (ret & 1); + } else if (round > (1ull << 63)) { + ret += 1; + } + break; + case float_round_to_zero: + break; + case float_round_up: + if (!sign) + ret += 1; + break; + case float_round_down: + if (sign) + ret += 1; + break; + } + } + } + if (sign) + ret = -ret; + } + if (unlikely(exc)) + float_raise(exc, &FP_STATUS); + + return ret; +} + +uint64_t helper_cvttq (uint64_t a, uint32_t quals) +{ + uint64_t ret; + uint32_t token; + + /* ??? There's an argument to be made that when /S is enabled, we + should provide the standard IEEE saturated result, instead of + the truncated result that we *must* provide when /V is disabled. + However, that's not how either the Tru64 or Linux completion + handlers actually work, and GCC knows it.
*/ + + token = begin_fp(quals); + a = remap_ieee_input(quals, a); + ret = cvttq_internal(a); + end_fp(quals, token); + + return ret; } -uint64_t helper_cvtqt (uint64_t a) +uint64_t helper_cvtqt (uint64_t a, uint32_t quals) { - float64 fr = int64_to_float64(a, &FP_STATUS); + float64 fr; + uint32_t token; + + token = begin_fp(quals); + fr = int64_to_float64(a, &FP_STATUS); + end_fp(quals, token); + return float64_to_t(fr); } -uint64_t helper_cvtqf (uint64_t a) +uint64_t helper_cvtqf (uint64_t a, uint32_t quals) { - float32 fr = int64_to_float32(a, &FP_STATUS); + float32 fr; + uint32_t token; + + token = begin_fp(quals); + fr = int64_to_float32(a, &FP_STATUS); + end_fp(quals, token); + return float32_to_f(fr); } -uint64_t helper_cvtgf (uint64_t a) +uint64_t helper_cvtgf (uint64_t a, uint32_t quals) { float64 fa; float32 fr; + uint32_t token; fa = g_to_float64(a); + + token = begin_fp(quals); fr = float64_to_float32(fa, &FP_STATUS); + end_fp(quals, token); + return float32_to_f(fr); } -uint64_t helper_cvtgq (uint64_t a) +uint64_t helper_cvtgq (uint64_t a, uint32_t quals) { - float64 fa = g_to_float64(a); - return float64_to_int64_round_to_zero(fa, &FP_STATUS); + float64 fa; + uint64_t ret; + uint32_t token; + + fa = g_to_float64(a); + + token = begin_fp(quals); + ret = float64_to_int64(fa, &FP_STATUS); + end_fp(quals, token); + + return ret; } -uint64_t helper_cvtqg (uint64_t a) +uint64_t helper_cvtqg (uint64_t a, uint32_t quals) { float64 fr; + uint32_t token; + + token = begin_fp(quals); fr = int64_to_float64(a, &FP_STATUS); + end_fp(quals, token); + return float64_to_g(fr); } @@ -979,35 +1397,24 @@ uint64_t helper_cvtlq (uint64_t a) return (lo & 0x3FFFFFFF) | (hi & 0xc0000000); } -static inline uint64_t __helper_cvtql(uint64_t a, int s, int v) -{ - uint64_t r; - - r = ((uint64_t)(a & 0xC0000000)) << 32; - r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29; - - if (v && (int64_t)((int32_t)r) != (int64_t)r) { - helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW); - } - if (s) { - /* 
TODO */ - } - return r; -} - uint64_t helper_cvtql (uint64_t a) { - return __helper_cvtql(a, 0, 0); + return ((a & 0xC0000000) << 32) | ((a & 0x7FFFFFFF) << 29); } uint64_t helper_cvtqlv (uint64_t a) { - return __helper_cvtql(a, 0, 1); + if ((int32_t)a != (int64_t)a) + helper_excp(EXCP_ARITH, EXC_M_IOV); + return helper_cvtql(a); } uint64_t helper_cvtqlsv (uint64_t a) { - return __helper_cvtql(a, 1, 1); + /* ??? I'm pretty sure there's nothing that /sv needs to do that /v + doesn't do. The only thing I can think is that /sv is a valid + instruction merely for completeness in the ISA. */ + return helper_cvtqlv(a); } /* PALcode support special instructions */ diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 45cb697..e0ca0ed 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -442,81 +442,79 @@ static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc) gen_set_label(l1); } -#define FARITH2(name) \ -static inline void glue(gen_f, name)(int rb, int rc) \ -{ \ - if (unlikely(rc == 31)) \ - return; \ - \ - if (rb != 31) \ - gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]); \ - else { \ - TCGv tmp = tcg_const_i64(0); \ - gen_helper_ ## name (cpu_fir[rc], tmp); \ - tcg_temp_free(tmp); \ - } \ +#define FARITH2(name) \ +static inline void glue(gen_f, name)(int rb, int rc) \ +{ \ + if (unlikely(rc == 31)) \ + return; \ + \ + if (rb != 31) \ + gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]); \ + else { \ + TCGv tmp = tcg_const_i64(0); \ + gen_helper_ ## name (cpu_fir[rc], tmp); \ + tcg_temp_free(tmp); \ + } \ } -FARITH2(sqrts) -FARITH2(sqrtf) -FARITH2(sqrtg) -FARITH2(sqrtt) -FARITH2(cvtgf) -FARITH2(cvtgq) -FARITH2(cvtqf) -FARITH2(cvtqg) -FARITH2(cvtst) -FARITH2(cvtts) -FARITH2(cvttq) -FARITH2(cvtqs) -FARITH2(cvtqt) FARITH2(cvtlq) FARITH2(cvtql) FARITH2(cvtqlv) FARITH2(cvtqlsv) -#define FARITH3(name) \ -static inline void glue(gen_f, name)(int ra, int rb, int rc) \ -{ \ - if (unlikely(rc == 31)) \ - return; \ - \ - if (ra != 31) { \ - 
if (rb != 31) \ - gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], cpu_fir[rb]); \ - else { \ - TCGv tmp = tcg_const_i64(0); \ - gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], tmp); \ - tcg_temp_free(tmp); \ - } \ - } else { \ - TCGv tmp = tcg_const_i64(0); \ - if (rb != 31) \ - gen_helper_ ## name (cpu_fir[rc], tmp, cpu_fir[rb]); \ - else \ - gen_helper_ ## name (cpu_fir[rc], tmp, tmp); \ - tcg_temp_free(tmp); \ - } \ +#define QFARITH2(name) \ +static inline void glue(gen_f, name)(int rb, int rc, int opc) \ +{ \ + TCGv_i32 quals; \ + if (unlikely(rc == 31)) \ + return; \ + quals = tcg_const_i32(opc & ~0x3f); \ + if (rb != 31) \ + gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb], quals); \ + else { \ + TCGv tmp = tcg_const_i64(0); \ + gen_helper_ ## name (cpu_fir[rc], tmp, quals); \ + tcg_temp_free(tmp); \ + } \ + tcg_temp_free_i32(quals); \ +} +QFARITH2(sqrts) +QFARITH2(sqrtf) +QFARITH2(sqrtg) +QFARITH2(sqrtt) +QFARITH2(cvtgf) +QFARITH2(cvtgq) +QFARITH2(cvtqf) +QFARITH2(cvtqg) +QFARITH2(cvtst) +QFARITH2(cvtts) +QFARITH2(cvttq) +QFARITH2(cvtqs) +QFARITH2(cvtqt) + +#define FARITH3(name) \ +static inline void glue(gen_f, name)(int ra, int rb, int rc) \ +{ \ + TCGv zero, ta, tb; \ + if (unlikely(rc == 31)) \ + return; \ + ta = cpu_fir[ra]; \ + tb = cpu_fir[rb]; \ + if (unlikely(ra == 31)) { \ + zero = tcg_const_i64(0); \ + ta = zero; \ + } \ + if (unlikely(rb == 31)) { \ + if (ra != 31) \ + zero = tcg_const_i64(0); \ + tb = zero; \ + } \ + gen_helper_ ## name (cpu_fir[rc], ta, tb); \ + if (ra == 31 || rb == 31) \ + tcg_temp_free(zero); \ } - -FARITH3(addf) -FARITH3(subf) -FARITH3(mulf) -FARITH3(divf) -FARITH3(addg) -FARITH3(subg) -FARITH3(mulg) -FARITH3(divg) FARITH3(cmpgeq) FARITH3(cmpglt) FARITH3(cmpgle) -FARITH3(adds) -FARITH3(subs) -FARITH3(muls) -FARITH3(divs) -FARITH3(addt) -FARITH3(subt) -FARITH3(mult) -FARITH3(divt) FARITH3(cmptun) FARITH3(cmpteq) FARITH3(cmptlt) @@ -525,6 +523,47 @@ FARITH3(cpys) FARITH3(cpysn) FARITH3(cpyse) +#define QFARITH3(name) \ +static 
inline void glue(gen_f, name)(int ra, int rb, int rc, int opc) \ +{ \ + TCGv zero, ta, tb; \ + TCGv_i32 quals; \ + if (unlikely(rc == 31)) \ + return; \ + ta = cpu_fir[ra]; \ + tb = cpu_fir[rb]; \ + if (unlikely(ra == 31)) { \ + zero = tcg_const_i64(0); \ + ta = zero; \ + } \ + if (unlikely(rb == 31)) { \ + if (ra != 31) \ + zero = tcg_const_i64(0); \ + tb = zero; \ + } \ + quals = tcg_const_i32(opc & ~0x3f); \ + gen_helper_ ## name (cpu_fir[rc], ta, tb, quals); \ + tcg_temp_free_i32(quals); \ + if (ra == 31 || rb == 31) \ + tcg_temp_free(zero); \ +} +QFARITH3(addf) +QFARITH3(subf) +QFARITH3(mulf) +QFARITH3(divf) +QFARITH3(addg) +QFARITH3(subg) +QFARITH3(mulg) +QFARITH3(divg) +QFARITH3(adds) +QFARITH3(subs) +QFARITH3(muls) +QFARITH3(divs) +QFARITH3(addt) +QFARITH3(subt) +QFARITH3(mult) +QFARITH3(divt) + static inline uint64_t zapnot_mask(uint8_t lit) { uint64_t mask = 0; @@ -1607,7 +1646,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) } break; case 0x14: - switch (fpfn) { /* f11 & 0x3F */ + switch (fpfn) { /* fn11 & 0x3F */ case 0x04: /* ITOFS */ if (!(ctx->amask & AMASK_FIX)) @@ -1626,13 +1665,13 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) /* SQRTF */ if (!(ctx->amask & AMASK_FIX)) goto invalid_opc; - gen_fsqrtf(rb, rc); + gen_fsqrtf(rb, rc, fn11); break; case 0x0B: /* SQRTS */ if (!(ctx->amask & AMASK_FIX)) goto invalid_opc; - gen_fsqrts(rb, rc); + gen_fsqrts(rb, rc, fn11); break; case 0x14: /* ITOFF */ @@ -1663,13 +1702,13 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) /* SQRTG */ if (!(ctx->amask & AMASK_FIX)) goto invalid_opc; - gen_fsqrtg(rb, rc); + gen_fsqrtg(rb, rc, fn11); break; case 0x02B: /* SQRTT */ if (!(ctx->amask & AMASK_FIX)) goto invalid_opc; - gen_fsqrtt(rb, rc); + gen_fsqrtt(rb, rc, fn11); break; default: goto invalid_opc; @@ -1677,47 +1716,42 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) break; case 0x15: /* VAX floating point */ - /* XXX: rounding mode 
and trap are ignored (!) */ - switch (fpfn) { /* f11 & 0x3F */ + switch (fpfn) { /* fn11 & 0x3F */ case 0x00: /* ADDF */ - gen_faddf(ra, rb, rc); + gen_faddf(ra, rb, rc, fn11); break; case 0x01: /* SUBF */ - gen_fsubf(ra, rb, rc); + gen_fsubf(ra, rb, rc, fn11); break; case 0x02: /* MULF */ - gen_fmulf(ra, rb, rc); + gen_fmulf(ra, rb, rc, fn11); break; case 0x03: /* DIVF */ - gen_fdivf(ra, rb, rc); + gen_fdivf(ra, rb, rc, fn11); break; case 0x1E: /* CVTDG */ -#if 0 // TODO - gen_fcvtdg(rb, rc); -#else + /* TODO */ goto invalid_opc; -#endif - break; case 0x20: /* ADDG */ - gen_faddg(ra, rb, rc); + gen_faddg(ra, rb, rc, fn11); break; case 0x21: /* SUBG */ - gen_fsubg(ra, rb, rc); + gen_fsubg(ra, rb, rc, fn11); break; case 0x22: /* MULG */ - gen_fmulg(ra, rb, rc); + gen_fmulg(ra, rb, rc, fn11); break; case 0x23: /* DIVG */ - gen_fdivg(ra, rb, rc); + gen_fdivg(ra, rb, rc, fn11); break; case 0x25: /* CMPGEQ */ @@ -1733,27 +1767,23 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) break; case 0x2C: /* CVTGF */ - gen_fcvtgf(rb, rc); + gen_fcvtgf(rb, rc, fn11); break; case 0x2D: /* CVTGD */ -#if 0 // TODO - gen_fcvtgd(rb, rc); -#else + /* TODO */ goto invalid_opc; -#endif - break; case 0x2F: /* CVTGQ */ - gen_fcvtgq(rb, rc); + gen_fcvtgq(rb, rc, fn11); break; case 0x3C: /* CVTQF */ - gen_fcvtqf(rb, rc); + gen_fcvtqf(rb, rc, fn11); break; case 0x3E: /* CVTQG */ - gen_fcvtqg(rb, rc); + gen_fcvtqg(rb, rc, fn11); break; default: goto invalid_opc; @@ -1761,39 +1791,38 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) break; case 0x16: /* IEEE floating-point */ - /* XXX: rounding mode and traps are ignored (!) 
*/ - switch (fpfn) { /* f11 & 0x3F */ + switch (fpfn) { /* fn11 & 0x3F */ case 0x00: /* ADDS */ - gen_fadds(ra, rb, rc); + gen_fadds(ra, rb, rc, fn11); break; case 0x01: /* SUBS */ - gen_fsubs(ra, rb, rc); + gen_fsubs(ra, rb, rc, fn11); break; case 0x02: /* MULS */ - gen_fmuls(ra, rb, rc); + gen_fmuls(ra, rb, rc, fn11); break; case 0x03: /* DIVS */ - gen_fdivs(ra, rb, rc); + gen_fdivs(ra, rb, rc, fn11); break; case 0x20: /* ADDT */ - gen_faddt(ra, rb, rc); + gen_faddt(ra, rb, rc, fn11); break; case 0x21: /* SUBT */ - gen_fsubt(ra, rb, rc); + gen_fsubt(ra, rb, rc, fn11); break; case 0x22: /* MULT */ - gen_fmult(ra, rb, rc); + gen_fmult(ra, rb, rc, fn11); break; case 0x23: /* DIVT */ - gen_fdivt(ra, rb, rc); + gen_fdivt(ra, rb, rc, fn11); break; case 0x24: /* CMPTUN */ @@ -1812,26 +1841,25 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) gen_fcmptle(ra, rb, rc); break; case 0x2C: - /* XXX: incorrect */ if (fn11 == 0x2AC || fn11 == 0x6AC) { /* CVTST */ - gen_fcvtst(rb, rc); + gen_fcvtst(rb, rc, fn11); } else { /* CVTTS */ - gen_fcvtts(rb, rc); + gen_fcvtts(rb, rc, fn11); } break; case 0x2F: /* CVTTQ */ - gen_fcvttq(rb, rc); + gen_fcvttq(rb, rc, fn11); break; case 0x3C: /* CVTQS */ - gen_fcvtqs(rb, rc); + gen_fcvtqs(rb, rc, fn11); break; case 0x3E: /* CVTQT */ - gen_fcvtqt(rb, rc); + gen_fcvtqt(rb, rc, fn11); break; default: goto invalid_opc;
This is a squashed version of the 3 or 4 incremental patches that I had sent out for implementing the alpha fpu instruction qualifiers. r~ commit 572164702dd83955fc8783c85811ec86c3fb6e4a Author: Richard Henderson <rth@twiddle.net> Date: Fri Dec 18 10:50:32 2009 -0800 target-alpha: Implement fp insn qualifiers. Adds a third constant argument to the fpu helpers, which contains the unparsed qualifier bits. The helper functions use new begin_fp/end_fp routines that extract the rounding mode from the qualifier bits, as well as raise exceptions for non-finite inputs and outputs also as directed by the qualifier bits. cpu_alpha_load/store_fpcr modified to load/store the majority of the bits from env->fpcr. This is because we hadn't been saving a few of the fpcr bits in the fp_status field: in particular DNZ. Re-implement cvttq without saturation of overflow results, to match the Alpha specification. Signed-off-by: Richard Henderson <rth@twiddle.net>