Message ID | 1347138767-19941-4-git-send-email-aurelien@aurel32.net |
---|---|
State | New |
Headers | show |
On 8 September 2012 22:12, Aurelien Jarno <aurelien@aurel32.net> wrote:
> +#define VARITHFPFMA(suffix, type) \
> +    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
> +                          ppc_avr_t *b, ppc_avr_t *c) \
> +    { \
> +        int i; \
> +        for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
> +            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
> +                                     type, &env->vec_status); \
> +        } \
> +    }
> +VARITHFPFMA(maddfp, 0);
> +VARITHFPFMA(nmsubfp, float_muladd_negate_result);
> +#undef VARITHFPFMA
> +
>  #define VARITHSAT_CASE(type, op, cvt, element) \
>      { \
>          type result = (type)a->element[i] op (type)b->element[i]; \
> -void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
> -                     ppc_avr_t *b, ppc_avr_t *c)
> -{
> -    int i;
> -
> -    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
> -        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
> -            /* Need to do the computation is higher precision and round
> -             * once at the end. */
> -            float64 af, bf, cf, t;
> -
> -            af = float32_to_float64(a->f[i], &env->vec_status);
> -            bf = float32_to_float64(b->f[i], &env->vec_status);
> -            cf = float32_to_float64(c->f[i], &env->vec_status);
> -            t = float64_mul(af, cf, &env->vec_status);
> -            t = float64_sub(t, bf, &env->vec_status);
> -            t = float64_chs(t);
> -            r->f[i] = float64_to_float32(t, &env->vec_status);
> -        }
> -    }
> -}

I mentioned this in my comment on the other patch, but just to attach
it to the right patch for the benefit of the archives:
the code here for vnmsub is (correctly) doing a subtraction of bf
and then negating the final result, so you need to pass float_muladd
the flags negate_result | negate_c, not just negate_result.

thanks
-- PMM
On Sun, Sep 09, 2012 at 10:51:20AM +0100, Peter Maydell wrote:
> On 8 September 2012 22:12, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > +#define VARITHFPFMA(suffix, type) \
> > +    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
> > +                          ppc_avr_t *b, ppc_avr_t *c) \
> > +    { \
> > +        int i; \
> > +        for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
> > +            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
> > +                                     type, &env->vec_status); \
> > +        } \
> > +    }
> > +VARITHFPFMA(maddfp, 0);
> > +VARITHFPFMA(nmsubfp, float_muladd_negate_result);
> > +#undef VARITHFPFMA
> > +
> >  #define VARITHSAT_CASE(type, op, cvt, element) \
> >      { \
> >          type result = (type)a->element[i] op (type)b->element[i]; \
> > -void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
> > -                     ppc_avr_t *b, ppc_avr_t *c)
> > -{
> > -    int i;
> > -
> > -    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
> > -        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
> > -            /* Need to do the computation is higher precision and round
> > -             * once at the end. */
> > -            float64 af, bf, cf, t;
> > -
> > -            af = float32_to_float64(a->f[i], &env->vec_status);
> > -            bf = float32_to_float64(b->f[i], &env->vec_status);
> > -            cf = float32_to_float64(c->f[i], &env->vec_status);
> > -            t = float64_mul(af, cf, &env->vec_status);
> > -            t = float64_sub(t, bf, &env->vec_status);
> > -            t = float64_chs(t);
> > -            r->f[i] = float64_to_float32(t, &env->vec_status);
> > -        }
> > -    }
> > -}
>
> I mentioned this in my comment on the other patch, but just to attach
> it to the right patch for the benefit of the archives:
> the code here for vnmsub is (correctly) doing a subtraction of bf
> and then negating the final result, so you need to pass float_muladd
> the flags negate_result | negate_c, not just negate_result.
>

Correct, or alternatively it could use negate_product. I'll send an
updated patch later.
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 6141243..51cb97c 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -418,6 +418,20 @@ VARITHFP(minfp, float32_min)
 VARITHFP(maxfp, float32_max)
 #undef VARITHFP
 
+#define VARITHFPFMA(suffix, type) \
+    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
+                          ppc_avr_t *b, ppc_avr_t *c) \
+    { \
+        int i; \
+        for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
+            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
+                                     type, &env->vec_status); \
+        } \
+    }
+VARITHFPFMA(maddfp, 0);
+VARITHFPFMA(nmsubfp, float_muladd_negate_result);
+#undef VARITHFPFMA
+
 #define VARITHSAT_CASE(type, op, cvt, element) \
     { \
         type result = (type)a->element[i] op (type)b->element[i]; \
@@ -649,27 +663,6 @@ VCT(uxs, cvtsduw, u32)
 VCT(sxs, cvtsdsw, s32)
 #undef VCT
 
-void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
-                    ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation in higher precision and round
-             * once at the end. */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_add(t, bf, &env->vec_status);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
 {
@@ -909,28 +902,6 @@ VMUL(uh, u16, u32)
 #undef VMUL_DO
 #undef VMUL
 
-void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
-                     ppc_avr_t *b, ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation is higher precision and round
-             * once at the end. */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_sub(t, bf, &env->vec_status);
-            t = float64_chs(t);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
 {
Use the new softfloat float32_muladd() function to implement the vmaddfp
and vnmsubfp instructions. As a bonus we can get rid of the call to the
HANDLE_NAN3 macro, as the NaN handling is directly done at the softfloat
level.

Cc: Alexander Graf <agraf@suse.de>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-ppc/int_helper.c | 57 ++++++++++++-----------------------------------
 1 file changed, 14 insertions(+), 43 deletions(-)