Message ID | 20210809202355.568303-2-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000: Support more SSE4.1 intrinsics | expand |
Hi, Paul! On 8/9/21 3:23 PM, Paul A. Clarke via Gcc-patches wrote: > Suppress exceptions (when specified), by saving, manipulating, and > restoring the FPSCR. Similarly, save, set, and restore the floating-point > rounding mode when required. > > No attempt is made to optimize writing the FPSCR (by checking if the new > value would be the same), other than using lighter weight instructions > when possible. > > The scalar versions naively use the parallel versions to compute the > single scalar result and then construct the remainder of the result. > > Of minor note, the values of _MM_FROUND_TO_NEG_INF and _MM_FROUND_TO_ZERO > are swapped from the corresponding values on x86 so as to match the > corresponding rounding mode values in the Power ISA. > > Move implementations of _mm_ceil* and _mm_floor* into _mm_round*, and > convert _mm_ceil* and _mm_floor* into macros. This matches the current > analogous implementations in config/i386/smmintrin.h. > > Add tests for _mm_round_pd, _mm_round_ps, _mm_round_sd, _mm_round_ss, > modeled after the very similar "floor" and "ceil" tests. > > Include basic tests, plus tests at the boundaries for floating-point > representation, positive and negative, test all of the parameterized > rounding modes as well as the C99 rounding modes and interactions > between the two. > > Exceptions are not explicitly tested. Reminder: Please specify what testing you've done and on which platforms, and where you want to apply the patches (i.e., do you want backports?). > > 2021-08-09 Paul A. Clarke <pc@us.ibm.com> > > gcc > * config/rs6000/smmintrin.h (_mm_round_pd, _mm_round_ps, > _mm_round_sd, _mm_round_ss, _MM_FROUND_TO_NEAREST_INT Missing comma > _MM_FROUND_TO_ZERO, _MM_FROUND_TO_POS_INF, _MM_FROUND_TO_NEG_INF, > _MM_FROUND_CUR_DIRECTION, _MM_FROUND_RAISE_EXC, _MM_FROUND_NO_EXC Likewise > _MM_FROUND_NINT, _MM_FROUND_FLOOR, _MM_FROUND_CEIL, _MM_FROUND_TRUNC, > _MM_FROUND_RINT, _MM_FROUND_NEARBYINT): New. 
> * config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps, _mm_ceil_sd, > _mm_ceil_ss, _mm_floor_pd, _mm_floor_ps, _mm_floor_sd, _mm_floor_ss): > Convert from function to macro. > > gcc/testsuite > * gcc.target/powerpc/sse4_1-round3.h: New. > * gcc.target/powerpc/sse4_1-roundpd.c: New. > * gcc.target/powerpc/sse4_1-roundps.c: New. > * gcc.target/powerpc/sse4_1-roundsd.c: New. > * gcc.target/powerpc/sse4_1-roundss.c: New. > --- > gcc/config/rs6000/smmintrin.h | 246 ++++++++++++----- > .../gcc.target/powerpc/sse4_1-round3.h | 81 ++++++ > .../gcc.target/powerpc/sse4_1-roundpd.c | 143 ++++++++++ > .../gcc.target/powerpc/sse4_1-roundps.c | 98 +++++++ > .../gcc.target/powerpc/sse4_1-roundsd.c | 256 ++++++++++++++++++ > .../gcc.target/powerpc/sse4_1-roundss.c | 208 ++++++++++++++ > 6 files changed, 968 insertions(+), 64 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h > index 3767a67eada7..862e78ac7d60 100644 > --- a/gcc/config/rs6000/smmintrin.h > +++ b/gcc/config/rs6000/smmintrin.h > @@ -42,6 +42,188 @@ > #include <altivec.h> > #include <tmmintrin.h> > > +/* Rounding mode macros. 
*/ > +#define _MM_FROUND_TO_NEAREST_INT 0x00 > +#define _MM_FROUND_TO_ZERO 0x01 > +#define _MM_FROUND_TO_POS_INF 0x02 > +#define _MM_FROUND_TO_NEG_INF 0x03 > +#define _MM_FROUND_CUR_DIRECTION 0x04 > + > +#define _MM_FROUND_NINT \ > + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_FLOOR \ > + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_CEIL \ > + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_TRUNC \ > + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_RINT \ > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_NEARBYINT \ > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) > + > +#define _MM_FROUND_RAISE_EXC 0x00 > +#define _MM_FROUND_NO_EXC 0x08 > + > +__inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_round_pd (__m128d __A, int __rounding) > +{ > + __v2df __r; > + union { > + double __fr; > + long long __fpscr; > + } __save, __tmp; > + > + if (__rounding & _MM_FROUND_NO_EXC) > + { > + /* Save enabled exceptions, and disable all exceptions. > + Pre-POWER9, mffsce decodes to mffs, requiring the additional > + mtfsf, below, to disable exceptions. */ > + __asm__ __volatile__ ( > + ".machine push; .machine \"power9\"; mffsce %0; .machine pop" As we discussed, this cleverness causes trouble by introducing a dependency on a binutils that recognizes .machine "power9". Better to just #ifdef this and specify mffsce versus mffs. > + : "=f" (__save.__fr)); > + __save.__fpscr &= 0xf8; > + __tmp.__fpscr = __save.__fpscr; > +#ifndef _ARCH_PWR9 > + __tmp.__fpscr &= ~0xf8; Just __tmp.__fpscr = 0, please, given the preceding code. > + __builtin_mtfsf (0x01, __tmp.__fr); > +#endif > + } > + > + switch (__rounding) > + { > + case _MM_FROUND_TO_NEAREST_INT: > + __tmp.__fr = __builtin_mffsl (); Another clever encoding trick, but dangerous. 
__builtin_mffsl isn't guaranteed to be recognized on P8 or earlier, even if it happens to work today. Just use mffs and mffsl under #ifdef control. > + __attribute__((fallthrough)); Well done. :-) A lot of people miss this. > + case _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC: > + { > + __builtin_set_fpscr_rn (0b00); > + __r = vec_rint ((__v2df) __A); > + __builtin_set_fpscr_rn (__tmp.__fpscr); > + } > + break; > + case _MM_FROUND_TO_NEG_INF: > + case _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC: > + __r = vec_floor ((__v2df) __A); > + break; > + case _MM_FROUND_TO_POS_INF: > + case _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC: > + __r = vec_ceil ((__v2df) __A); > + break; > + case _MM_FROUND_TO_ZERO: > + case _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC: > + __r = vec_trunc ((__v2df) __A); > + break; > + case _MM_FROUND_CUR_DIRECTION: > + __r = vec_rint ((__v2df) __A); > + break; > + } > + if (__rounding & _MM_FROUND_NO_EXC) > + { > + /* Restore enabled exceptions. */ > + __tmp.__fr = __builtin_mffsl (); Same issue here. > + __tmp.__fpscr |= __save.__fpscr & 0xf8; > + __builtin_mtfsf (0x01, __tmp.__fr); > + } > + return (__m128d) __r; > +} > + > +__inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_round_sd (__m128d __A, __m128d __B, int __rounding) > +{ > + __B = _mm_round_pd (__B, __rounding); > + __v2df __r = { ((__v2df)__B)[0], ((__v2df) __A)[1] }; > + return (__m128d) __r; > +} > + > +__inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_round_ps (__m128 __A, int __rounding) > +{ > + __v4sf __r; > + union { > + double __fr; > + long long __fpscr; > + } __save, __tmp; > + > + if (__rounding & _MM_FROUND_NO_EXC) > + { > + /* Save enabled exceptions, and disable all exceptions. > + Pre-POWER9, mffsce decodes to mffs, requiring the additional > + mtfsf, below, to disable exceptions. 
*/ > + __asm__ __volatile__ ( > + ".machine push; .machine \"power9\"; mffsce %0; .machine pop" Same issues in this function as above, I won't repeat them all. > + : "=f" (__save.__fr)); > + __save.__fpscr &= 0xf8; > + __tmp.__fpscr = __save.__fpscr; > +#ifndef _ARCH_PWR9 > + __tmp.__fpscr &= ~0xf8; > + __builtin_mtfsf (0x01, __tmp.__fr); > +#endif > + } > + > + switch (__rounding) > + { > + case _MM_FROUND_TO_NEAREST_INT: > + __tmp.__fr = __builtin_mffsl (); > + __attribute__((fallthrough)); > + case _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC: > + { > + __builtin_set_fpscr_rn (0b00); > + __r = vec_rint ((__v4sf) __A); > + __builtin_set_fpscr_rn (__tmp.__fpscr); > + } > + break; > + case _MM_FROUND_TO_NEG_INF: > + case _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC: > + __r = vec_floor ((__v4sf) __A); > + break; > + case _MM_FROUND_TO_POS_INF: > + case _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC: > + __r = vec_ceil ((__v4sf) __A); > + break; > + case _MM_FROUND_TO_ZERO: > + case _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC: > + __r = vec_trunc ((__v4sf) __A); > + break; > + case _MM_FROUND_CUR_DIRECTION: > + __r = vec_rint ((__v4sf) __A); > + break; > + } > + if (__rounding & _MM_FROUND_NO_EXC) > + { > + /* Restore enabled exceptions. 
*/ > + __tmp.__fr = __builtin_mffsl (); > + __tmp.__fpscr |= __save.__fpscr & 0xf8; > + __builtin_mtfsf (0x01, __tmp.__fr); > + } > + return (__m128) __r; > +} > + > +__inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_round_ss (__m128 __A, __m128 __B, int __rounding) > +{ > + __B = _mm_round_ps (__B, __rounding); > + __v4sf __r = (__v4sf) __A; > + __r[0] = ((__v4sf)__B)[0]; > + return (__m128) __r; > +} > + > +#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL) > +#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL) > + > +#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) > +#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR) > + > +#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL) > +#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL) > + > +#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR) > +#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR) > + > extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm_insert_epi8 (__m128i const __A, int const __D, int const __N) > { > @@ -232,70 +414,6 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) > return any_ones * any_zeros; > } > > -__inline __m128d > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_ceil_pd (__m128d __A) > -{ > - return (__m128d) vec_ceil ((__v2df) __A); > -} > - > -__inline __m128d > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_ceil_sd (__m128d __A, __m128d __B) > -{ > - __v2df __r = vec_ceil ((__v2df) __B); > - __r[1] = ((__v2df) __A)[1]; > - return (__m128d) __r; > -} > - > -__inline __m128d > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_floor_pd (__m128d __A) > -{ > - return (__m128d) vec_floor ((__v2df) __A); > -} > - > -__inline __m128d > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_floor_sd 
(__m128d __A, __m128d __B) > -{ > - __v2df __r = vec_floor ((__v2df) __B); > - __r[1] = ((__v2df) __A)[1]; > - return (__m128d) __r; > -} > - > -__inline __m128 > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_ceil_ps (__m128 __A) > -{ > - return (__m128) vec_ceil ((__v4sf) __A); > -} > - > -__inline __m128 > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_ceil_ss (__m128 __A, __m128 __B) > -{ > - __v4sf __r = (__v4sf) __A; > - __r[0] = __builtin_ceil (((__v4sf) __B)[0]); > - return __r; > -} > - > -__inline __m128 > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_floor_ps (__m128 __A) > -{ > - return (__m128) vec_floor ((__v4sf) __A); > -} > - > -__inline __m128 > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_floor_ss (__m128 __A, __m128 __B) > -{ > - __v4sf __r = (__v4sf) __A; > - __r[0] = __builtin_floor (((__v4sf) __B)[0]); > - return __r; > -} > - > /* Return horizontal packed word minimum and its index in bits [15:0] > and bits [18:16] respectively. 
*/ > __inline __m128i > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > new file mode 100644 > index 000000000000..de6cbf7be438 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > @@ -0,0 +1,81 @@ > +#include <smmintrin.h> > +#include <fenv.h> > +#include "sse4_1-check.h" > + > +#define DIM(a) (sizeof (a) / sizeof (a)[0]) > + > +static int roundings[] = > + { > + _MM_FROUND_TO_NEAREST_INT, > + _MM_FROUND_TO_NEG_INF, > + _MM_FROUND_TO_POS_INF, > + _MM_FROUND_TO_ZERO, > + _MM_FROUND_CUR_DIRECTION > + }; > + > +static int modes[] = > + { > + FE_TONEAREST, > + FE_UPWARD, > + FE_DOWNWARD, > + FE_TOWARDZERO > + }; > + > +static void > +TEST (void) > +{ > + int i, j, ri, mi, round_save; > + > + round_save = fegetround (); > + for (mi = 0; mi < DIM (modes); mi++) { > + fesetround (modes[mi]); > + for (i = 0; i < DIM (data); i++) { > + for (ri = 0; ri < DIM (roundings); ri++) { > + union value guess; > + union value *current_answers = answers[ri]; > + switch ( roundings[ri] ) { > + case _MM_FROUND_TO_NEAREST_INT: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_TO_NEAREST_INT); > + break; > + case _MM_FROUND_TO_NEG_INF: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_TO_NEG_INF); > + break; > + case _MM_FROUND_TO_POS_INF: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_TO_POS_INF); > + break; > + case _MM_FROUND_TO_ZERO: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_TO_ZERO); > + break; > + case _MM_FROUND_CUR_DIRECTION: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_CUR_DIRECTION); > + switch ( modes[mi] ) { > + case FE_TONEAREST: > + current_answers = answers_NEAREST_INT; > + break; > + case FE_UPWARD: > + current_answers = answers_POS_INF; > + break; > + case FE_DOWNWARD: > + current_answers = answers_NEG_INF; > + break; > + 
case FE_TOWARDZERO: > + current_answers = answers_ZERO; > + break; > + } > + break; > + default: > + abort (); > + } > + for (j = 0; j < DIM (guess.f); j++) > + if (guess.f[j] != current_answers[i].f[j]) > + abort (); > + } > + } > + } > + fesetround (round_save); > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > new file mode 100644 > index 000000000000..42fa453b9fd3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > @@ -0,0 +1,143 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ Let's please be sure -Wno-psabi is needed. If so, please document the warning you're avoiding. Thanks! I won't claim I went through all the tests in gory detail, but otherwise this patch LGTM with the indicated changes. Thanks! Bill > + > +#define NO_WARN_X86_INTRINSICS 1 > +#include <smmintrin.h> > + > +#define VEC_T __m128d > +#define FP_T double > + > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_pd (x, mode) > + > +#include "sse4_1-round-data.h" > + > +struct data2 data[] = { > + { .value1 = { .f = { 0.00, 0.25 } } }, > + { .value1 = { .f = { 0.50, 0.75 } } }, > + > + { .value1 = { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffdp+50 } } }, > + { .value1 = { .f = { 0x1.ffffffffffffep+50, 0x1.fffffffffffffp+50 } } }, > + { .value1 = { .f = { 0x1.0000000000000p+51, 0x1.0000000000001p+51 } } }, > + { .value1 = { .f = { 0x1.0000000000002p+51, 0x1.0000000000003p+51 } } }, > + > + { .value1 = { .f = { 0x1.ffffffffffffep+51, 0x1.fffffffffffffp+51 } } }, > + { .value1 = { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } } }, > + > + { .value1 = { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } } }, > + { .value1 = { .f = { -0x1.fffffffffffffp+51, -0x1.ffffffffffffep+51 } } }, > + > + { .value1 = { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } } }, > + { .value1 = { .f = { 
-0x1.0000000000001p+51, -0x1.0000000000000p+51 } } }, > + { .value1 = { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffep+50 } } }, > + { .value1 = { .f = { -0x1.ffffffffffffdp+50, -0x1.ffffffffffffcp+50 } } }, > + > + { .value1 = { .f = { -1.00, -0.75 } } }, > + { .value1 = { .f = { -0.50, -0.25 } } } > +}; > + > +union value answers_NEAREST_INT[] = { > + { .f = { 0.00, 0.00 } }, > + { .f = { 0.00, 1.00 } }, > + > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, > + > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > + > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > + > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > + > + { .f = { -1.00, -1.00 } }, > + { .f = { 0.00, 0.00 } } > +}; > + > +union value answers_NEG_INF[] = { > + { .f = { 0.00, 0.00 } }, > + { .f = { 0.00, 0.00 } }, > + > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > + > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > + > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > + > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > + { .f = { -0x1.0000000000002p+51, 
-0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.0000000000000p+51, -0x1.ffffffffffffcp+50 } }, > + > + { .f = { -1.00, -1.00 } }, > + { .f = { -1.00, -1.00 } } > +}; > + > +union value answers_POS_INF[] = { > + { .f = { 0.00, 1.00 } }, > + { .f = { 1.00, 1.00 } }, > + > + { .f = { 0x1.ffffffffffffcp+50, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000002p+51 } }, > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, > + > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > + > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > + > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > + > + { .f = { -1.00, 0.00 } }, > + { .f = { 0.00, 0.00 } } > +}; > + > +union value answers_ZERO[] = { > + { .f = { 0.00, 0.00 } }, > + { .f = { 0.00, 0.00 } }, > + > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > + > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > + > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > + > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > + 
{ .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > + > + { .f = { -1.00, 0.00 } }, > + { .f = { 0.00, 0.00 } } > +}; > + > +union value *answers[] = { > + answers_NEAREST_INT, > + answers_NEG_INF, > + answers_POS_INF, > + answers_ZERO, > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > +}; > + > +#include "sse4_1-round3.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > new file mode 100644 > index 000000000000..e36d325992db > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > @@ -0,0 +1,98 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ > + > +#define NO_WARN_X86_INTRINSICS 1 > +#include <smmintrin.h> > + > +#define VEC_T __m128 > +#define FP_T float > + > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_ps (x, mode) > + > +#include "sse4_1-round-data.h" > + > +struct data2 data[] = { > + { .value1 = { .f = { 0.00, 0.25, 0.50, 0.75 } } }, > + > + { .value1 = { .f = { 0x1.fffff8p+21, 0x1.fffffap+21, > + 0x1.fffffcp+21, 0x1.fffffep+21 } } }, > + { .value1 = { .f = { 0x1.fffffap+22, 0x1.fffffcp+22, > + 0x1.fffffep+22, 0x1.fffffep+23 } } }, > + { .value1 = { .f = { -0x1.fffffep+23, -0x1.fffffep+22, > + -0x1.fffffcp+22, -0x1.fffffap+22 } } }, > + { .value1 = { .f = { -0x1.fffffep+21, -0x1.fffffcp+21, > + -0x1.fffffap+21, -0x1.fffff8p+21 } } }, > + > + { .value1 = { .f = { -1.00, -0.75, -0.50, -0.25 } } } > +}; > + > +union value answers_NEAREST_INT[] = { > + { .f = { 0.00, 0.00, 0.00, 1.00 } }, > + > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > + 0x1.000000p+22, 0x1.000000p+22 } }, > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > + 0x1.000000p+23, 0x1.fffffep+23 } }, > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > + > + { .f = { -1.00, 
-1.00, 0.00, 0.00 } } > +}; > + > +union value answers_NEG_INF[] = { > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > + > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > + -0x1.fffffcp+22, -0x1.fffffcp+22 } }, > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > + -0x1.000000p+22, -0x1.fffff8p+21 } }, > + > + { .f = { -1.00, -1.00, -1.00, -1.00 } } > +}; > + > +union value answers_POS_INF[] = { > + { .f = { 0.00, 1.00, 1.00, 1.00 } }, > + > + { .f = { 0x1.fffff8p+21, 0x1.000000p+22, > + 0x1.000000p+22, 0x1.000000p+22 } }, > + { .f = { 0x1.fffffcp+22, 0x1.fffffcp+22, > + 0x1.000000p+23, 0x1.fffffep+23 } }, > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > + > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > +}; > + > +union value answers_ZERO[] = { > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > + > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > + > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > +}; > + > +union value *answers[] = { > + answers_NEAREST_INT, > + answers_NEG_INF, > + answers_POS_INF, > + answers_ZERO, > + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ > +}; > + > +#include "sse4_1-round3.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > new file mode 100644 > index 000000000000..00205dbd19bc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > @@ -0,0 +1,256 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ > + > +#include <stdio.h> > +#define NO_WARN_X86_INTRINSICS 1 > +#include <smmintrin.h> > + > +#define VEC_T __m128d > +#define FP_T double > + > +#define ROUND_INTRIN(x, y, mode) _mm_round_sd (x, y, mode) > + > +#include "sse4_1-round-data.h" > + > +static struct data2 data[] = { > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0.00, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0.25, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0.50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0.75, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.ffffffffffffcp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.ffffffffffffdp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.ffffffffffffep+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffffffffffp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000000p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000001p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000002p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000003p+51, IGNORED } } }, 
> + > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.ffffffffffffep+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffffffffffp+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000000p+52, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000001p+52, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000001p+52, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000000p+52, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffffffffffp+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffep+51, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000004p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000002p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000001p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000000p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffep+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffdp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -1.00, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0.75, 
IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0.50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0.25, IGNORED } } } > +}; > + > +static union value answers_NEAREST_INT[] = { > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -0.00, PASSTHROUGH } }, > + { .f = { -0.00, PASSTHROUGH } } > +}; > + > +static union value answers_NEG_INF[] = { > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 
0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH } } > +}; > + > +static union value answers_POS_INF[] = { > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 
0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } } > +}; > + > +static union value answers_ZERO[] = { > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH 
} }, > + > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } } > +}; > + > +union value *answers[] = { > + answers_NEAREST_INT, > + answers_NEG_INF, > + answers_POS_INF, > + answers_ZERO, > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > +}; > + > +#include "sse4_1-round3.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > new file mode 100644 > index 000000000000..b9a8eae31f2d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > @@ -0,0 +1,208 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ > + > +#include <stdio.h> > +#define NO_WARN_X86_INTRINSICS 1 > +#include <smmintrin.h> > + > +#define VEC_T __m128 > +#define FP_T float > + > +#define ROUND_INTRIN(x, y, mode) _mm_round_ss (x, y, mode) > + > +#include "sse4_1-round-data.h" > + > +static struct data2 data[] = { > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0.00, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0.25, IGNORED, IGNORED, IGNORED } } }, > + { 
.value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0.50, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0.75, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } 
}, > + .value2 = { .f = { -0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -1.00, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0.75, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0.50, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0.25, IGNORED, IGNORED, IGNORED } } } > +}; > + > +static union value answers_NEAREST_INT[] = { > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = 
{ 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > +}; > + > +static union value answers_NEG_INF[] = { > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, 
PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > +}; > + > +static union value answers_POS_INF[] = { > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 
-0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > +}; > + > +static union value answers_ZERO[] = { > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, 
> + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > +}; > + > +union value *answers[] = { > + answers_NEAREST_INT, > + answers_NEG_INF, > + answers_POS_INF, > + answers_ZERO, > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > +}; > + > +#include "sse4_1-round3.h"
On Wed, Aug 18, 2021 at 02:01:33PM -0500, Bill Schmidt wrote: > On 8/9/21 3:23 PM, Paul A. Clarke via Gcc-patches wrote: > Reminder: Please specify what testing you've done and on which > platforms, Yes please. > and where you want to apply the patches (i.e., do you want > backports?). Backports are not suitable for features, in general. If nothing is said I assume no backports (for any patch). > >+ if (__rounding & _MM_FROUND_NO_EXC) > >+ { > >+ /* Save enabled exceptions, and disable all exceptions. > >+ Pre-POWER9, mffsce decodes to mffs, requiring the additional > >+ mtfsf, below, to disable exceptions. */ > >+ __asm__ __volatile__ ( > >+ ".machine push; .machine \"power9\"; mffsce %0; .machine pop" > > > As we discussed, this cleverness causes trouble by introducing a > dependency on a binutils that recognizes .machine "power9". Better to > just #ifdef this and specify mffsce versus mffs. It needs testing on all possible pre-p9 systems, because of this cleverness, too. ( at the end of a line is always wrong btw. > >+ switch (__rounding) > >+ { > >+ case _MM_FROUND_TO_NEAREST_INT: > >+ __tmp.__fr = __builtin_mffsl (); > > Another clever encoding trick, but dangerous. __builtin_mffsl isn't > guaranteed to be recognized on P8 or earlier, even if it happens to work > today. Just use mffs and mffsl under #ifdef control. Yeah. It isn't mentioned in the architecture that this is safe to use, so if you really have to, it has to be tested everywhere. But better is to just avoid it. There are various builtins that automatically create backwards-compatible code when needed. One of those is __builtin_mffsl. Please use it :-) > >+ __attribute__((fallthrough)); > > Well done. :-) A lot of people miss this. 
The compiler is supposed to warn whenever you do forget it :-) > >+ case _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC: > >+ { > >+ __builtin_set_fpscr_rn (0b00); > >+ __r = vec_rint ((__v2df) __A); > >+ __builtin_set_fpscr_rn (__tmp.__fpscr); > >+ } > >+ break; That layout isn't right. You probably want the break inside the block? Why do you want a block at all, anyway? > >+ __asm__ __volatile__ ( > >+ ".machine push; .machine \"power9\"; mffsce %0; .machine pop" > > Same issues in this function as above, I won't repeat them all. You need no quotes either, so you don't need to quote the quotes, fwiw. Segher
On Mon, Aug 09, 2021 at 03:23:50PM -0500, Paul A. Clarke wrote: > Suppress exceptions (when specified), by saving, manipulating, and > restoring the FPSCR. Similarly, save, set, and restore the floating-point > rounding mode when required. > > No attempt is made to optimize writing the FPSCR (by checking if the new > value would be the same), other than using lighter weight instructions > when possible. There are __builtin_set_fpscr_rn and friends, please use those, those are optimised for any platform. > * config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps, _mm_ceil_sd, > _mm_ceil_ss, _mm_floor_pd, _mm_floor_ps, _mm_floor_sd, _mm_floor_ss): > Convert from function to macro. Please explain why you regress this (not in the changelog of course). > +/* Rounding mode macros. */ > +#define _MM_FROUND_TO_NEAREST_INT 0x00 > +#define _MM_FROUND_TO_ZERO 0x01 > +#define _MM_FROUND_TO_POS_INF 0x02 > +#define _MM_FROUND_TO_NEG_INF 0x03 > +#define _MM_FROUND_CUR_DIRECTION 0x04 You can just write "0" .. "4", heh. > + > +#define _MM_FROUND_NINT \ > + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_FLOOR \ > + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_CEIL \ > + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_TRUNC \ > + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_RINT \ > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_NEARBYINT \ > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) All these macro definitions will comfortably fit on one line. > +__inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_round_pd (__m128d __A, int __rounding) > +{ Non-static inline is not what you want, esp. with gnu-inline? Or, what is the goal, and why can you not do it with modern inline? > + __v2df __r; > + union { > + double __fr; > + long long __fpscr; > + } __save, __tmp; > + > + if (__rounding & _MM_FROUND_NO_EXC) > + { Wrong indent. 
This code is very hard to read because of that. If you figure that gee, it would be nice if we had a builtin for mffsce, then please make one? :-) > + case _MM_FROUND_TO_NEAREST_INT: > + __tmp.__fr = __builtin_mffsl (); > + __attribute__((fallthrough)); Space before (. > + case _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC: Space after |. Please fix these things and resend. Segher
On Wed, Aug 18, 2021 at 05:46:58PM -0500, Segher Boessenkool wrote: > On Mon, Aug 09, 2021 at 03:23:50PM -0500, Paul A. Clarke wrote: > > Suppress exceptions (when specified), by saving, manipulating, and > > restoring the FPSCR. Similarly, save, set, and restore the floating-point > > rounding mode when required. > > > > No attempt is made to optimize writing the FPSCR (by checking if the new > > value would be the same), other than using lighter weight instructions > > when possible. > > There are __builtin_set_fpscr_rn and friends, please use those, those > are optimised for any platform. I do. (Unless I missed an opportunity somewhere?) The "optimize" comment refers to, for example, not checking the current rounding mode before setting and restoring it. > > * config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps, _mm_ceil_sd, > > _mm_ceil_ss, _mm_floor_pd, _mm_floor_ps, _mm_floor_sd, _mm_floor_ss): > > Convert from function to macro. > > Please explain why you regress this (not in the changelog of course). I'm not sure what "regress" means here? I should've said that these are now identical implementations to those found in config/i386/smmintrin.h. I'll add that to the commit message in v2. > > +/* Rounding mode macros. */ > > +#define _MM_FROUND_TO_NEAREST_INT 0x00 > > +#define _MM_FROUND_TO_ZERO 0x01 > > +#define _MM_FROUND_TO_POS_INF 0x02 > > +#define _MM_FROUND_TO_NEG_INF 0x03 > > +#define _MM_FROUND_CUR_DIRECTION 0x04 > > You can just write "0" .. "4", heh. Copied from config/i386/smmintrin.h. 
> > +#define _MM_FROUND_NINT \ > > + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_FLOOR \ > > + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_CEIL \ > > + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_TRUNC \ > > + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_RINT \ > > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_NEARBYINT \ > > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) > > All these macro definitions will comfortably fit on one line. Copied from config/i386/smmintrin.h. > > +__inline __m128d > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > +_mm_round_pd (__m128d __A, int __rounding) > > +{ > > Non-static inline is not what you want, esp. with gnu-inline? Or, what > is the goal, and why can you not do it with modern inline? This is the same basic signature as the other 600+ intrinsics. Actually, they were all described as "extern", but in a previous review, you said: > "extern" on definitions is superfluous So, I've dropped that for newer ones. Should they all instead be "static"? The goal is to be compatible with the i386 implementations. Those typically use something like: extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) (which kinda makes me want to put "extern" back, now that I think about it). I'm not sure what you mean by "modern inline". > > + __v2df __r; > > + union { > > + double __fr; > > + long long __fpscr; > > + } __save, __tmp; > > + > > + if (__rounding & _MM_FROUND_NO_EXC) > > + { > > Wrong indent. This code is very hard to read because of that. OK, will fix in v2. > If you figure that gee, it would be a nice if we had a builtin for > mffsce, then please make one? :-) Is one use-case sufficient grounds? I can give it a shot if so. 
> > + case _MM_FROUND_TO_NEAREST_INT: > > + __tmp.__fr = __builtin_mffsl (); > > + __attribute__((fallthrough)); > > Space before (. OK > > + case _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC: > > Space after |. OK > Please fix these things and resend. Will do. Thanks! PC
Hi! On Thu, Aug 19, 2021 at 01:16:16PM -0500, Paul A. Clarke wrote: > On Wed, Aug 18, 2021 at 05:46:58PM -0500, Segher Boessenkool wrote: > > There are __builtin_set_fpscr_rn and friends, please use those, those > > are optimised for any platform. > > I do. (Unless I missed an opportunity somewhere?) It looked to me like you do a lot of unnecessary asm. > > > * config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps, _mm_ceil_sd, > > > _mm_ceil_ss, _mm_floor_pd, _mm_floor_ps, _mm_floor_sd, _mm_floor_ss): > > > Convert from function to macro. > > > > Please explain why you regress this (not in the changelog of course). > > I'm not sure what "regress" means here? Macros are from the 1970s, inline functions are the new hot. Why do you need macros here? The patch should say why (most likely in the patch message). > > > +#define _MM_FROUND_TO_NEAREST_INT 0x00 > > > +#define _MM_FROUND_TO_ZERO 0x01 > > > +#define _MM_FROUND_TO_POS_INF 0x02 > > > +#define _MM_FROUND_TO_NEG_INF 0x03 > > > +#define _MM_FROUND_CUR_DIRECTION 0x04 > > > > You can just write "0" .. "4", heh. > > Copied from config/i386/smmintrin.h. That doesn't make it less silly :-) > > > +__inline __m128d > > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > +_mm_round_pd (__m128d __A, int __rounding) > > > +{ > > > > Non-static inline is not what you want, esp. with gnu-inline? Or, what > > is the goal, and why can you not do it with modern inline? > > This is the same basic signature as the other 600+ intrinsics. > Actually, they were all described as "extern", but in a previous > review, you said: > > "extern" on definitions is superfluous > So, I've dropped that for newer ones. > Should they all instead be "static"? > > The goal is to be compatible with the i386 implementations. > Those typically use something like: > > extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > > (which kinda makes me want to put "extern" back, now that I think > about it).
"extern" is not redundant for inline functions. Since you have always_inline here, gnu_inline extern inline has the same meaning as static inline in portable C. > I'm not sure what you mean by "modern inline". Not using the long deprecated gnu_inline. > > Wrong indent. This code is very hard to read because of that. > > OK, will fix in v2. Thanks! > > If you figure that gee, it would be a nice if we had a builtin for > > mffsce, then please make one? :-) > > Is one use-case sufficient grounds? I can give it a shot if so. If it is useful for others, then yes please! Ideally you can make a builtin that we can also reasonably implement without support for the new insns, so we can use the builtin whenever the builtin exists. Thanks, Segher
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index 3767a67eada7..862e78ac7d60 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -42,6 +42,188 @@ #include <altivec.h> #include <tmmintrin.h> +/* Rounding mode macros. */ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_ZERO 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_NEG_INF 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 + +#define _MM_FROUND_NINT \ + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_FLOOR \ + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_CEIL \ + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_TRUNC \ + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_RINT \ + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_NEARBYINT \ + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) + +#define _MM_FROUND_RAISE_EXC 0x00 +#define _MM_FROUND_NO_EXC 0x08 + +__inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_round_pd (__m128d __A, int __rounding) +{ + __v2df __r; + union { + double __fr; + long long __fpscr; + } __save, __tmp; + + if (__rounding & _MM_FROUND_NO_EXC) + { + /* Save enabled exceptions, and disable all exceptions. + Pre-POWER9, mffsce decodes to mffs, requiring the additional + mtfsf, below, to disable exceptions. 
*/ + __asm__ __volatile__ ( + ".machine push; .machine \"power9\"; mffsce %0; .machine pop" + : "=f" (__save.__fr)); + __save.__fpscr &= 0xf8; + __tmp.__fpscr = __save.__fpscr; +#ifndef _ARCH_PWR9 + __tmp.__fpscr &= ~0xf8; + __builtin_mtfsf (0x01, __tmp.__fr); +#endif + } + + switch (__rounding) + { + case _MM_FROUND_TO_NEAREST_INT: + __tmp.__fr = __builtin_mffsl (); + __attribute__((fallthrough)); + case _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC: + { + __builtin_set_fpscr_rn (0b00); + __r = vec_rint ((__v2df) __A); + __builtin_set_fpscr_rn (__tmp.__fpscr); + } + break; + case _MM_FROUND_TO_NEG_INF: + case _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC: + __r = vec_floor ((__v2df) __A); + break; + case _MM_FROUND_TO_POS_INF: + case _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC: + __r = vec_ceil ((__v2df) __A); + break; + case _MM_FROUND_TO_ZERO: + case _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC: + __r = vec_trunc ((__v2df) __A); + break; + case _MM_FROUND_CUR_DIRECTION: + __r = vec_rint ((__v2df) __A); + break; + } + if (__rounding & _MM_FROUND_NO_EXC) + { + /* Restore enabled exceptions. */ + __tmp.__fr = __builtin_mffsl (); + __tmp.__fpscr |= __save.__fpscr & 0xf8; + __builtin_mtfsf (0x01, __tmp.__fr); + } + return (__m128d) __r; +} + +__inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_round_sd (__m128d __A, __m128d __B, int __rounding) +{ + __B = _mm_round_pd (__B, __rounding); + __v2df __r = { ((__v2df)__B)[0], ((__v2df) __A)[1] }; + return (__m128d) __r; +} + +__inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_round_ps (__m128 __A, int __rounding) +{ + __v4sf __r; + union { + double __fr; + long long __fpscr; + } __save, __tmp; + + if (__rounding & _MM_FROUND_NO_EXC) + { + /* Save enabled exceptions, and disable all exceptions. + Pre-POWER9, mffsce decodes to mffs, requiring the additional + mtfsf, below, to disable exceptions. 
*/ + __asm__ __volatile__ ( + ".machine push; .machine \"power9\"; mffsce %0; .machine pop" + : "=f" (__save.__fr)); + __save.__fpscr &= 0xf8; + __tmp.__fpscr = __save.__fpscr; +#ifndef _ARCH_PWR9 + __tmp.__fpscr &= ~0xf8; + __builtin_mtfsf (0x01, __tmp.__fr); +#endif + } + + switch (__rounding) + { + case _MM_FROUND_TO_NEAREST_INT: + __tmp.__fr = __builtin_mffsl (); + __attribute__((fallthrough)); + case _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC: + { + __builtin_set_fpscr_rn (0b00); + __r = vec_rint ((__v4sf) __A); + __builtin_set_fpscr_rn (__tmp.__fpscr); + } + break; + case _MM_FROUND_TO_NEG_INF: + case _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC: + __r = vec_floor ((__v4sf) __A); + break; + case _MM_FROUND_TO_POS_INF: + case _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC: + __r = vec_ceil ((__v4sf) __A); + break; + case _MM_FROUND_TO_ZERO: + case _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC: + __r = vec_trunc ((__v4sf) __A); + break; + case _MM_FROUND_CUR_DIRECTION: + __r = vec_rint ((__v4sf) __A); + break; + } + if (__rounding & _MM_FROUND_NO_EXC) + { + /* Restore enabled exceptions. 
*/ + __tmp.__fr = __builtin_mffsl (); + __tmp.__fpscr |= __save.__fpscr & 0xf8; + __builtin_mtfsf (0x01, __tmp.__fr); + } + return (__m128) __r; +} + +__inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_round_ss (__m128 __A, __m128 __B, int __rounding) +{ + __B = _mm_round_ps (__B, __rounding); + __v4sf __r = (__v4sf) __A; + __r[0] = ((__v4sf)__B)[0]; + return (__m128) __r; +} + +#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL) +#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL) + +#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) +#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR) + +#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL) +#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL) + +#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR) +#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR) + extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi8 (__m128i const __A, int const __D, int const __N) { @@ -232,70 +414,6 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) return any_ones * any_zeros; } -__inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ceil_pd (__m128d __A) -{ - return (__m128d) vec_ceil ((__v2df) __A); -} - -__inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ceil_sd (__m128d __A, __m128d __B) -{ - __v2df __r = vec_ceil ((__v2df) __B); - __r[1] = ((__v2df) __A)[1]; - return (__m128d) __r; -} - -__inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_floor_pd (__m128d __A) -{ - return (__m128d) vec_floor ((__v2df) __A); -} - -__inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_floor_sd (__m128d __A, __m128d __B) -{ - __v2df __r = vec_floor ((__v2df) __B); - __r[1] = ((__v2df) __A)[1]; - return (__m128d) 
__r; -} - -__inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ceil_ps (__m128 __A) -{ - return (__m128) vec_ceil ((__v4sf) __A); -} - -__inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ceil_ss (__m128 __A, __m128 __B) -{ - __v4sf __r = (__v4sf) __A; - __r[0] = __builtin_ceil (((__v4sf) __B)[0]); - return __r; -} - -__inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_floor_ps (__m128 __A) -{ - return (__m128) vec_floor ((__v4sf) __A); -} - -__inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_floor_ss (__m128 __A, __m128 __B) -{ - __v4sf __r = (__v4sf) __A; - __r[0] = __builtin_floor (((__v4sf) __B)[0]); - return __r; -} - /* Return horizontal packed word minimum and its index in bits [15:0] and bits [18:16] respectively. */ __inline __m128i diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h new file mode 100644 index 000000000000..de6cbf7be438 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h @@ -0,0 +1,81 @@ +#include <smmintrin.h> +#include <fenv.h> +#include "sse4_1-check.h" + +#define DIM(a) (sizeof (a) / sizeof (a)[0]) + +static int roundings[] = + { + _MM_FROUND_TO_NEAREST_INT, + _MM_FROUND_TO_NEG_INF, + _MM_FROUND_TO_POS_INF, + _MM_FROUND_TO_ZERO, + _MM_FROUND_CUR_DIRECTION + }; + +static int modes[] = + { + FE_TONEAREST, + FE_UPWARD, + FE_DOWNWARD, + FE_TOWARDZERO + }; + +static void +TEST (void) +{ + int i, j, ri, mi, round_save; + + round_save = fegetround (); + for (mi = 0; mi < DIM (modes); mi++) { + fesetround (modes[mi]); + for (i = 0; i < DIM (data); i++) { + for (ri = 0; ri < DIM (roundings); ri++) { + union value guess; + union value *current_answers = answers[ri]; + switch ( roundings[ri] ) { + case _MM_FROUND_TO_NEAREST_INT: + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, + _MM_FROUND_TO_NEAREST_INT); 
+ break; + case _MM_FROUND_TO_NEG_INF: + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, + _MM_FROUND_TO_NEG_INF); + break; + case _MM_FROUND_TO_POS_INF: + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, + _MM_FROUND_TO_POS_INF); + break; + case _MM_FROUND_TO_ZERO: + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, + _MM_FROUND_TO_ZERO); + break; + case _MM_FROUND_CUR_DIRECTION: + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, + _MM_FROUND_CUR_DIRECTION); + switch ( modes[mi] ) { + case FE_TONEAREST: + current_answers = answers_NEAREST_INT; + break; + case FE_UPWARD: + current_answers = answers_POS_INF; + break; + case FE_DOWNWARD: + current_answers = answers_NEG_INF; + break; + case FE_TOWARDZERO: + current_answers = answers_ZERO; + break; + } + break; + default: + abort (); + } + for (j = 0; j < DIM (guess.f); j++) + if (guess.f[j] != current_answers[i].f[j]) + abort (); + } + } + } + fesetround (round_save); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c new file mode 100644 index 000000000000..42fa453b9fd3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c @@ -0,0 +1,143 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <smmintrin.h> + +#define VEC_T __m128d +#define FP_T double + +#define ROUND_INTRIN(x, ignored, mode) _mm_round_pd (x, mode) + +#include "sse4_1-round-data.h" + +struct data2 data[] = { + { .value1 = { .f = { 0.00, 0.25 } } }, + { .value1 = { .f = { 0.50, 0.75 } } }, + + { .value1 = { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffdp+50 } } }, + { .value1 = { .f = { 0x1.ffffffffffffep+50, 0x1.fffffffffffffp+50 } } }, + { .value1 = { .f = { 0x1.0000000000000p+51, 0x1.0000000000001p+51 } } }, + { .value1 = { .f = { 0x1.0000000000002p+51, 0x1.0000000000003p+51 } } }, + + { .value1 = { .f = { 
0x1.ffffffffffffep+51, 0x1.fffffffffffffp+51 } } }, + { .value1 = { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } } }, + + { .value1 = { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } } }, + { .value1 = { .f = { -0x1.fffffffffffffp+51, -0x1.ffffffffffffep+51 } } }, + + { .value1 = { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } } }, + { .value1 = { .f = { -0x1.0000000000001p+51, -0x1.0000000000000p+51 } } }, + { .value1 = { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffep+50 } } }, + { .value1 = { .f = { -0x1.ffffffffffffdp+50, -0x1.ffffffffffffcp+50 } } }, + + { .value1 = { .f = { -1.00, -0.75 } } }, + { .value1 = { .f = { -0.50, -0.25 } } } +}; + +union value answers_NEAREST_INT[] = { + { .f = { 0.00, 0.00 } }, + { .f = { 0.00, 1.00 } }, + + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, + + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, + + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, + + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + + { .f = { -1.00, -1.00 } }, + { .f = { 0.00, 0.00 } } +}; + +union value answers_NEG_INF[] = { + { .f = { 0.00, 0.00 } }, + { .f = { 0.00, 0.00 } }, + + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, + + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, + { .f 
= { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, + + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, + + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, + { .f = { -0x1.0000000000002p+51, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, + { .f = { -0x1.0000000000000p+51, -0x1.ffffffffffffcp+50 } }, + + { .f = { -1.00, -1.00 } }, + { .f = { -1.00, -1.00 } } +}; + +union value answers_POS_INF[] = { + { .f = { 0.00, 1.00 } }, + { .f = { 1.00, 1.00 } }, + + { .f = { 0x1.ffffffffffffcp+50, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000002p+51 } }, + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, + + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, + + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, + + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + + { .f = { -1.00, 0.00 } }, + { .f = { 0.00, 0.00 } } +}; + +union value answers_ZERO[] = { + { .f = { 0.00, 0.00 } }, + { .f = { 0.00, 0.00 } }, + + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, + + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, + + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, + + { .f = 
{ -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + + { .f = { -1.00, 0.00 } }, + { .f = { 0.00, 0.00 } } +}; + +union value *answers[] = { + answers_NEAREST_INT, + answers_NEG_INF, + answers_POS_INF, + answers_ZERO, + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ +}; + +#include "sse4_1-round3.h" diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c new file mode 100644 index 000000000000..e36d325992db --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c @@ -0,0 +1,98 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <smmintrin.h> + +#define VEC_T __m128 +#define FP_T float + +#define ROUND_INTRIN(x, ignored, mode) _mm_round_ps (x, mode) + +#include "sse4_1-round-data.h" + +struct data2 data[] = { + { .value1 = { .f = { 0.00, 0.25, 0.50, 0.75 } } }, + + { .value1 = { .f = { 0x1.fffff8p+21, 0x1.fffffap+21, + 0x1.fffffcp+21, 0x1.fffffep+21 } } }, + { .value1 = { .f = { 0x1.fffffap+22, 0x1.fffffcp+22, + 0x1.fffffep+22, 0x1.fffffep+23 } } }, + { .value1 = { .f = { -0x1.fffffep+23, -0x1.fffffep+22, + -0x1.fffffcp+22, -0x1.fffffap+22 } } }, + { .value1 = { .f = { -0x1.fffffep+21, -0x1.fffffcp+21, + -0x1.fffffap+21, -0x1.fffff8p+21 } } }, + + { .value1 = { .f = { -1.00, -0.75, -0.50, -0.25 } } } +}; + +union value answers_NEAREST_INT[] = { + { .f = { 0.00, 0.00, 0.00, 1.00 } }, + + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, + 0x1.000000p+22, 0x1.000000p+22 } }, + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, + 0x1.000000p+23, 0x1.fffffep+23 } }, + { .f = { -0x1.fffffep+23, -0x1.000000p+23, + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, + { .f = { -0x1.000000p+22, -0x1.000000p+22, + 
-0x1.fffff8p+21, -0x1.fffff8p+21 } }, + + { .f = { -1.00, -1.00, 0.00, 0.00 } } +}; + +union value answers_NEG_INF[] = { + { .f = { 0.00, 0.00, 0.00, 0.00 } }, + + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, + 0x1.fffffcp+22, 0x1.fffffep+23 } }, + { .f = { -0x1.fffffep+23, -0x1.000000p+23, + -0x1.fffffcp+22, -0x1.fffffcp+22 } }, + { .f = { -0x1.000000p+22, -0x1.000000p+22, + -0x1.000000p+22, -0x1.fffff8p+21 } }, + + { .f = { -1.00, -1.00, -1.00, -1.00 } } +}; + +union value answers_POS_INF[] = { + { .f = { 0.00, 1.00, 1.00, 1.00 } }, + + { .f = { 0x1.fffff8p+21, 0x1.000000p+22, + 0x1.000000p+22, 0x1.000000p+22 } }, + { .f = { 0x1.fffffcp+22, 0x1.fffffcp+22, + 0x1.000000p+23, 0x1.fffffep+23 } }, + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, + + { .f = { -1.00, 0.00, 0.00, 0.00 } } +}; + +union value answers_ZERO[] = { + { .f = { 0.00, 0.00, 0.00, 0.00 } }, + + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, + 0x1.fffffcp+22, 0x1.fffffep+23 } }, + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, + + { .f = { -1.00, 0.00, 0.00, 0.00 } } +}; + +union value *answers[] = { + answers_NEAREST_INT, + answers_NEG_INF, + answers_POS_INF, + answers_ZERO, + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ +}; + +#include "sse4_1-round3.h" diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c new file mode 100644 index 000000000000..00205dbd19bc --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c @@ -0,0 +1,256 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ + +#include <stdio.h> +#define NO_WARN_X86_INTRINSICS 1 +#include <smmintrin.h> + +#define VEC_T __m128d +#define FP_T double + +#define ROUND_INTRIN(x, y, mode) _mm_round_sd (x, y, mode) + +#include "sse4_1-round-data.h" + +static struct data2 data[] = { + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0.00, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0.25, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0.50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0.75, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.ffffffffffffcp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.ffffffffffffdp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.ffffffffffffep+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffffffffffp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000000p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000001p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000002p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000003p+51, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.ffffffffffffep+51, 
IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffffffffffp+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000000p+52, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000001p+52, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000001p+52, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000000p+52, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffffffffffp+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffep+51, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000004p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000002p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000001p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000000p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffep+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffdp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -1.00, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0.75, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0.50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 
-0.25, IGNORED } } } +}; + +static union value answers_NEAREST_INT[] = { + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, + + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -0.00, PASSTHROUGH } }, + { .f = { -0.00, PASSTHROUGH } } +}; + +static union value answers_NEG_INF[] = { + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 
0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, + + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH } } +}; + +static union value answers_POS_INF[] = { + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, + + { .f = { 
-0x1.0000000000001p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } } +}; + +static union value answers_ZERO[] = { + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, + + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 
-0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } } +}; + +union value *answers[] = { + answers_NEAREST_INT, + answers_NEG_INF, + answers_POS_INF, + answers_ZERO, + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ +}; + +#include "sse4_1-round3.h" diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c new file mode 100644 index 000000000000..b9a8eae31f2d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c @@ -0,0 +1,208 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ + +#include <stdio.h> +#define NO_WARN_X86_INTRINSICS 1 +#include <smmintrin.h> + +#define VEC_T __m128 +#define FP_T float + +#define ROUND_INTRIN(x, y, mode) _mm_round_ss (x, y, mode) + +#include "sse4_1-round-data.h" + +static struct data2 data[] = { + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0.00, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0.25, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0.50, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0.75, IGNORED, IGNORED, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffap+21, IGNORED, IGNORED, 
IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = 
{ .f = { -0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -1.00, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0.75, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0.50, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0.25, IGNORED, IGNORED, IGNORED } } } +}; + +static union value answers_NEAREST_INT[] = { + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, 
PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } +}; + +static union value answers_NEG_INF[] = { + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } +}; + +static union value 
answers_POS_INF[] = { + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } +}; + +static union value answers_ZERO[] = { + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + 
{ .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } +}; + +union value *answers[] = { + answers_NEAREST_INT, + answers_NEG_INF, + answers_POS_INF, + answers_ZERO, + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ +}; + +#include "sse4_1-round3.h"