===================================================================
@@ -272,6 +272,15 @@
UNSPECV_SPLIT_STACK_RETURN
])
+;; Constants to represent rounding modes in the ROUND instruction
+(define_constants
+ [(ROUND_FLOOR 0x1)
+ (ROUND_CEIL 0x2)
+ (ROUND_TRUNC 0x3)
+ (ROUND_MXCSR 0x4)
+ (ROUND_NO_EXC 0x8)
+ ])
+
;; Constants to represent pcomtrue/pcomfalse variants
(define_constants
[(PCOM_FALSE 0)
@@ -14573,7 +14582,7 @@
FAIL;
if (TARGET_ROUND)
emit_insn (gen_sse4_1_round<mode>2
- (operands[0], operands[1], GEN_INT (0x04)));
+ (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
else
ix86_expand_rint (operand0, operand1);
}
@@ -14819,7 +14828,7 @@
FAIL;
if (TARGET_ROUND)
emit_insn (gen_sse4_1_round<mode>2
- (operands[0], operands[1], GEN_INT (0x01)));
+ (operands[0], operands[1], GEN_INT (ROUND_FLOOR)));
else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_floorceil (operand0, operand1, true);
else
@@ -15074,7 +15083,7 @@
{
if (TARGET_ROUND)
emit_insn (gen_sse4_1_round<mode>2
- (operands[0], operands[1], GEN_INT (0x02)));
+ (operands[0], operands[1], GEN_INT (ROUND_CEIL)));
else if (optimize_insn_for_size_p ())
FAIL;
else if (TARGET_64BIT || (<MODE>mode != DFmode))
@@ -15329,7 +15338,7 @@
{
if (TARGET_ROUND)
emit_insn (gen_sse4_1_round<mode>2
- (operands[0], operands[1], GEN_INT (0x03)));
+ (operands[0], operands[1], GEN_INT (ROUND_TRUNC)));
else if (optimize_insn_for_size_p ())
FAIL;
else if (TARGET_64BIT || (<MODE>mode != DFmode))
===================================================================
@@ -377,6 +377,11 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, INT, V1
DEF_FUNCTION_TYPE (V8QI, QI, QI, QI, QI, QI, QI, QI, QI)
+DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND)
+DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND)
+DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF, ROUND)
+DEF_FUNCTION_TYPE_ALIAS (V8SF_FTYPE_V8SF, ROUND)
+
DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DF_V2DF, PTEST)
DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DI_V2DI, PTEST)
DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V4DF_V4DF, PTEST)
===================================================================
@@ -23916,6 +23916,15 @@ enum ix86_builtins
IX86_BUILTIN_ROUNDSD,
IX86_BUILTIN_ROUNDSS,
+ IX86_BUILTIN_FLOORPD,
+ IX86_BUILTIN_CEILPD,
+ IX86_BUILTIN_TRUNCPD,
+ IX86_BUILTIN_RINTPD,
+ IX86_BUILTIN_FLOORPS,
+ IX86_BUILTIN_CEILPS,
+ IX86_BUILTIN_TRUNCPS,
+ IX86_BUILTIN_RINTPS,
+
IX86_BUILTIN_PTESTZ,
IX86_BUILTIN_PTESTC,
IX86_BUILTIN_PTESTNZC,
@@ -24083,6 +24092,15 @@ enum ix86_builtins
IX86_BUILTIN_ROUNDPD256,
IX86_BUILTIN_ROUNDPS256,
+ IX86_BUILTIN_FLOORPD256,
+ IX86_BUILTIN_CEILPD256,
+ IX86_BUILTIN_TRUNCPD256,
+ IX86_BUILTIN_RINTPD256,
+ IX86_BUILTIN_FLOORPS256,
+ IX86_BUILTIN_CEILPS256,
+ IX86_BUILTIN_TRUNCPS256,
+ IX86_BUILTIN_RINTPS256,
+
IX86_BUILTIN_UNPCKHPD256,
IX86_BUILTIN_UNPCKLPD256,
IX86_BUILTIN_UNPCKHPS256,
@@ -25105,6 +25123,16 @@ static const struct builtin_description
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
+
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
+
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
@@ -25217,6 +25245,16 @@ static const struct builtin_description
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
@@ -26216,6 +26254,39 @@ ix86_expand_sse_comi (const struct built
return SUBREG_REG (target);
}
+/* Subroutine of ix86_expand_args_builtin to take care of round insns. */
+
+static rtx
+ix86_expand_sse_round (const struct builtin_description *d, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ rtx op1, op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
+
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || !insn_data[d->icode].operand[0].predicate (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !insn_data[d->icode].operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ op1 = GEN_INT (d->comparison);
+
+ pat = GEN_FCN (d->icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
static rtx
@@ -26485,6 +26556,11 @@ ix86_expand_args_builtin (const struct b
switch ((enum ix86_builtin_func_type) d->flag)
{
+ case V2DF_FTYPE_V2DF_ROUND:
+ case V4DF_FTYPE_V4DF_ROUND:
+ case V4SF_FTYPE_V4SF_ROUND:
+ case V8SF_FTYPE_V8SF_ROUND:
+ return ix86_expand_sse_round (d, exp, target);
case INT_FTYPE_V8SF_V8SF_PTEST:
case INT_FTYPE_V4DI_V4DI_PTEST:
case INT_FTYPE_V4DF_V4DF_PTEST:
@@ -27581,6 +27657,118 @@ ix86_builtin_vectorized_function (tree f
}
break;
+ case BUILT_IN_FLOOR:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math)
+ break;
+
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_FLOORPD];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_FLOORPD256];
+ }
+ break;
+
+ case BUILT_IN_FLOORF:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math)
+ break;
+
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_FLOORPS];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_FLOORPS256];
+ }
+ break;
+
+ case BUILT_IN_CEIL:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math)
+ break;
+
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_CEILPD];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CEILPD256];
+ }
+ break;
+
+ case BUILT_IN_CEILF:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math)
+ break;
+
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CEILPS];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CEILPS256];
+ }
+ break;
+
+ case BUILT_IN_TRUNC:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math)
+ break;
+
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_TRUNCPD];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
+ }
+ break;
+
+ case BUILT_IN_TRUNCF:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math)
+ break;
+
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_TRUNCPS];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
+ }
+ break;
+
+ case BUILT_IN_RINT:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math)
+ break;
+
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_RINTPD];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_RINTPD256];
+ }
+ break;
+
+ case BUILT_IN_RINTF:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math)
+ break;
+
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_RINTPS];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_RINTPS256];
+ }
+ break;
+
case BUILT_IN_FMA:
if (out_mode == DFmode && in_mode == DFmode)
{
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern double floor (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = floor (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != floor (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern double rint (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = rint (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != rint (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern float rintf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = rintf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != rintf (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern double trunc (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = trunc (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != trunc (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern float ceilf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = ceilf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != ceilf (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern float rintf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = rintf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != rintf (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern double trunc (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = trunc (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != trunc (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern double ceil (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = ceil (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != ceil (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern float ceilf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = ceilf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != ceilf (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern float truncf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = truncf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != truncf (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern float floorf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = floorf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != floorf (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern double ceil (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = ceil (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != ceil (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern double rint (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = rint (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != rint (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern double floor (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = floor (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != floor (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern float truncf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = truncf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != truncf (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern float floorf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = floorf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != floorf (a[i]))
+ abort();
+}