[RFC,i386] : Vectorize calls to floor, ceil, trunc and rint functions.

Message ID	AANLkTik9iUESt+OOVHmTYCmS_EfF6An2CCqnS_0_BX9k@mail.gmail.com
State	New
Headers	show Return-Path: <gcc-patches-return-286845-incoming=patchwork.ozlabs.org@gcc.gnu.org> MIME-Version: 1.0 In-Reply-To: <AANLkTimV3Y56CDMdYwwGKM3-4_uwhiyAcuo3j3e8FUis@mail.gmail.com> References: <AANLkTimV3Y56CDMdYwwGKM3-4_uwhiyAcuo3j3e8FUis@mail.gmail.com> Date: Mon, 14 Mar 2011 19:59:30 +0100 Message-ID: <AANLkTik9iUESt+OOVHmTYCmS_EfF6An2CCqnS_0_BX9k@mail.gmail.com> Subject: Re: [RFC PATCH, i386]: Vectorize calls to floor, ceil, trunc and rint functions. From: Uros Bizjak <ubizjak@gmail.com> To: gcc-patches@gcc.gnu.org Content-Type: multipart/mixed; boundary=000e0cd2e0425a6dac049e75e959 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org

Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 170945) +++ config/i386/i386.md (working copy) @@ -272,6 +272,15 @@ UNSPECV_SPLIT_STACK_RETURN ]) +;; Constants to represent rounding modes in the ROUND instruction +(define_constants + [(ROUND_FLOOR 0x1) + (ROUND_CEIL 0x2) + (ROUND_TRUNC 0x3) + (ROUND_MXCSR 0x4) + (ROUND_NO_EXC 0x8) + ]) + ;; Constants to represent pcomtrue/pcomfalse variants (define_constants [(PCOM_FALSE 0) @@ -14573,7 +14582,7 @@ FAIL; if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 - (operands[0], operands[1], GEN_INT (0x04))); + (operands[0], operands[1], GEN_INT (ROUND_MXCSR))); else ix86_expand_rint (operand0, operand1); } @@ -14819,7 +14828,7 @@ FAIL; if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 - (operands[0], operands[1], GEN_INT (0x01))); + (operands[0], operands[1], GEN_INT (ROUND_FLOOR))); else if (TARGET_64BIT || (<MODE>mode != DFmode)) ix86_expand_floorceil (operand0, operand1, true); else @@ -15074,7 +15083,7 @@ { if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 - (operands[0], operands[1], GEN_INT (0x02))); + (operands[0], operands[1], GEN_INT (ROUND_CEIL))); else if (optimize_insn_for_size_p ()) FAIL; else if (TARGET_64BIT || (<MODE>mode != DFmode)) @@ -15329,7 +15338,7 @@ { if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 - (operands[0], operands[1], GEN_INT (0x03))); + (operands[0], operands[1], GEN_INT (ROUND_TRUNC))); else if (optimize_insn_for_size_p ()) FAIL; else if (TARGET_64BIT || (<MODE>mode != DFmode)) Index: config/i386/i386-builtin-types.def =================================================================== --- config/i386/i386-builtin-types.def (revision 170945) +++ config/i386/i386-builtin-types.def (working copy) @@ -377,6 +377,11 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, INT, V1 DEF_FUNCTION_TYPE (V8QI, QI, QI, QI, QI, QI, QI, QI, QI) +DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V8SF_FTYPE_V8SF, ROUND) + DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DF_V2DF, PTEST) DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DI_V2DI, PTEST) DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V4DF_V4DF, PTEST) Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 170945) +++ config/i386/i386.c (working copy) @@ -23916,6 +23916,15 @@ enum ix86_builtins IX86_BUILTIN_ROUNDSD, IX86_BUILTIN_ROUNDSS, + IX86_BUILTIN_FLOORPD, + IX86_BUILTIN_CEILPD, + IX86_BUILTIN_TRUNCPD, + IX86_BUILTIN_RINTPD, + IX86_BUILTIN_FLOORPS, + IX86_BUILTIN_CEILPS, + IX86_BUILTIN_TRUNCPS, + IX86_BUILTIN_RINTPS, + IX86_BUILTIN_PTESTZ, IX86_BUILTIN_PTESTC, IX86_BUILTIN_PTESTNZC, @@ -24083,6 +24092,15 @@ enum ix86_builtins IX86_BUILTIN_ROUNDPD256, IX86_BUILTIN_ROUNDPS256, + IX86_BUILTIN_FLOORPD256, + IX86_BUILTIN_CEILPD256, + IX86_BUILTIN_TRUNCPD256, + IX86_BUILTIN_RINTPD256, + IX86_BUILTIN_FLOORPS256, + IX86_BUILTIN_CEILPS256, + IX86_BUILTIN_TRUNCPS256, + IX86_BUILTIN_RINTPS256, + IX86_BUILTIN_UNPCKHPD256, IX86_BUILTIN_UNPCKLPD256, IX86_BUILTIN_UNPCKHPS256, @@ -25105,6 +25123,16 @@ static const struct builtin_description { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND }, + + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, @@ -25217,6 +25245,16 @@ static const struct builtin_description { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, @@ -26216,6 +26254,39 @@ ix86_expand_sse_comi (const struct built return SUBREG_REG (target); } +/* Subroutine of ix86_expand_args_builtin to take care of round insns. */ + +static rtx +ix86_expand_sse_round (const struct builtin_description *d, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op1, op0 = expand_normal (arg0); + enum machine_mode tmode = insn_data[d->icode].operand[0].mode; + enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; + + if (optimize || target == 0 + || GET_MODE (target) != tmode + || !insn_data[d->icode].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_data[d->icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + op1 = GEN_INT (d->comparison); + + pat = GEN_FCN (d->icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + /* Subroutine of ix86_expand_builtin to take care of ptest insns. */ static rtx @@ -26485,6 +26556,11 @@ ix86_expand_args_builtin (const struct b switch ((enum ix86_builtin_func_type) d->flag) { + case V2DF_FTYPE_V2DF_ROUND: + case V4DF_FTYPE_V4DF_ROUND: + case V4SF_FTYPE_V4SF_ROUND: + case V8SF_FTYPE_V8SF_ROUND: + return ix86_expand_sse_round (d, exp, target); case INT_FTYPE_V8SF_V8SF_PTEST: case INT_FTYPE_V4DI_V4DI_PTEST: case INT_FTYPE_V4DF_V4DF_PTEST: @@ -27581,6 +27657,118 @@ ix86_builtin_vectorized_function (tree f } break; + case BUILT_IN_FLOOR: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math) + break; + + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_FLOORPD]; + else if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_FLOORPD256]; + } + break; + + case BUILT_IN_FLOORF: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math) + break; + + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_FLOORPS]; + else if (out_n == 8 && in_n == 8) + return ix86_builtins[IX86_BUILTIN_FLOORPS256]; + } + break; + + case BUILT_IN_CEIL: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math) + break; + + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_CEILPD]; + else if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_CEILPD256]; + } + break; + + case BUILT_IN_CEILF: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math) + break; + + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_CEILPS]; + else if (out_n == 8 && in_n == 8) + return ix86_builtins[IX86_BUILTIN_CEILPS256]; + } + break; + + case BUILT_IN_TRUNC: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math) + break; + + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_TRUNCPD]; + else if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_TRUNCPD256]; + } + break; + + case BUILT_IN_TRUNCF: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math) + break; + + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_TRUNCPS]; + else if (out_n == 8 && in_n == 8) + return ix86_builtins[IX86_BUILTIN_TRUNCPS256]; + } + break; + + case BUILT_IN_RINT: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math) + break; + + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_RINTPD]; + else if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_RINTPD256]; + } + break; + + case BUILT_IN_RINTF: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math) + break; + + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_RINTPS]; + else if (out_n == 8 && in_n == 8) + return ix86_builtins[IX86_BUILTIN_RINTPS256]; + } + break; + case BUILT_IN_FMA: if (out_mode == DFmode && in_mode == DFmode) { Index: testsuite/gcc.target/i386/sse4_1-floor-vec.c =================================================================== --- testsuite/gcc.target/i386/sse4_1-floor-vec.c (revision 0) +++ testsuite/gcc.target/i386/sse4_1-floor-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern double floor (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = floor (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != floor (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/avx-rint-vec.c =================================================================== --- testsuite/gcc.target/i386/avx-rint-vec.c (revision 0) +++ testsuite/gcc.target/i386/avx-rint-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern double rint (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = rint (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != rint (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/avx-rintf-vec.c =================================================================== --- testsuite/gcc.target/i386/avx-rintf-vec.c (revision 0) +++ testsuite/gcc.target/i386/avx-rintf-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern float rintf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = rintf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != rintf (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/avx-trunc-vec.c =================================================================== --- testsuite/gcc.target/i386/avx-trunc-vec.c (revision 0) +++ testsuite/gcc.target/i386/avx-trunc-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern double trunc (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = trunc (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != trunc (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/avx-ceilf-vec.c =================================================================== --- testsuite/gcc.target/i386/avx-ceilf-vec.c (revision 0) +++ testsuite/gcc.target/i386/avx-ceilf-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern float ceilf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = ceilf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != ceilf (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/sse4_1-rintf-vec.c =================================================================== --- testsuite/gcc.target/i386/sse4_1-rintf-vec.c (revision 0) +++ testsuite/gcc.target/i386/sse4_1-rintf-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern float rintf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = rintf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != rintf (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/sse4_1-trunc-vec.c =================================================================== --- testsuite/gcc.target/i386/sse4_1-trunc-vec.c (revision 0) +++ testsuite/gcc.target/i386/sse4_1-trunc-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern double trunc (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = trunc (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != trunc (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/sse4_1-ceil-vec.c =================================================================== --- testsuite/gcc.target/i386/sse4_1-ceil-vec.c (revision 0) +++ testsuite/gcc.target/i386/sse4_1-ceil-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern double ceil (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = ceil (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != ceil (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/sse4_1-ceilf-vec.c =================================================================== --- testsuite/gcc.target/i386/sse4_1-ceilf-vec.c (revision 0) +++ testsuite/gcc.target/i386/sse4_1-ceilf-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern float ceilf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = ceilf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != ceilf (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/sse4_1-truncf-vec.c =================================================================== --- testsuite/gcc.target/i386/sse4_1-truncf-vec.c (revision 0) +++ testsuite/gcc.target/i386/sse4_1-truncf-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern float truncf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = truncf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != truncf (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/sse4_1-floorf-vec.c =================================================================== --- testsuite/gcc.target/i386/sse4_1-floorf-vec.c (revision 0) +++ testsuite/gcc.target/i386/sse4_1-floorf-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern float floorf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = floorf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != floorf (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/avx-ceil-vec.c =================================================================== --- testsuite/gcc.target/i386/avx-ceil-vec.c (revision 0) +++ testsuite/gcc.target/i386/avx-ceil-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern double ceil (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = ceil (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != ceil (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/sse4_1-rint-vec.c =================================================================== --- testsuite/gcc.target/i386/sse4_1-rint-vec.c (revision 0) +++ testsuite/gcc.target/i386/sse4_1-rint-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "sse4_1-check.h" + +#include <math.h> + +extern double rint (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +sse4_1_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = rint (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != rint (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/avx-floor-vec.c =================================================================== --- testsuite/gcc.target/i386/avx-floor-vec.c (revision 0) +++ testsuite/gcc.target/i386/avx-floor-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern double floor (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + double a[NUM]; + double r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = floor (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != floor (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/avx-truncf-vec.c =================================================================== --- testsuite/gcc.target/i386/avx-truncf-vec.c (revision 0) +++ testsuite/gcc.target/i386/avx-truncf-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern float truncf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = truncf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != truncf (a[i])) + abort(); +} Index: testsuite/gcc.target/i386/avx-floorf-vec.c =================================================================== --- testsuite/gcc.target/i386/avx-floorf-vec.c (revision 0) +++ testsuite/gcc.target/i386/avx-floorf-vec.c (revision 0) @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#include "avx-check.h" + +#include <math.h> + +extern float floorf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +avx_test (void) +{ + float a[NUM]; + float r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = floorf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != floorf (a[i])) + abort(); +}

[RFC,i386] : Vectorize calls to floor, ceil, trunc and rint functions.

Commit Message

Patch