Message ID | 20240530055219.3477674-1-lin1.hu@intel.com |
---|---|
State | New |
Series | i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx |
On Thu, May 30, 2024 at 1:52 PM Hu, Lin1 <lin1.hu@intel.com> wrote:
>
> Hi, all
>
> This patch aims to extend __builtin_ia32_cmp[p|s][s|d] from avx to
> sse/sse2/avx when its immediate is in the range [0, 7].
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

Ok.

>
> BRs,
> Lin
>
> gcc/ChangeLog:
>
>         * config/i386/avxintrin.h: Move cmp[p|s][s|d] to [e|x]mmintrin.h,
>         and move macros to xmmintrin.h.
>         * config/i386/emmintrin.h: Add cmp[p|s]s intrins.
>         * config/i386/i386-builtin.def: Modify __builtin_ia32_cmp[p|s][s|d].
>         * config/i386/i386-expand.cc
>         (ix86_expand_args_builtin): Raise error when imm is in the range
>         [8, 31] without avx.
>         * config/i386/predicates.md (cmpps_imm_operand): New predicate.
>         * config/i386/sse.md (avx_cmp<mode>3): Modify define_insn.
>         (avx_vmcmp<mode>3): Ditto.
>         * config/i386/xmmintrin.h (_CMP_EQ_OQ): New macro for sse/sse2.
>         (_CMP_LT_OS): Ditto.
>         (_CMP_LE_OS): Ditto.
>         (_CMP_UNORD_Q): Ditto.
>         (_CMP_NEQ_UQ): Ditto.
>         (_CMP_NLT_US): Ditto.
>         (_CMP_NLE_US): Ditto.
>         (_CMP_ORD_Q): Ditto.
>         (_mm_cmp_ps): Move intrin from avxintrin.h to xmmintrin.h.
>         (_mm_cmp_ss): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/sse-cmp-1.c: New test.
>         * gcc.target/i386/sse-cmp-2.c: Ditto.
>         * gcc.target/i386/sse-cmp-error.c: Ditto.
> ---
>  gcc/config/i386/avxintrin.h                   | 56 -----------
>  gcc/config/i386/emmintrin.h                   | 22 +++++
>  gcc/config/i386/i386-builtin.def              | 10 +-
>  gcc/config/i386/i386-expand.cc                |  6 ++
>  gcc/config/i386/predicates.md                 |  5 +
>  gcc/config/i386/sse.md                        | 42 ++++----
>  gcc/config/i386/xmmintrin.h                   | 41 ++++++++
>  gcc/testsuite/gcc.target/i386/sse-cmp-1.c     | 20 ++++
>  gcc/testsuite/gcc.target/i386/sse-cmp-2.c     | 96 +++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/sse-cmp-error.c | 16 ++++
>  10 files changed, 236 insertions(+), 78 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-cmp-error.c
>
> --
> 2.31.1
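For readers skimming the archive, here is a user-level sketch of what the change enables (illustrative only, not part of the patch; the function names are invented):

#include <immintrin.h>

/* With plain -msse2, predicates 0..7 are accepted after this patch and the
   legacy (non-VEX) cmppd encoding can be emitted; -mavx still selects
   vcmppd.  */
__m128d
lt_mask (__m128d x, __m128d y)
{
  return _mm_cmp_pd (x, y, _CMP_LT_OS);   /* imm 0x01, inside [0, 7].  */
}

/* Predicates 8..31 are unchanged: without -mavx this call is rejected with
   "__builtin_ia32_cmppd needs isa option -mavx".  */
__m128d
eq_uq_mask (__m128d x, __m128d y)
{
  return _mm_cmp_pd (x, y, _CMP_EQ_UQ);   /* imm 0x08, still AVX-only.  */
}

This mirrors what the new sse-cmp-1.c and sse-cmp-error.c tests in the patch below check.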
diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
index 80214540888..ec9b9905b5f 100644
--- a/gcc/config/i386/avxintrin.h
+++ b/gcc/config/i386/avxintrin.h
@@ -72,22 +72,6 @@ typedef double __m256d_u __attribute__ ((__vector_size__ (32),
 
 /* Compare predicates for scalar and packed compare intrinsics.  */
 
-/* Equal (ordered, non-signaling)  */
-#define _CMP_EQ_OQ 0x00
-/* Less-than (ordered, signaling)  */
-#define _CMP_LT_OS 0x01
-/* Less-than-or-equal (ordered, signaling)  */
-#define _CMP_LE_OS 0x02
-/* Unordered (non-signaling)  */
-#define _CMP_UNORD_Q 0x03
-/* Not-equal (unordered, non-signaling)  */
-#define _CMP_NEQ_UQ 0x04
-/* Not-less-than (unordered, signaling)  */
-#define _CMP_NLT_US 0x05
-/* Not-less-than-or-equal (unordered, signaling)  */
-#define _CMP_NLE_US 0x06
-/* Ordered (nonsignaling)  */
-#define _CMP_ORD_Q 0x07
 /* Equal (unordered, non-signaling)  */
 #define _CMP_EQ_UQ 0x08
 /* Not-greater-than-or-equal (unordered, signaling)  */
@@ -381,18 +365,6 @@ _mm256_xor_ps (__m256 __A, __m256 __B)
 }
 
 #ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
-}
-
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
 {
@@ -406,27 +378,7 @@ _mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
   return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
                                            __P);
 }
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
-}
 #else
-#define _mm_cmp_pd(X, Y, P) \
-  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
-                                   (__v2df)(__m128d)(Y), (int)(P)))
-
-#define _mm_cmp_ps(X, Y, P) \
-  ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
-                                  (__v4sf)(__m128)(Y), (int)(P)))
-
 #define _mm256_cmp_pd(X, Y, P) \
   ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X), \
                                       (__v4df)(__m256d)(Y), (int)(P)))
@@ -434,14 +386,6 @@ _mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
 #define _mm256_cmp_ps(X, Y, P) \
   ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X), \
                                      (__v8sf)(__m256)(Y), (int)(P)))
-
-#define _mm_cmp_sd(X, Y, P) \
-  ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
-                                   (__v2df)(__m128d)(Y), (int)(P)))
-
-#define _mm_cmp_ss(X, Y, P) \
-  ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
-                                  (__v4sf)(__m128)(Y), (int)(P)))
 #endif
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index fa301103daf..c10fc3433af 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -1390,6 +1390,28 @@ _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
   return (__m128i) ((__v4si)__A > (__v4si)__B);
 }
 
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
+{
+  return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
+{
+  return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
+}
+#else
+#define _mm_cmp_pd(X, Y, P) \
+  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
+                                   (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_sd(X, Y, P) \
+  ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
+                                   (__v2df)(__m128d)(Y), (int)(P)))
+#endif
+
 #ifdef __OPTIMIZE__
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_extract_epi16 (__m128i const __A, int const __N)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index a28c48c7566..85b01592d7e 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -671,6 +671,9 @@ BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpn
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF)
 
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
+
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
@@ -827,6 +830,9 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtb128", I
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI )
 
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
+
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
@@ -1077,10 +1083,6 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvp
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 215a998fc26..537f349ed30 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -11833,6 +11833,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
 	case CODE_FOR_avx_vmcmpv4sf3:
 	case CODE_FOR_avx_cmpv2df3:
 	case CODE_FOR_avx_cmpv4sf3:
+	  if (CONST_INT_P (op) && IN_RANGE (INTVAL (op), 8, 31))
+	    {
+	      error ("'%s' needs isa option %s", d->name, "-mavx");
+	      return const0_rtx;
+	    }
+	  /* FALLTHRU */
 	case CODE_FOR_avx_cmpv4df3:
 	case CODE_FOR_avx_cmpv8sf3:
 	case CODE_FOR_avx512f_cmpv8df3_mask:
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 2a97776fc32..29e26f56dde 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1056,6 +1056,11 @@ (define_predicate "const_28_to_31_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 28, 31)")))
 
+(define_predicate "cmpps_imm_operand"
+  (ior (match_operand 0 "const_0_to_7_operand")
+       (and (match_test "TARGET_AVX")
+	    (match_operand 0 "const_0_to_31_operand"))))
+
 ;; True if this is a constant appropriate for an increment or decrement.
 (define_predicate "incdec_operand"
   (match_code "const_int")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0f4fbcb2c5d..542d032ff63 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3641,18 +3641,21 @@ (define_expand "reduc_<code>_scal_<mode>"
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define_insn "avx_cmp<mode>3"
-  [(set (match_operand:VF_128_256 0 "register_operand" "=x")
+  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
 	(unspec:VF_128_256
-	  [(match_operand:VF_128_256 1 "register_operand" "x")
-	   (match_operand:VF_128_256 2 "nonimmediate_operand" "xjm")
-	   (match_operand:SI 3 "const_0_to_31_operand")]
+	  [(match_operand:VF_128_256 1 "register_operand" "0,x")
+	   (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xjm")
+	   (match_operand:SI 3 "cmpps_imm_operand")]
 	  UNSPEC_PCMP))]
-  "TARGET_AVX"
-  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "addr" "gpr16")
+  "TARGET_SSE"
+  "@
+   cmp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "addr" "*,gpr16")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<MODE>")])
 
 (define_insn_and_split "*avx_cmp<mode>3_1"
@@ -3852,21 +3855,24 @@ (define_insn_and_split "*avx_cmp<mode>3_ltint_not"
 })
 
 (define_insn "avx_vmcmp<mode>3"
-  [(set (match_operand:VF_128 0 "register_operand" "=x")
+  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
 	(vec_merge:VF_128
 	  (unspec:VF_128
-	    [(match_operand:VF_128 1 "register_operand" "x")
-	     (match_operand:VF_128 2 "nonimmediate_operand" "xjm")
-	     (match_operand:SI 3 "const_0_to_31_operand")]
+	    [(match_operand:VF_128 1 "register_operand" "0,x")
+	     (match_operand:VF_128 2 "nonimmediate_operand" "xm,xjm")
+	     (match_operand:SI 3 "cmpps_imm_operand")]
 	    UNSPEC_PCMP)
 	  (match_dup 1)
 	  (const_int 1)))]
-  "TARGET_AVX"
-  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "addr" "gpr16")
+  "TARGET_SSE"
+  "@
+   cmp<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
+   vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "addr" "*,gpr16")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_insn "*<sse>_maskcmp<mode>3_comm"
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 87515ecb218..4b6266c5cde 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -108,6 +108,25 @@ typedef float __v4sf __attribute__ ((__vector_size__ (16)));
 #define _MM_FLUSH_ZERO_ON 0x8000
 #define _MM_FLUSH_ZERO_OFF 0x0000
 
+/* Compare predicates for scalar and packed compare intrinsics.  */
+
+/* Equal (ordered, non-signaling)  */
+#define _CMP_EQ_OQ 0x00
+/* Less-than (ordered, signaling)  */
+#define _CMP_LT_OS 0x01
+/* Less-than-or-equal (ordered, signaling)  */
+#define _CMP_LE_OS 0x02
+/* Unordered (non-signaling)  */
+#define _CMP_UNORD_Q 0x03
+/* Not-equal (unordered, non-signaling)  */
+#define _CMP_NEQ_UQ 0x04
+/* Not-less-than (unordered, signaling)  */
+#define _CMP_NLT_US 0x05
+/* Not-less-than-or-equal (unordered, signaling)  */
+#define _CMP_NLE_US 0x06
+/* Ordered (nonsignaling)  */
+#define _CMP_ORD_Q 0x07
+
 /* Create an undefined vector.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_undefined_ps (void)
@@ -434,6 +453,28 @@ _mm_cmpunord_ps (__m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
 }
 
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
+{
+  return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
+{
+  return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+#else
+#define _mm_cmp_ps(X, Y, P) \
+  ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
+                                  (__v4sf)(__m128)(Y), (int)(P)))
+
+#define _mm_cmp_ss(X, Y, P) \
+  ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
+                                  (__v4sf)(__m128)(Y), (int)(P)))
+#endif
+
 /* Compare the lower SPFP values of A and B and return 1 if true
    and 0 if false.  */
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-cmp-1.c b/gcc/testsuite/gcc.target/i386/sse-cmp-1.c
new file mode 100644
index 00000000000..eff90d4790e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-cmp-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -msse2" } */
+/* { dg-final { scan-assembler-times "cmpsd" 1 } } */
+/* { dg-final { scan-assembler-times "cmpss" 1 } } */
+/* { dg-final { scan-assembler-times "cmppd" 1 } } */
+/* { dg-final { scan-assembler-times "cmpps" 1 } } */
+
+#include <x86intrin.h>
+
+__m128 a1, a2, a3, a4, a5, a6;
+__m128d d1, d2, d3, d4, d5, d6;
+
+void
+test (void)
+{
+  d1 = _mm_cmp_sd (d2, d3, 1);
+  a1 = _mm_cmp_ss (a2, a3, 2);
+  d1 = _mm_cmp_pd (d2, d3, 3);
+  a1 = _mm_cmp_ps (a2, a3, 4);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-cmp-2.c b/gcc/testsuite/gcc.target/i386/sse-cmp-2.c
new file mode 100644
index 00000000000..77c05c484b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-cmp-2.c
@@ -0,0 +1,96 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-require-effective-target c99_runtime } */
+/* { dg-options "-O2 -msse2 -std=c99" } */
+
+#include "sse2-check.h"
+#include "emmintrin.h"
+#include <math.h>
+
+double sd1[2]={2134.3343,6678.346};
+double sd2[2]={41124.234,6678.346};
+
+float ss1[4]={2134.3343,6678.346,453.345635,54646.464356};
+float ss2[4]={41124.234,6678.346,8653.65635,856.43576};
+
+union
+{
+  double x[2];
+  long long a[2];
+}dd, ed;
+
+union
+{
+  float x[4];
+  int a[4];
+}ds, es;
+
+#define CHECK(INTSIZE, SIZE, NUNITS, SUFFIX) \
+void check##SUFFIX(unsigned imm, char *id) \
+{ \
+  if(checkV##INTSIZE(d##SIZE.a, e##SIZE.a, NUNITS)){ \
+    printf("mm_cmp_" #SUFFIX "(%s: 0x%x) FAILED\n", id, imm);\
+    abort(); \
+  } \
+}
+
+CHECK (l, d, 2, pd)
+CHECK (i, s, 4, ps)
+CHECK (l, d, 2, sd)
+CHECK (i, s, 4, ss)
+
+#define CMP(imm, rel, SIZE, NUNITS, SUFFIX) \
+  for (i = 0; i < NUNITS; i++) e##SIZE.a[i] = rel ? -1 : 0; \
+  source##SIZE##1 = _mm_loadu_p##SIZE(s##SIZE##1); \
+  source##SIZE##2 = _mm_loadu_p##SIZE(s##SIZE##2); \
+  dest##SIZE = _mm_cmp_##SUFFIX(source##SIZE##1, source##SIZE##2, imm); \
+  _mm_storeu_p##SIZE(d##SIZE.x, dest##SIZE); \
+  check##SUFFIX(imm, "" #imm "");
+
+static void
+sse2_test ()
+{
+  __m128d sourced1, sourced2, destd;
+  __m128 sources1, sources2, dests;
+  int i;
+
+  CMP(_CMP_EQ_OQ, !isunordered(sd1[i], sd2[i]) && sd1[i] == sd2[i], d, 2, pd);
+  CMP(_CMP_LT_OS, !isunordered(sd1[i], sd2[i]) && sd1[i] < sd2[i], d, 2, pd);
+  CMP(_CMP_LE_OS, !isunordered(sd1[i], sd2[i]) && sd1[i] <= sd2[i], d, 2, pd);
+  CMP(_CMP_UNORD_Q, isunordered(sd1[i], sd2[i]), d, 2, pd);
+  CMP(_CMP_NEQ_UQ, isunordered(sd1[i], sd2[i]) || sd1[i] != sd2[i], d, 2, pd);
+  CMP(_CMP_NLT_US, isunordered(sd1[i], sd2[i]) || sd1[i] >= sd2[i], d, 2, pd);
+  CMP(_CMP_NLE_US, isunordered(sd1[i], sd2[i]) || sd1[i] > sd2[i], d, 2, pd);
+  CMP(_CMP_ORD_Q, !isunordered(sd1[i], sd2[i]), d, 2, pd);
+
+  CMP(_CMP_EQ_OQ, !isunordered(ss1[i], ss2[i]) && ss1[i] == ss2[i], s, 4, ps);
+  CMP(_CMP_LT_OS, !isunordered(ss1[i], ss2[i]) && ss1[i] < ss2[i], s, 4, ps);
+  CMP(_CMP_LE_OS, !isunordered(ss1[i], ss2[i]) && ss1[i] <= ss2[i], s, 4, ps);
+  CMP(_CMP_UNORD_Q, isunordered(ss1[i], ss2[i]), s, 4, ps);
+  CMP(_CMP_NEQ_UQ, isunordered(ss1[i], ss2[i]) || ss1[i] != ss2[i], s, 4, ps);
+  CMP(_CMP_NLT_US, isunordered(ss1[i], ss2[i]) || ss1[i] >= ss2[i], s, 4, ps);
+  CMP(_CMP_NLE_US, isunordered(ss1[i], ss2[i]) || ss1[i] > ss2[i], s, 4, ps);
+  CMP(_CMP_ORD_Q, !isunordered(ss1[i], ss2[i]), s, 4, ps);
+
+  dd.x[1] = ed.x[1] = sd1[1];
+
+  CMP(_CMP_EQ_OQ, !isunordered(sd1[0], sd2[0]) && sd1[0] == sd2[0], d, 1, sd);
+  CMP(_CMP_LT_OS, !isunordered(sd1[0], sd2[0]) && sd1[0] < sd2[0], d, 1, sd);
+  CMP(_CMP_LE_OS, !isunordered(sd1[0], sd2[0]) && sd1[0] <= sd2[0], d, 1, sd);
+  CMP(_CMP_UNORD_Q, isunordered(sd1[0], sd2[0]), d, 1, sd);
+  CMP(_CMP_NEQ_UQ, isunordered(sd1[0], sd2[0]) || sd1[0] != sd2[0], d, 1, sd);
+  CMP(_CMP_NLT_US, isunordered(sd1[0], sd2[0]) || sd1[0] >= sd2[0], d, 1, sd);
+  CMP(_CMP_NLE_US, isunordered(sd1[0], sd2[0]) || sd1[0] > sd2[0], d, 1, sd);
+  CMP(_CMP_ORD_Q, !isunordered(sd1[0], sd2[0]), d, 1, sd);
+
+  for(i = 1; i < 4; i++) ds.x[i] = es.x[i] = ss1[i];
+
+  CMP(_CMP_EQ_OQ, !isunordered(ss1[0], ss2[0]) && ss1[0] == ss2[0], s, 1, ss);
+  CMP(_CMP_LT_OS, !isunordered(ss1[0], ss2[0]) && ss1[0] < ss2[0], s, 1, ss);
+  CMP(_CMP_LE_OS, !isunordered(ss1[0], ss2[0]) && ss1[0] <= ss2[0], s, 1, ss);
+  CMP(_CMP_UNORD_Q, isunordered(ss1[0], ss2[0]), s, 1, ss);
+  CMP(_CMP_NEQ_UQ, isunordered(ss1[0], ss2[0]) || ss1[0] != ss2[0], s, 1, ss);
+  CMP(_CMP_NLT_US, isunordered(ss1[0], ss2[0]) || ss1[0] >= ss2[0], s, 1, ss);
+  CMP(_CMP_NLE_US, isunordered(ss1[0], ss2[0]) || ss1[0] > ss2[0], s, 1, ss);
+  CMP(_CMP_ORD_Q, !isunordered(ss1[0], ss2[0]), s, 1, ss);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-cmp-error.c b/gcc/testsuite/gcc.target/i386/sse-cmp-error.c
new file mode 100644
index 00000000000..79633b94e2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-cmp-error.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -msse2" } */
+
+#include <x86intrin.h>
+
+volatile __m128 a1, a2, a3;
+volatile __m128d d1, d2, d3;
+
+void
+test (void)
+{
+  d1 = _mm_cmp_sd (d2, d3, 8); /* { dg-error "'__builtin_ia32_cmpsd' needs isa option -mavx" } */
+  a1 = _mm_cmp_ss (a2, a3, 8); /* { dg-error "'__builtin_ia32_cmpss' needs isa option -mavx" } */
+  d1 = _mm_cmp_pd (d2, d3, 8); /* { dg-error "'__builtin_ia32_cmppd' needs isa option -mavx" } */
+  a1 = _mm_cmp_ps (a2, a3, 8); /* { dg-error "'__builtin_ia32_cmpps' needs isa option -mavx" } */
+}
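Background on why the cut-off sits at 7: the eight predicate macros the patch moves into xmmintrin.h are the comparisons the pre-AVX cmpps/cmppd immediate (values 0 to 7) can encode, so each of them has a classic SSE/SSE2 intrinsic counterpart. A hypothetical pairing for illustration (not part of the patch):

#include <immintrin.h>

/* CMPLTPD is cmppd with immediate 1, and _CMP_LT_OS is defined as 0x01,
   so both calls below request the same predicate and produce the same
   all-ones/all-zeros mask.  */
__m128d
lt_two_ways (__m128d x, __m128d y)
{
  __m128d a = _mm_cmplt_pd (x, y);              /* classic SSE2 intrinsic  */
  __m128d b = _mm_cmp_pd (x, y, _CMP_LT_OS);    /* predicate form, now SSE2 too  */
  return _mm_and_pd (a, b);                     /* identical masks  */
}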