Message ID | 20210415151144.439294-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86: Use crc32 target option for CRC32 intrinsics | expand |
On Thu, Apr 15, 2021 at 5:11 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > Use crc32 target option for CRC32 intrinsics to support CRC32 intrinsics > without enabling SSE vector instructions. There is no CRC32 ISA. crc32 is part of SSE4.2 [1] and current situation reflects that correctly. [1] https://en.wikipedia.org/wiki/SSE4 Uros. > * config/i386/gnu-property.c > (file_end_indicate_exec_stack_and_gnu_property): Also check > TARGET_CRC32 for GNU_PROPERTY_X86_ISA_1_V2. > * config/i386/i386-c.c (ix86_target_macros_internal): Define > __CRC32__ for -mcrc32. > * config/i386/i386-options.c (ix86_option_override_internal): > Handle PTA_CRC32. Enable crc32 instruction for -msse4.2. > * config/i386/i386.h (PTA_CRC32): New. > (PTA_X86_64_V2): Add PTA_CRC32. > (PTA_NEHALEM): Likewise. > * config/i386/i386.md (sse4_2_crc32<mode>): Remove TARGET_SSE4_2 > check. > (sse4_2_crc32di): Likewise. > * config/i386/ia32intrin.h: Use crc32 target option for CRC32 > intrinsics. > --- > gcc/config/i386/gnu-property.c | 1 + > gcc/config/i386/i386-c.c | 2 ++ > gcc/config/i386/i386-options.c | 8 ++++++++ > gcc/config/i386/i386.h | 6 ++++-- > gcc/config/i386/i386.md | 4 ++-- > gcc/config/i386/ia32intrin.h | 28 ++++++++++++++-------------- > 6 files changed, 31 insertions(+), 18 deletions(-) > > diff --git a/gcc/config/i386/gnu-property.c b/gcc/config/i386/gnu-property.c > index 4ba04403002..b6a3bdf62ce 100644 > --- a/gcc/config/i386/gnu-property.c > +++ b/gcc/config/i386/gnu-property.c > @@ -92,6 +92,7 @@ file_end_indicate_exec_stack_and_gnu_property (void) > /* GNU_PROPERTY_X86_ISA_1_V2. 
*/ > if (TARGET_CMPXCHG16B > || (TARGET_64BIT && TARGET_SAHF) > + || TARGET_CRC32 > || TARGET_POPCNT > || TARGET_SSE3 > || TARGET_SSSE3 > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c > index be46d0506ad..5ed0de006fb 100644 > --- a/gcc/config/i386/i386-c.c > +++ b/gcc/config/i386/i386-c.c > @@ -532,6 +532,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > def_or_undef (parse_in, "__LZCNT__"); > if (isa_flag & OPTION_MASK_ISA_TBM) > def_or_undef (parse_in, "__TBM__"); > + if (isa_flag & OPTION_MASK_ISA_CRC32) > + def_or_undef (parse_in, "__CRC32__"); > if (isa_flag & OPTION_MASK_ISA_POPCNT) > def_or_undef (parse_in, "__POPCNT__"); > if (isa_flag & OPTION_MASK_ISA_FSGSBASE) > diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c > index 91da2849c49..959ee163d2f 100644 > --- a/gcc/config/i386/i386-options.c > +++ b/gcc/config/i386/i386-options.c > @@ -2162,6 +2162,9 @@ ix86_option_override_internal (bool main_args_p, > if (((processor_alias_table[i].flags & PTA_CX16) != 0) > && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_CX16)) > opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_CX16; > + if (((processor_alias_table[i].flags & PTA_CRC32) != 0) > + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CRC32)) > + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32; > if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0) > && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) > opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; > @@ -2617,6 +2620,11 @@ ix86_option_override_internal (bool main_args_p, > opts->x_ix86_isa_flags > |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; > > + /* Enable crc32 instruction for -msse4.2. */ > + if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)) > + opts->x_ix86_isa_flags > + |= OPTION_MASK_ISA_CRC32 & ~opts->x_ix86_isa_flags_explicit; > + > /* Enable lzcnt instruction for -mabm. 
*/ > if (TARGET_ABM_P(opts->x_ix86_isa_flags)) > opts->x_ix86_isa_flags > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 97700d797a7..c50f9ab24fa 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -2504,12 +2504,14 @@ constexpr wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23); > constexpr wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24); > constexpr wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25); > constexpr wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26); > +constexpr wide_int_bitmask PTA_CRC32 (0, HOST_WIDE_INT_1U << 27); > > constexpr wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE > | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; > constexpr wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE > & (~PTA_NO_SAHF)) > - | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3; > + | PTA_CRC32 | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 > + | PTA_SSSE3; > constexpr wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 > | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT > | PTA_MOVBE | PTA_XSAVE; > @@ -2519,7 +2521,7 @@ constexpr wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 > constexpr wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 > | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; > constexpr wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 > - | PTA_POPCNT; > + | PTA_CRC32 | PTA_POPCNT; > constexpr wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL; > constexpr wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE > | PTA_XSAVEOPT; > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 9ff35d9a607..1f1d74e6275 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -20998,7 +20998,7 @@ (define_insn "sse4_2_crc32<mode>" > [(match_operand:SI 1 "register_operand" "0") > (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")] > 
UNSPEC_CRC32))] > - "TARGET_SSE4_2 || TARGET_CRC32" > + "TARGET_CRC32" > "crc32{<imodesuffix>}\t{%2, %0|%0, %2}" > [(set_attr "type" "sselog1") > (set_attr "prefix_rep" "1") > @@ -21019,7 +21019,7 @@ (define_insn "sse4_2_crc32di" > [(match_operand:DI 1 "register_operand" "0") > (match_operand:DI 2 "nonimmediate_operand" "rm")] > UNSPEC_CRC32))] > - "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)" > + "TARGET_64BIT && TARGET_CRC32" > "crc32{q}\t{%2, %0|%0, %2}" > [(set_attr "type" "sselog1") > (set_attr "prefix_rep" "1") > diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h > index 591394076cc..5422b0fc9e0 100644 > --- a/gcc/config/i386/ia32intrin.h > +++ b/gcc/config/i386/ia32intrin.h > @@ -51,11 +51,11 @@ __bswapd (int __X) > > #ifndef __iamcu__ > > -#ifndef __SSE4_2__ > +#ifndef __CRC32__ > #pragma GCC push_options > -#pragma GCC target("sse4.2") > -#define __DISABLE_SSE4_2__ > -#endif /* __SSE4_2__ */ > +#pragma GCC target("crc32") > +#define __DISABLE_CRC32__ > +#endif /* __CRC32__ */ > > /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ > extern __inline unsigned int > @@ -79,10 +79,10 @@ __crc32d (unsigned int __C, unsigned int __V) > return __builtin_ia32_crc32si (__C, __V); > } > > -#ifdef __DISABLE_SSE4_2__ > -#undef __DISABLE_SSE4_2__ > +#ifdef __DISABLE_CRC32__ > +#undef __DISABLE_CRC32__ > #pragma GCC pop_options > -#endif /* __DISABLE_SSE4_2__ */ > +#endif /* __DISABLE_CRC32__ */ > > #endif /* __iamcu__ */ > > @@ -199,11 +199,11 @@ __bswapq (long long __X) > return __builtin_bswap64 (__X); > } > > -#ifndef __SSE4_2__ > +#ifndef __CRC32__ > #pragma GCC push_options > -#pragma GCC target("sse4.2") > -#define __DISABLE_SSE4_2__ > -#endif /* __SSE4_2__ */ > +#pragma GCC target("crc32") > +#define __DISABLE_CRC32__ > +#endif /* __CRC32__ */ > > /* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value. 
*/ > extern __inline unsigned long long > @@ -213,10 +213,10 @@ __crc32q (unsigned long long __C, unsigned long long __V) > return __builtin_ia32_crc32di (__C, __V); > } > > -#ifdef __DISABLE_SSE4_2__ > -#undef __DISABLE_SSE4_2__ > +#ifdef __DISABLE_CRC32__ > +#undef __DISABLE_CRC32__ > #pragma GCC pop_options > -#endif /* __DISABLE_SSE4_2__ */ > +#endif /* __DISABLE_CRC32__ */ > > /* 64bit popcnt */ > extern __inline long long > -- > 2.30.2 >
On Thu, Apr 15, 2021 at 9:14 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Thu, Apr 15, 2021 at 5:11 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > Use crc32 target option for CRC32 intrinsics to support CRC32 intrinsics > > without enabling SSE vector instructions. > > There is no CRC32 ISA. crc32 is part of SSE4.2 [1] and current > situation reflects that correctly. CRC32 is similar to POPCNT, which was originally in SSE4.2. Now POPCNT is a separate feature which is also enabled by SSE4.2. Enabling CRC32 only with SSE4.2 makes it impossible to use CRC32 with -mgeneral-regs-only. This patch addresses this issue the same way as POPCNT. > [1] https://en.wikipedia.org/wiki/SSE4 > > Uros. > > > * config/i386/gnu-property.c > > (file_end_indicate_exec_stack_and_gnu_property): Also check > > TARGET_CRC32 for GNU_PROPERTY_X86_ISA_1_V2. > > * config/i386/i386-c.c (ix86_target_macros_internal): Define > > __CRC32__ for -mcrc32. > > * config/i386/i386-options.c (ix86_option_override_internal): > > Handle PTA_CRC32. Enable crc32 instruction for -msse4.2. > > * config/i386/i386.h (PTA_CRC32): New. > > (PTA_X86_64_V2): Add PTA_CRC32. > > (PTA_NEHALEM): Likewise. > > * config/i386/i386.md (sse4_2_crc32<mode>): Remove TARGET_SSE4_2 > > check. > > (sse4_2_crc32di): Likewise. > > * config/i386/ia32intrin.h: Use crc32 target option for CRC32 > > intrinsics. 
> > --- > > gcc/config/i386/gnu-property.c | 1 + > > gcc/config/i386/i386-c.c | 2 ++ > > gcc/config/i386/i386-options.c | 8 ++++++++ > > gcc/config/i386/i386.h | 6 ++++-- > > gcc/config/i386/i386.md | 4 ++-- > > gcc/config/i386/ia32intrin.h | 28 ++++++++++++++-------------- > > 6 files changed, 31 insertions(+), 18 deletions(-) > > > > diff --git a/gcc/config/i386/gnu-property.c b/gcc/config/i386/gnu-property.c > > index 4ba04403002..b6a3bdf62ce 100644 > > --- a/gcc/config/i386/gnu-property.c > > +++ b/gcc/config/i386/gnu-property.c > > @@ -92,6 +92,7 @@ file_end_indicate_exec_stack_and_gnu_property (void) > > /* GNU_PROPERTY_X86_ISA_1_V2. */ > > if (TARGET_CMPXCHG16B > > || (TARGET_64BIT && TARGET_SAHF) > > + || TARGET_CRC32 > > || TARGET_POPCNT > > || TARGET_SSE3 > > || TARGET_SSSE3 > > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c > > index be46d0506ad..5ed0de006fb 100644 > > --- a/gcc/config/i386/i386-c.c > > +++ b/gcc/config/i386/i386-c.c > > @@ -532,6 +532,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > > def_or_undef (parse_in, "__LZCNT__"); > > if (isa_flag & OPTION_MASK_ISA_TBM) > > def_or_undef (parse_in, "__TBM__"); > > + if (isa_flag & OPTION_MASK_ISA_CRC32) > > + def_or_undef (parse_in, "__CRC32__"); > > if (isa_flag & OPTION_MASK_ISA_POPCNT) > > def_or_undef (parse_in, "__POPCNT__"); > > if (isa_flag & OPTION_MASK_ISA_FSGSBASE) > > diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c > > index 91da2849c49..959ee163d2f 100644 > > --- a/gcc/config/i386/i386-options.c > > +++ b/gcc/config/i386/i386-options.c > > @@ -2162,6 +2162,9 @@ ix86_option_override_internal (bool main_args_p, > > if (((processor_alias_table[i].flags & PTA_CX16) != 0) > > && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_CX16)) > > opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_CX16; > > + if (((processor_alias_table[i].flags & PTA_CRC32) != 0) > > + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CRC32)) > > + 
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32; > > if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0) > > && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) > > opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; > > @@ -2617,6 +2620,11 @@ ix86_option_override_internal (bool main_args_p, > > opts->x_ix86_isa_flags > > |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; > > > > + /* Enable crc32 instruction for -msse4.2. */ > > + if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)) > > + opts->x_ix86_isa_flags > > + |= OPTION_MASK_ISA_CRC32 & ~opts->x_ix86_isa_flags_explicit; > > + > > /* Enable lzcnt instruction for -mabm. */ > > if (TARGET_ABM_P(opts->x_ix86_isa_flags)) > > opts->x_ix86_isa_flags > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > > index 97700d797a7..c50f9ab24fa 100644 > > --- a/gcc/config/i386/i386.h > > +++ b/gcc/config/i386/i386.h > > @@ -2504,12 +2504,14 @@ constexpr wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23); > > constexpr wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24); > > constexpr wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25); > > constexpr wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26); > > +constexpr wide_int_bitmask PTA_CRC32 (0, HOST_WIDE_INT_1U << 27); > > > > constexpr wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE > > | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; > > constexpr wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE > > & (~PTA_NO_SAHF)) > > - | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3; > > + | PTA_CRC32 | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 > > + | PTA_SSSE3; > > constexpr wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 > > | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT > > | PTA_MOVBE | PTA_XSAVE; > > @@ -2519,7 +2521,7 @@ constexpr wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 > > constexpr wide_int_bitmask PTA_CORE2 = PTA_64BIT | 
PTA_MMX | PTA_SSE | PTA_SSE2 > > | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; > > constexpr wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 > > - | PTA_POPCNT; > > + | PTA_CRC32 | PTA_POPCNT; > > constexpr wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL; > > constexpr wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE > > | PTA_XSAVEOPT; > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > index 9ff35d9a607..1f1d74e6275 100644 > > --- a/gcc/config/i386/i386.md > > +++ b/gcc/config/i386/i386.md > > @@ -20998,7 +20998,7 @@ (define_insn "sse4_2_crc32<mode>" > > [(match_operand:SI 1 "register_operand" "0") > > (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")] > > UNSPEC_CRC32))] > > - "TARGET_SSE4_2 || TARGET_CRC32" > > + "TARGET_CRC32" > > "crc32{<imodesuffix>}\t{%2, %0|%0, %2}" > > [(set_attr "type" "sselog1") > > (set_attr "prefix_rep" "1") > > @@ -21019,7 +21019,7 @@ (define_insn "sse4_2_crc32di" > > [(match_operand:DI 1 "register_operand" "0") > > (match_operand:DI 2 "nonimmediate_operand" "rm")] > > UNSPEC_CRC32))] > > - "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)" > > + "TARGET_64BIT && TARGET_CRC32" > > "crc32{q}\t{%2, %0|%0, %2}" > > [(set_attr "type" "sselog1") > > (set_attr "prefix_rep" "1") > > diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h > > index 591394076cc..5422b0fc9e0 100644 > > --- a/gcc/config/i386/ia32intrin.h > > +++ b/gcc/config/i386/ia32intrin.h > > @@ -51,11 +51,11 @@ __bswapd (int __X) > > > > #ifndef __iamcu__ > > > > -#ifndef __SSE4_2__ > > +#ifndef __CRC32__ > > #pragma GCC push_options > > -#pragma GCC target("sse4.2") > > -#define __DISABLE_SSE4_2__ > > -#endif /* __SSE4_2__ */ > > +#pragma GCC target("crc32") > > +#define __DISABLE_CRC32__ > > +#endif /* __CRC32__ */ > > > > /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. 
*/ > > extern __inline unsigned int > > @@ -79,10 +79,10 @@ __crc32d (unsigned int __C, unsigned int __V) > > return __builtin_ia32_crc32si (__C, __V); > > } > > > > -#ifdef __DISABLE_SSE4_2__ > > -#undef __DISABLE_SSE4_2__ > > +#ifdef __DISABLE_CRC32__ > > +#undef __DISABLE_CRC32__ > > #pragma GCC pop_options > > -#endif /* __DISABLE_SSE4_2__ */ > > +#endif /* __DISABLE_CRC32__ */ > > > > #endif /* __iamcu__ */ > > > > @@ -199,11 +199,11 @@ __bswapq (long long __X) > > return __builtin_bswap64 (__X); > > } > > > > -#ifndef __SSE4_2__ > > +#ifndef __CRC32__ > > #pragma GCC push_options > > -#pragma GCC target("sse4.2") > > -#define __DISABLE_SSE4_2__ > > -#endif /* __SSE4_2__ */ > > +#pragma GCC target("crc32") > > +#define __DISABLE_CRC32__ > > +#endif /* __CRC32__ */ > > > > /* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ > > extern __inline unsigned long long > > @@ -213,10 +213,10 @@ __crc32q (unsigned long long __C, unsigned long long __V) > > return __builtin_ia32_crc32di (__C, __V); > > } > > > > -#ifdef __DISABLE_SSE4_2__ > > -#undef __DISABLE_SSE4_2__ > > +#ifdef __DISABLE_CRC32__ > > +#undef __DISABLE_CRC32__ > > #pragma GCC pop_options > > -#endif /* __DISABLE_SSE4_2__ */ > > +#endif /* __DISABLE_CRC32__ */ > > > > /* 64bit popcnt */ > > extern __inline long long > > -- > > 2.30.2 > >
On Thu, Apr 15, 2021 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Thu, Apr 15, 2021 at 9:14 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Thu, Apr 15, 2021 at 5:11 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > Use crc32 target option for CRC32 intrinsics to support CRC32 intrinsics > > > without enabling SSE vector instructions. > > > > There is no CRC32 ISA. crc32 is part of SSE4.2 [1] and current > > situation reflects that correctly. > > CRC32 is similar to POPCNT which was originally in SSE4.2. Now POPCNT It is not similar, POPCNT has its own CPUID flag and can be enabled independently of SSE4.2. > is a separate feature which is also enabled by SSE4.2. Enable CRC32 only > with SSE4.2 makes it impossible to use CRC32 with -mgeneral-regs-only. This > patch addresses this issue the same way as POPCNT. CRC32 doesn't have its own CPUID flag, so PTA_CRC32 is pointless. OTOH, the situation is similar with MONITOR and MWAIT. These are enabled with SSE3 and don't use XMM registers. Also somewhat similar is FISTTP, but there is no intrinsic for this insn. Uros. > > > [1] https://en.wikipedia.org/wiki/SSE4 > > > > Uros. > > > > > * config/i386/gnu-property.c > > > (file_end_indicate_exec_stack_and_gnu_property): Also check > > > TARGET_CRC32 for GNU_PROPERTY_X86_ISA_1_V2. > > > * config/i386/i386-c.c (ix86_target_macros_internal): Define > > > __CRC32__ for -mcrc32. > > > * config/i386/i386-options.c (ix86_option_override_internal): > > > Handle PTA_CRC32. Enable crc32 instruction for -msse4.2. > > > * config/i386/i386.h (PTA_CRC32): New. > > > (PTA_X86_64_V2): Add PTA_CRC32. > > > (PTA_NEHALEM): Likewise. > > > * config/i386/i386.md (sse4_2_crc32<mode>): Remove TARGET_SSE4_2 > > > check. > > > (sse4_2_crc32di): Likewise. > > > * config/i386/ia32intrin.h: Use crc32 target option for CRC32 > > > intrinsics. 
> > > --- > > > gcc/config/i386/gnu-property.c | 1 + > > > gcc/config/i386/i386-c.c | 2 ++ > > > gcc/config/i386/i386-options.c | 8 ++++++++ > > > gcc/config/i386/i386.h | 6 ++++-- > > > gcc/config/i386/i386.md | 4 ++-- > > > gcc/config/i386/ia32intrin.h | 28 ++++++++++++++-------------- > > > 6 files changed, 31 insertions(+), 18 deletions(-) > > > > > > diff --git a/gcc/config/i386/gnu-property.c b/gcc/config/i386/gnu-property.c > > > index 4ba04403002..b6a3bdf62ce 100644 > > > --- a/gcc/config/i386/gnu-property.c > > > +++ b/gcc/config/i386/gnu-property.c > > > @@ -92,6 +92,7 @@ file_end_indicate_exec_stack_and_gnu_property (void) > > > /* GNU_PROPERTY_X86_ISA_1_V2. */ > > > if (TARGET_CMPXCHG16B > > > || (TARGET_64BIT && TARGET_SAHF) > > > + || TARGET_CRC32 > > > || TARGET_POPCNT > > > || TARGET_SSE3 > > > || TARGET_SSSE3 > > > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c > > > index be46d0506ad..5ed0de006fb 100644 > > > --- a/gcc/config/i386/i386-c.c > > > +++ b/gcc/config/i386/i386-c.c > > > @@ -532,6 +532,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > > > def_or_undef (parse_in, "__LZCNT__"); > > > if (isa_flag & OPTION_MASK_ISA_TBM) > > > def_or_undef (parse_in, "__TBM__"); > > > + if (isa_flag & OPTION_MASK_ISA_CRC32) > > > + def_or_undef (parse_in, "__CRC32__"); > > > if (isa_flag & OPTION_MASK_ISA_POPCNT) > > > def_or_undef (parse_in, "__POPCNT__"); > > > if (isa_flag & OPTION_MASK_ISA_FSGSBASE) > > > diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c > > > index 91da2849c49..959ee163d2f 100644 > > > --- a/gcc/config/i386/i386-options.c > > > +++ b/gcc/config/i386/i386-options.c > > > @@ -2162,6 +2162,9 @@ ix86_option_override_internal (bool main_args_p, > > > if (((processor_alias_table[i].flags & PTA_CX16) != 0) > > > && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_CX16)) > > > opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_CX16; > > > + if (((processor_alias_table[i].flags & PTA_CRC32) 
!= 0) > > > + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CRC32)) > > > + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32; > > > if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0) > > > && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) > > > opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; > > > @@ -2617,6 +2620,11 @@ ix86_option_override_internal (bool main_args_p, > > > opts->x_ix86_isa_flags > > > |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; > > > > > > + /* Enable crc32 instruction for -msse4.2. */ > > > + if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)) > > > + opts->x_ix86_isa_flags > > > + |= OPTION_MASK_ISA_CRC32 & ~opts->x_ix86_isa_flags_explicit; > > > + > > > /* Enable lzcnt instruction for -mabm. */ > > > if (TARGET_ABM_P(opts->x_ix86_isa_flags)) > > > opts->x_ix86_isa_flags > > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > > > index 97700d797a7..c50f9ab24fa 100644 > > > --- a/gcc/config/i386/i386.h > > > +++ b/gcc/config/i386/i386.h > > > @@ -2504,12 +2504,14 @@ constexpr wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23); > > > constexpr wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24); > > > constexpr wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25); > > > constexpr wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26); > > > +constexpr wide_int_bitmask PTA_CRC32 (0, HOST_WIDE_INT_1U << 27); > > > > > > constexpr wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE > > > | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; > > > constexpr wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE > > > & (~PTA_NO_SAHF)) > > > - | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3; > > > + | PTA_CRC32 | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 > > > + | PTA_SSSE3; > > > constexpr wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 > > > | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT > > > | PTA_MOVBE | 
PTA_XSAVE; > > > @@ -2519,7 +2521,7 @@ constexpr wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 > > > constexpr wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 > > > | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; > > > constexpr wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 > > > - | PTA_POPCNT; > > > + | PTA_CRC32 | PTA_POPCNT; > > > constexpr wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL; > > > constexpr wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE > > > | PTA_XSAVEOPT; > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > > index 9ff35d9a607..1f1d74e6275 100644 > > > --- a/gcc/config/i386/i386.md > > > +++ b/gcc/config/i386/i386.md > > > @@ -20998,7 +20998,7 @@ (define_insn "sse4_2_crc32<mode>" > > > [(match_operand:SI 1 "register_operand" "0") > > > (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")] > > > UNSPEC_CRC32))] > > > - "TARGET_SSE4_2 || TARGET_CRC32" > > > + "TARGET_CRC32" > > > "crc32{<imodesuffix>}\t{%2, %0|%0, %2}" > > > [(set_attr "type" "sselog1") > > > (set_attr "prefix_rep" "1") > > > @@ -21019,7 +21019,7 @@ (define_insn "sse4_2_crc32di" > > > [(match_operand:DI 1 "register_operand" "0") > > > (match_operand:DI 2 "nonimmediate_operand" "rm")] > > > UNSPEC_CRC32))] > > > - "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)" > > > + "TARGET_64BIT && TARGET_CRC32" > > > "crc32{q}\t{%2, %0|%0, %2}" > > > [(set_attr "type" "sselog1") > > > (set_attr "prefix_rep" "1") > > > diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h > > > index 591394076cc..5422b0fc9e0 100644 > > > --- a/gcc/config/i386/ia32intrin.h > > > +++ b/gcc/config/i386/ia32intrin.h > > > @@ -51,11 +51,11 @@ __bswapd (int __X) > > > > > > #ifndef __iamcu__ > > > > > > -#ifndef __SSE4_2__ > > > +#ifndef __CRC32__ > > > #pragma GCC push_options > > > -#pragma GCC target("sse4.2") > > > -#define __DISABLE_SSE4_2__ > > > -#endif /* __SSE4_2__ */ > > > +#pragma GCC 
target("crc32") > > > +#define __DISABLE_CRC32__ > > > +#endif /* __CRC32__ */ > > > > > > /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ > > > extern __inline unsigned int > > > @@ -79,10 +79,10 @@ __crc32d (unsigned int __C, unsigned int __V) > > > return __builtin_ia32_crc32si (__C, __V); > > > } > > > > > > -#ifdef __DISABLE_SSE4_2__ > > > -#undef __DISABLE_SSE4_2__ > > > +#ifdef __DISABLE_CRC32__ > > > +#undef __DISABLE_CRC32__ > > > #pragma GCC pop_options > > > -#endif /* __DISABLE_SSE4_2__ */ > > > +#endif /* __DISABLE_CRC32__ */ > > > > > > #endif /* __iamcu__ */ > > > > > > @@ -199,11 +199,11 @@ __bswapq (long long __X) > > > return __builtin_bswap64 (__X); > > > } > > > > > > -#ifndef __SSE4_2__ > > > +#ifndef __CRC32__ > > > #pragma GCC push_options > > > -#pragma GCC target("sse4.2") > > > -#define __DISABLE_SSE4_2__ > > > -#endif /* __SSE4_2__ */ > > > +#pragma GCC target("crc32") > > > +#define __DISABLE_CRC32__ > > > +#endif /* __CRC32__ */ > > > > > > /* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ > > > extern __inline unsigned long long > > > @@ -213,10 +213,10 @@ __crc32q (unsigned long long __C, unsigned long long __V) > > > return __builtin_ia32_crc32di (__C, __V); > > > } > > > > > > -#ifdef __DISABLE_SSE4_2__ > > > -#undef __DISABLE_SSE4_2__ > > > +#ifdef __DISABLE_CRC32__ > > > +#undef __DISABLE_CRC32__ > > > #pragma GCC pop_options > > > -#endif /* __DISABLE_SSE4_2__ */ > > > +#endif /* __DISABLE_CRC32__ */ > > > > > > /* 64bit popcnt */ > > > extern __inline long long > > > -- > > > 2.30.2 > > > > > > > -- > H.J.
On Thu, Apr 15, 2021 at 9:34 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Thu, Apr 15, 2021 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > On Thu, Apr 15, 2021 at 9:14 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > On Thu, Apr 15, 2021 at 5:11 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > Use crc32 target option for CRC32 intrinsics to support CRC32 intrinsics > > > > without enabling SSE vector instructions. > > > > > > There is no CRC32 ISA. crc32 is part of SSE4.2 [1] and current > > > situation reflects that correctly. > > > > CRC32 is similar to POPCNT which was originally in SSE4.2. Now POPCNT > > It is not similar, POPCNT has its own CPUID flag and can be enabled > independently of SSE4.2. > > > is a separate feature which is also enabled by SSE4.2. Enable CRC32 only > > with SSE4.2 makes it impossible to use CRC32 with -mgeneral-regs-only. This > > patch addresses this issue the same way as POPCNT. > > CRC32 doesn't have its own CPUID flag, so PTA_CRC32 is pointless. PTA_CRC32 shouldn't be added. > OTOH, the situation is similar with MONITOR and MWAIT. These are There are no intrinsics for MONITOR or MWAIT. > enabled with SSE3 and don't use XMM registers. Also somewhat similar > is FISTTP, but there is no intrinsic for this insn. True. Here is the v2 patch without PTA_CRC32.
On Thu, Apr 15, 2021 at 6:51 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Thu, Apr 15, 2021 at 9:34 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Thu, Apr 15, 2021 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > On Thu, Apr 15, 2021 at 9:14 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > > > On Thu, Apr 15, 2021 at 5:11 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > > > Use crc32 target option for CRC32 intrinsics to support CRC32 intrinsics > > > > > without enabling SSE vector instructions. > > > > > > > > There is no CRC32 ISA. crc32 is part of SSE4.2 [1] and current > > > > situation reflects that correctly. > > > > > > CRC32 is similar to POPCNT which was originally in SSE4.2. Now POPCNT > > > > It is not similar, POPCNT has its own CPUID flag and can be enabled > > independently of SSE4.2. > > > > > is a separate feature which is also enabled by SSE4.2. Enable CRC32 only > > > with SSE4.2 makes it impossible to use CRC32 with -mgeneral-regs-only. This > > > patch addresses this issue the same way as POPCNT. > > > > CRC32 doesn't have its own CPUID flag, so PTA_CRC32 is pointless. > > PTA_CRC32 shouldn't be added. > > > OTOH, the situation is similar with MONITOR and MWAIT. These are > > There are no intrinsics for MONITOR nor MWAIT. pmmintrin.h: extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_monitor (void const * __P, unsigned int __E, unsigned int __H) { __builtin_ia32_monitor (__P, __E, __H); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mwait (unsigned int __E, unsigned int __H) { __builtin_ia32_mwait (__E, __H); } > > > enabled with SSE3 and don't use XMM registers. Also somewhat similar > > is FISTTP, but there is no intrinsic for this insn. > > True. > > Here is the v2 patch without PTA_CRC32. > > -- > H.J.
On Thu, Apr 15, 2021 at 9:53 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Thu, Apr 15, 2021 at 6:51 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > On Thu, Apr 15, 2021 at 9:34 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > On Thu, Apr 15, 2021 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > On Thu, Apr 15, 2021 at 9:14 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > > > > > On Thu, Apr 15, 2021 at 5:11 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > > > > > Use crc32 target option for CRC32 intrinsics to support CRC32 intrinsics > > > > > > without enabling SSE vector instructions. > > > > > > > > > > There is no CRC32 ISA. crc32 is part of SSE4.2 [1] and current > > > > > situation reflects that correctly. > > > > > > > > CRC32 is similar to POPCNT which was originally in SSE4.2. Now POPCNT > > > > > > It is not similar, POPCNT has its own CPUID flag and can be enabled > > > independently of SSE4.2. > > > > > > > is a separate feature which is also enabled by SSE4.2. Enable CRC32 only > > > > with SSE4.2 makes it impossible to use CRC32 with -mgeneral-regs-only. This > > > > patch addresses this issue the same way as POPCNT. > > > > > > CRC32 doesn't have its own CPUID flag, so PTA_CRC32 is pointless. > > > > PTA_CRC32 shouldn't be added. > > > > > OTOH, the situation is similar with MONITOR and MWAIT. These are > > > > There are no intrinsics for MONITOR nor MWAIT. > > pmmintrin.h: > > extern __inline void __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > _mm_monitor (void const * __P, unsigned int __E, unsigned int __H) > { > __builtin_ia32_monitor (__P, __E, __H); > } > > extern __inline void __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > _mm_mwait (unsigned int __E, unsigned int __H) > { > __builtin_ia32_mwait (__E, __H); > } They can be moved to mwaitintrin.h with -mmwait. > > > > > enabled with SSE3 and don't use XMM registers. 
Also somewhat similar > > > is FISTTP, but there is no intrinsic for this insn. > > > > True. > > > > Here is the v2 patch without PTA_CRC32. > > > > -- > > H.J.
On Thu, Apr 15, 2021 at 6:51 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Thu, Apr 15, 2021 at 9:34 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Thu, Apr 15, 2021 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > On Thu, Apr 15, 2021 at 9:14 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > > > On Thu, Apr 15, 2021 at 5:11 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > > > > > Use crc32 target option for CRC32 intrinsics to support CRC32 intrinsics > > > > > without enabling SSE vector instructions. > > > > > > > > There is no CRC32 ISA. crc32 is part of SSE4.2 [1] and current > > > > situation reflects that correctly. > > > > > > CRC32 is similar to POPCNT which was originally in SSE4.2. Now POPCNT > > > > It is not similar, POPCNT has its own CPUID flag and can be enabled > > independently of SSE4.2. > > > > > is a separate feature which is also enabled by SSE4.2. Enable CRC32 only > > > with SSE4.2 makes it impossible to use CRC32 with -mgeneral-regs-only. This > > > patch addresses this issue the same way as POPCNT. > > > > CRC32 doesn't have its own CPUID flag, so PTA_CRC32 is pointless. > > PTA_CRC32 shouldn't be added. > > > OTOH, the situation is similar with MONITOR and MWAIT. These are > > There are no intrinsics for MONITOR nor MWAIT. > > > enabled with SSE3 and don't use XMM registers. Also somewhat similar > > is FISTTP, but there is no intrinsic for this insn. > > True. > > Here is the v2 patch without PTA_CRC32. --- a/gcc/config/i386/gnu-property.c +++ b/gcc/config/i386/gnu-property.c @@ -92,6 +92,7 @@ file_end_indicate_exec_stack_and_gnu_property (void) /* GNU_PROPERTY_X86_ISA_1_V2. */ if (TARGET_CMPXCHG16B || (TARGET_64BIT && TARGET_SAHF) + || TARGET_CRC32 || TARGET_POPCNT || TARGET_SSE3 || TARGET_SSSE3 This is not needed. CRC32 is not an ISA, and if someone uses -mx86-64-v2 -mno-crc32 it does what the documentation says - disables the builtin function. Otherwise OK, but please also obtain the release manager's (RM's) approval at this stage. 
Thanks, Uros. > -- > H.J.
diff --git a/gcc/config/i386/gnu-property.c b/gcc/config/i386/gnu-property.c index 4ba04403002..b6a3bdf62ce 100644 --- a/gcc/config/i386/gnu-property.c +++ b/gcc/config/i386/gnu-property.c @@ -92,6 +92,7 @@ file_end_indicate_exec_stack_and_gnu_property (void) /* GNU_PROPERTY_X86_ISA_1_V2. */ if (TARGET_CMPXCHG16B || (TARGET_64BIT && TARGET_SAHF) + || TARGET_CRC32 || TARGET_POPCNT || TARGET_SSE3 || TARGET_SSSE3 diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index be46d0506ad..5ed0de006fb 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -532,6 +532,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__LZCNT__"); if (isa_flag & OPTION_MASK_ISA_TBM) def_or_undef (parse_in, "__TBM__"); + if (isa_flag & OPTION_MASK_ISA_CRC32) + def_or_undef (parse_in, "__CRC32__"); if (isa_flag & OPTION_MASK_ISA_POPCNT) def_or_undef (parse_in, "__POPCNT__"); if (isa_flag & OPTION_MASK_ISA_FSGSBASE) diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 91da2849c49..959ee163d2f 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -2162,6 +2162,9 @@ ix86_option_override_internal (bool main_args_p, if (((processor_alias_table[i].flags & PTA_CX16) != 0) && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_CX16)) opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_CX16; + if (((processor_alias_table[i].flags & PTA_CRC32) != 0) + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CRC32)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32; if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; @@ -2617,6 +2620,11 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; + /* Enable crc32 instruction for -msse4.2. 
*/ + if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)) + opts->x_ix86_isa_flags + |= OPTION_MASK_ISA_CRC32 & ~opts->x_ix86_isa_flags_explicit; + /* Enable lzcnt instruction for -mabm. */ if (TARGET_ABM_P(opts->x_ix86_isa_flags)) opts->x_ix86_isa_flags diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 97700d797a7..c50f9ab24fa 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2504,12 +2504,14 @@ constexpr wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23); constexpr wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24); constexpr wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25); constexpr wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26); +constexpr wide_int_bitmask PTA_CRC32 (0, HOST_WIDE_INT_1U << 27); constexpr wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; constexpr wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE & (~PTA_NO_SAHF)) - | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3; + | PTA_CRC32 | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 + | PTA_SSSE3; constexpr wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT | PTA_MOVBE | PTA_XSAVE; @@ -2519,7 +2521,7 @@ constexpr wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 constexpr wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; constexpr wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 - | PTA_POPCNT; + | PTA_CRC32 | PTA_POPCNT; constexpr wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL; constexpr wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9ff35d9a607..1f1d74e6275 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20998,7 +20998,7 @@ (define_insn "sse4_2_crc32<mode>" 
[(match_operand:SI 1 "register_operand" "0") (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")] UNSPEC_CRC32))] - "TARGET_SSE4_2 || TARGET_CRC32" + "TARGET_CRC32" "crc32{<imodesuffix>}\t{%2, %0|%0, %2}" [(set_attr "type" "sselog1") (set_attr "prefix_rep" "1") @@ -21019,7 +21019,7 @@ (define_insn "sse4_2_crc32di" [(match_operand:DI 1 "register_operand" "0") (match_operand:DI 2 "nonimmediate_operand" "rm")] UNSPEC_CRC32))] - "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)" + "TARGET_64BIT && TARGET_CRC32" "crc32{q}\t{%2, %0|%0, %2}" [(set_attr "type" "sselog1") (set_attr "prefix_rep" "1") diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h index 591394076cc..5422b0fc9e0 100644 --- a/gcc/config/i386/ia32intrin.h +++ b/gcc/config/i386/ia32intrin.h @@ -51,11 +51,11 @@ __bswapd (int __X) #ifndef __iamcu__ -#ifndef __SSE4_2__ +#ifndef __CRC32__ #pragma GCC push_options -#pragma GCC target("sse4.2") -#define __DISABLE_SSE4_2__ -#endif /* __SSE4_2__ */ +#pragma GCC target("crc32") +#define __DISABLE_CRC32__ +#endif /* __CRC32__ */ /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ extern __inline unsigned int @@ -79,10 +79,10 @@ __crc32d (unsigned int __C, unsigned int __V) return __builtin_ia32_crc32si (__C, __V); } -#ifdef __DISABLE_SSE4_2__ -#undef __DISABLE_SSE4_2__ +#ifdef __DISABLE_CRC32__ +#undef __DISABLE_CRC32__ #pragma GCC pop_options -#endif /* __DISABLE_SSE4_2__ */ +#endif /* __DISABLE_CRC32__ */ #endif /* __iamcu__ */ @@ -199,11 +199,11 @@ __bswapq (long long __X) return __builtin_bswap64 (__X); } -#ifndef __SSE4_2__ +#ifndef __CRC32__ #pragma GCC push_options -#pragma GCC target("sse4.2") -#define __DISABLE_SSE4_2__ -#endif /* __SSE4_2__ */ +#pragma GCC target("crc32") +#define __DISABLE_CRC32__ +#endif /* __CRC32__ */ /* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value. 
*/ extern __inline unsigned long long @@ -213,10 +213,10 @@ __crc32q (unsigned long long __C, unsigned long long __V) return __builtin_ia32_crc32di (__C, __V); } -#ifdef __DISABLE_SSE4_2__ -#undef __DISABLE_SSE4_2__ +#ifdef __DISABLE_CRC32__ +#undef __DISABLE_CRC32__ #pragma GCC pop_options -#endif /* __DISABLE_SSE4_2__ */ +#endif /* __DISABLE_CRC32__ */ /* 64bit popcnt */ extern __inline long long