Message ID | 20231020062050.971264-1-haochen.jiang@intel.com |
---|---|
State | New |
Headers | show |
Series | i386: Prevent splitting to xmm16+ when !TARGET_AVX512VL | expand |
> -----Original Message----- > From: Jiang, Haochen <haochen.jiang@intel.com> > Sent: Friday, October 20, 2023 2:21 PM > To: gcc-patches@gcc.gnu.org > Cc: ubizjak@gmail.com; Liu, Hongtao <hongtao.liu@intel.com> > Subject: [PATCH] i386: Prevent splitting to xmm16+ when !TARGET_AVX512VL > > Hi all, > > Currently, a split may end up using x/ymm16+ without AVX512VL, > which eventually leads to an ICE, as seen in PR111753. > > This patch aims to fix that. > > Regtested on x86_64-pc-linux-gnu. Ok for trunk? LGTM. > > Thx, > Haochen > > gcc/ChangeLog: > > PR target/111753 > * config/i386/i386.cc (ix86_standard_x87sse_constant_load_p): > Do not split to xmm16+ when !TARGET_AVX512VL. > > gcc/testsuite/ChangeLog: > > PR target/111753 > * gcc.target/i386/pr111753.c: New test. > --- > gcc/config/i386/i386.cc | 3 ++ > gcc/testsuite/gcc.target/i386/pr111753.c | 69 > ++++++++++++++++++++++++ > 2 files changed, 72 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/pr111753.c > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index > 641e7680335..5f8c5eb98a2 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -5481,6 +5481,9 @@ ix86_standard_x87sse_constant_load_p (const > rtx_insn *insn, rtx dst) > if (src == NULL > || (SSE_REGNO_P (REGNO (dst)) > && standard_sse_constant_p (src, GET_MODE (dst)) != 1) > + || (!TARGET_AVX512VL > + && EXT_REX_SSE_REGNO_P (REGNO (dst)) > + && standard_sse_constant_p (src, GET_MODE (dst)) == 1) > || (STACK_REGNO_P (REGNO (dst)) > && standard_80387_constant_p (src) < 1)) > return false; > diff --git a/gcc/testsuite/gcc.target/i386/pr111753.c > b/gcc/testsuite/gcc.target/i386/pr111753.c > new file mode 100644 > index 00000000000..16ceca6ddc6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr111753.c > @@ -0,0 +1,69 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-O2 -mavx512bw -fno-tree-ter -Wno-div-by-zero" } */ > + > +typedef int __attribute__((__vector_size__ (8))) v64u8; typedef char > +__attribute__((__vector_size__ (16))) v128u8; typedef int > +__attribute__((__vector_size__ (16))) v128u32; typedef int > +__attribute__((__vector_size__ (32))) v256u8; typedef int > +__attribute__((__vector_size__ (64))) v512u8; typedef short > +__attribute__((__vector_size__ (4))) v32s16; typedef short > +__attribute__((__vector_size__ (16))) v128s16; typedef short > +__attribute__((__vector_size__ (32))) v256s16; typedef _Float16 > +__attribute__((__vector_size__ (16))) f16; typedef _Float32 f32; > +typedef double __attribute__((__vector_size__ (64))) v512f64; typedef > +_Decimal32 d32; typedef _Decimal64 __attribute__((__vector_size__ > +(32))) v256d64; typedef _Decimal64 __attribute__((__vector_size__ > +(64))) v512d64; > +d32 foo0_d32_0, foo0_ret; > +v256d64 foo0_v256d64_0; > +v128s16 foo0_v128s16_0; > +int foo0_v256d128_0; > + > +extern void bar(int); > + > +void > +foo (v64u8, v128u8 v128u8_0, v128u8 v128s8_0, > + v256u8 v256u8_0, int v256s8_0, v512u8 v512u8_0, int v512s8_0, > + v256s16 v256s16_0, > + v512u8 v512s16_0, > + v128u32 v128u64_0, > + v128u32 v128s64_0, > + int, int, __int128 v128u128_0, __int128 v128s128_0, v128u32 > +v128f64_0) { > + v512d64 v512d64_0; > + v256u8 v256f32_0, v256d64_1 = foo0_v256d64_0 == foo0_d32_0; > + f32 f32_0; > + f16 v128f16_0; > + f32_0 /= 0; > + v128u8 v128u8_1 = v128u8_0 != 0; > + int v256d32_1; > + v256f32_0 /= 0; > + v32s16 v32s16_1 = __builtin_shufflevector ((v128s16) { }, v256s16_0, > +5, 10); > + v512f64 v512f64_1 = __builtin_convertvector (v512d64_0, v512f64); > + v512u8 v512d128_1 = v512s16_0; > + v128s16 v128s16_2 = > + __builtin_shufflevector ((v32s16) { }, v32s16_1, 0, 3, 2, 1, > + 0, 0, 0, 3), v128s16_3 = foo0_v128s16_0 > 0; > + v128f16_0 /= 0; > + __int128 v128s128_1 = 0 == v128s128_0; > + v512u8 v512u8_r = v512u8_0 + v512s8_0 + (v512u8) v512f64_1 
+ > +v512s16_0; > + v256u8 v256u8_r = ((union { > + v512u8 a; > + v256u8 b;}) v512u8_r).b + > + v256u8_0 + v256s8_0 + v256f32_0 + v256d32_1 + > + (v256u8) v256d64_1 + foo0_v256d128_0; > + v128u8 v128u8_r = ((union { > + v256u8 a; > + v128u8 b;}) v256u8_r).b + > + v128u8_0 + v128u8_1 + v128s8_0 + (v128u8) v128s16_2 + > + (v128u8) v128s16_3 + (v128u8) v128u64_0 + (v128u8) v128s64_0 + > + (v128u8) v128u128_0 + (v128u8) v128s128_1 + > + (v128u8) v128f16_0 + (v128u8) v128f64_0; > + bar (f32_0 + (int) foo0_d32_0); > + foo0_ret = ((union { > + v64u8 a; > + int b;}) ((union { > + v128u8 a; > + v64u8 b;}) v128u8_r).b).b; > +} > -- > 2.31.1
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 641e7680335..5f8c5eb98a2 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -5481,6 +5481,9 @@ ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst) if (src == NULL || (SSE_REGNO_P (REGNO (dst)) && standard_sse_constant_p (src, GET_MODE (dst)) != 1) + || (!TARGET_AVX512VL + && EXT_REX_SSE_REGNO_P (REGNO (dst)) + && standard_sse_constant_p (src, GET_MODE (dst)) == 1) || (STACK_REGNO_P (REGNO (dst)) && standard_80387_constant_p (src) < 1)) return false; diff --git a/gcc/testsuite/gcc.target/i386/pr111753.c b/gcc/testsuite/gcc.target/i386/pr111753.c new file mode 100644 index 00000000000..16ceca6ddc6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr111753.c @@ -0,0 +1,69 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512bw -fno-tree-ter -Wno-div-by-zero" } */ + +typedef int __attribute__((__vector_size__ (8))) v64u8; +typedef char __attribute__((__vector_size__ (16))) v128u8; +typedef int __attribute__((__vector_size__ (16))) v128u32; +typedef int __attribute__((__vector_size__ (32))) v256u8; +typedef int __attribute__((__vector_size__ (64))) v512u8; +typedef short __attribute__((__vector_size__ (4))) v32s16; +typedef short __attribute__((__vector_size__ (16))) v128s16; +typedef short __attribute__((__vector_size__ (32))) v256s16; +typedef _Float16 __attribute__((__vector_size__ (16))) f16; +typedef _Float32 f32; +typedef double __attribute__((__vector_size__ (64))) v512f64; +typedef _Decimal32 d32; +typedef _Decimal64 __attribute__((__vector_size__ (32))) v256d64; +typedef _Decimal64 __attribute__((__vector_size__ (64))) v512d64; +d32 foo0_d32_0, foo0_ret; +v256d64 foo0_v256d64_0; +v128s16 foo0_v128s16_0; +int foo0_v256d128_0; + +extern void bar(int); + +void +foo (v64u8, v128u8 v128u8_0, v128u8 v128s8_0, + v256u8 v256u8_0, int v256s8_0, v512u8 v512u8_0, int v512s8_0, + v256s16 v256s16_0, + v512u8 v512s16_0, + v128u32 v128u64_0, + 
v128u32 v128s64_0, + int, int, __int128 v128u128_0, __int128 v128s128_0, v128u32 v128f64_0) +{ + v512d64 v512d64_0; + v256u8 v256f32_0, v256d64_1 = foo0_v256d64_0 == foo0_d32_0; + f32 f32_0; + f16 v128f16_0; + f32_0 /= 0; + v128u8 v128u8_1 = v128u8_0 != 0; + int v256d32_1; + v256f32_0 /= 0; + v32s16 v32s16_1 = __builtin_shufflevector ((v128s16) { }, v256s16_0, 5, 10); + v512f64 v512f64_1 = __builtin_convertvector (v512d64_0, v512f64); + v512u8 v512d128_1 = v512s16_0; + v128s16 v128s16_2 = + __builtin_shufflevector ((v32s16) { }, v32s16_1, 0, 3, 2, 1, + 0, 0, 0, 3), v128s16_3 = foo0_v128s16_0 > 0; + v128f16_0 /= 0; + __int128 v128s128_1 = 0 == v128s128_0; + v512u8 v512u8_r = v512u8_0 + v512s8_0 + (v512u8) v512f64_1 + v512s16_0; + v256u8 v256u8_r = ((union { + v512u8 a; + v256u8 b;}) v512u8_r).b + + v256u8_0 + v256s8_0 + v256f32_0 + v256d32_1 + + (v256u8) v256d64_1 + foo0_v256d128_0; + v128u8 v128u8_r = ((union { + v256u8 a; + v128u8 b;}) v256u8_r).b + + v128u8_0 + v128u8_1 + v128s8_0 + (v128u8) v128s16_2 + + (v128u8) v128s16_3 + (v128u8) v128u64_0 + (v128u8) v128s64_0 + + (v128u8) v128u128_0 + (v128u8) v128s128_1 + + (v128u8) v128f16_0 + (v128u8) v128f64_0; + bar (f32_0 + (int) foo0_d32_0); + foo0_ret = ((union { + v64u8 a; + int b;}) ((union { + v128u8 a; + v64u8 b;}) v128u8_r).b).b; +}