Message ID | 20210722123139.1529202-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86: Convert load from constand pool to SSE constant load | expand |
On Thu, Jul 22, 2021 at 8:34 PM H.J. Lu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > For > > (set (reg:V32QI 108) > (const_vector:V32QI [ > (const_int -1 [0xffffffffffffffff]) repeated x32 > ])) "x.c":15:5 1573 {movv32qi_internal} > (expr_list:REG_EQUIV (const_vector:V32QI [ > (const_int -1 [0xffffffffffffffff]) repeated x32 > ]) > (nil)) > > LRA may generate LRA will call ix86_expand_vector_move, and using ix86_gen_scratch_sse_rtx there may result in no hard reg splits and cause ICE. I'm afraid this patch is just a walk round, the real issue here should be ix86_gen_scratch_sse_rtx usage when lra_in_progress. > > #6 0x0000000001077c8a in lra_emit_move (x=0x7ffff7f81b58, y=0x7ffff7f81b40) > at /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:502 > 502 ? emit_move_insn (x, y) : emit_insn (gen_rtx_SET (x, y))); > (gdb) call debug_rtx (x) > (reg:V32QI 326) > (gdb) call debug_rtx (y) > (mem/u/c:V32QI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S32 A256]) > (gdb) > > Convert load from constand pool to SSE constant load: > > (set (reg:V32QI 24 xmm4 [327]) > (const_vector:V32QI [ > (const_int -1 [0xffffffffffffffff]) repeated x32 > ])) "x.c":15:5 1573 {movv32qi_internal} > > gcc/ > > PR target/101504 > * config/i386/i386-expand.c (ix86_broadcast_from_constant): > Return the standard SSE constant. > (ix86_expand_vector_move): Handle standard SSE constants. > > gcc/testsuite/ > > PR target/101504 > * gcc.target/i386/pr101504.c: New test. > --- > gcc/config/i386/i386-expand.c | 11 +++++++++++ > gcc/testsuite/gcc.target/i386/pr101504.c | 23 +++++++++++++++++++++++ > 2 files changed, 34 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/pr101504.c > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index 896bd685b1f..7b6055bbf83 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -493,6 +493,10 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) > if (GET_CODE (constant) != CONST_VECTOR) > return nullptr; > > + /* Return the standard SSE constant. */ > + if (standard_sse_constant_p (constant, mode)) > + return constant; > + > /* There could be some rtx like > (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1"))) > but with "*.LC1" refer to V2DI constant vector. */ > @@ -577,6 +581,13 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) > rtx first = ix86_broadcast_from_constant (mode, op1); > if (first != nullptr) > { > + if (GET_CODE (first) == CONST_VECTOR) > + { > + /* This is a standard SSE constant. */ > + emit_move_insn (op0, first); > + return; > + } > + > /* Broadcast to XMM/YMM/ZMM register from an integer > constant or scalar mem. */ > /* Hard registers are used for 2 purposes: > diff --git a/gcc/testsuite/gcc.target/i386/pr101504.c b/gcc/testsuite/gcc.target/i386/pr101504.c > new file mode 100644 > index 00000000000..2ad0405dd7b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr101504.c > @@ -0,0 +1,23 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=skylake" } */ > + > +typedef unsigned int __attribute__((__vector_size__ (32))) U; > +typedef unsigned char __attribute__((__vector_size__ (64))) V; > + > +V g; > + > +U > +foo (void) > +{ > + V v = __builtin_shufflevector (g, g, > + 0, 1, 2, 0, 5, 1, 0, 1, 3, 2, 3, 0, 4, 3, 1, 2, > + 2, 0, 4, 2, 3, 1, 1, 2, 3, 4, 1, 1, 0, 0, 5, 2, > + 0, 3, 3, 3, 3, 4, 5, 0, 1, 5, 2, 1, 0, 1, 1, 2, > + 3, 2, 0, 5, 4, 5, 1, 0, 1, 4, 4, 3, 4, 5, 2, 0); > + v ^= 255; > + V w = v + g; > + U u = ((union { V a; U b; }) w).b + ((union { V a; U b; }) w).b[1]; > + return u; > +} > + > +/* { dg-final { scan-assembler-not "\.byte\[ \t\]+-1\n" } } */ > -- > 2.31.1 >
On Thu, Jul 22, 2021 at 7:17 PM Hongtao Liu <crazylht@gmail.com> wrote: > > On Thu, Jul 22, 2021 at 8:34 PM H.J. Lu via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > For > > > > (set (reg:V32QI 108) > > (const_vector:V32QI [ > > (const_int -1 [0xffffffffffffffff]) repeated x32 > > ])) "x.c":15:5 1573 {movv32qi_internal} > > (expr_list:REG_EQUIV (const_vector:V32QI [ > > (const_int -1 [0xffffffffffffffff]) repeated x32 > > ]) > > (nil)) > > > > LRA may generate > LRA will call ix86_expand_vector_move, and using > ix86_gen_scratch_sse_rtx there may result in no hard reg splits and > cause ICE. > I'm afraid this patch is just a walk round, the real issue here should > be ix86_gen_scratch_sse_rtx usage when lra_in_progress. Fixed. I sent the v2 patch. Thanks. > > > > #6 0x0000000001077c8a in lra_emit_move (x=0x7ffff7f81b58, y=0x7ffff7f81b40) > > at /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:502 > > 502 ? emit_move_insn (x, y) : emit_insn (gen_rtx_SET (x, y))); > > (gdb) call debug_rtx (x) > > (reg:V32QI 326) > > (gdb) call debug_rtx (y) > > (mem/u/c:V32QI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S32 A256]) > > (gdb) > > > > Convert load from constand pool to SSE constant load: > > > > (set (reg:V32QI 24 xmm4 [327]) > > (const_vector:V32QI [ > > (const_int -1 [0xffffffffffffffff]) repeated x32 > > ])) "x.c":15:5 1573 {movv32qi_internal} > > > > gcc/ > > > > PR target/101504 > > * config/i386/i386-expand.c (ix86_broadcast_from_constant): > > Return the standard SSE constant. > > (ix86_expand_vector_move): Handle standard SSE constants. > > > > gcc/testsuite/ > > > > PR target/101504 > > * gcc.target/i386/pr101504.c: New test. > > --- > > gcc/config/i386/i386-expand.c | 11 +++++++++++ > > gcc/testsuite/gcc.target/i386/pr101504.c | 23 +++++++++++++++++++++++ > > 2 files changed, 34 insertions(+) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101504.c > > > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > > index 896bd685b1f..7b6055bbf83 100644 > > --- a/gcc/config/i386/i386-expand.c > > +++ b/gcc/config/i386/i386-expand.c > > @@ -493,6 +493,10 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) > > if (GET_CODE (constant) != CONST_VECTOR) > > return nullptr; > > > > + /* Return the standard SSE constant. */ > > + if (standard_sse_constant_p (constant, mode)) > > + return constant; > > + > > /* There could be some rtx like > > (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1"))) > > but with "*.LC1" refer to V2DI constant vector. */ > > @@ -577,6 +581,13 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) > > rtx first = ix86_broadcast_from_constant (mode, op1); > > if (first != nullptr) > > { > > + if (GET_CODE (first) == CONST_VECTOR) > > + { > > + /* This is a standard SSE constant. */ > > + emit_move_insn (op0, first); > > + return; > > + } > > + > > /* Broadcast to XMM/YMM/ZMM register from an integer > > constant or scalar mem. */ > > /* Hard registers are used for 2 purposes: > > diff --git a/gcc/testsuite/gcc.target/i386/pr101504.c b/gcc/testsuite/gcc.target/i386/pr101504.c > > new file mode 100644 > > index 00000000000..2ad0405dd7b > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr101504.c > > @@ -0,0 +1,23 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -march=skylake" } */ > > + > > +typedef unsigned int __attribute__((__vector_size__ (32))) U; > > +typedef unsigned char __attribute__((__vector_size__ (64))) V; > > + > > +V g; > > + > > +U > > +foo (void) > > +{ > > + V v = __builtin_shufflevector (g, g, > > + 0, 1, 2, 0, 5, 1, 0, 1, 3, 2, 3, 0, 4, 3, 1, 2, > > + 2, 0, 4, 2, 3, 1, 1, 2, 3, 4, 1, 1, 0, 0, 5, 2, > > + 0, 3, 3, 3, 3, 4, 5, 0, 1, 5, 2, 1, 0, 1, 1, 2, > > + 3, 2, 0, 5, 4, 5, 1, 0, 1, 4, 4, 3, 4, 5, 2, 0); > > + v ^= 255; > > + V w = v + g; > > + U u = ((union { V a; U b; }) w).b + ((union { V a; U b; }) w).b[1]; > > + return u; > > +} > > + > > +/* { dg-final { scan-assembler-not "\.byte\[ \t\]+-1\n" } } */ > > -- > > 2.31.1 > > > > > -- > BR, > Hongtao
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 896bd685b1f..7b6055bbf83 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -493,6 +493,10 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) if (GET_CODE (constant) != CONST_VECTOR) return nullptr; + /* Return the standard SSE constant. */ + if (standard_sse_constant_p (constant, mode)) + return constant; + /* There could be some rtx like (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1"))) but with "*.LC1" refer to V2DI constant vector. */ @@ -577,6 +581,13 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) rtx first = ix86_broadcast_from_constant (mode, op1); if (first != nullptr) { + if (GET_CODE (first) == CONST_VECTOR) + { + /* This is a standard SSE constant. */ + emit_move_insn (op0, first); + return; + } + /* Broadcast to XMM/YMM/ZMM register from an integer constant or scalar mem. */ /* Hard registers are used for 2 purposes: diff --git a/gcc/testsuite/gcc.target/i386/pr101504.c b/gcc/testsuite/gcc.target/i386/pr101504.c new file mode 100644 index 00000000000..2ad0405dd7b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101504.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake" } */ + +typedef unsigned int __attribute__((__vector_size__ (32))) U; +typedef unsigned char __attribute__((__vector_size__ (64))) V; + +V g; + +U +foo (void) +{ + V v = __builtin_shufflevector (g, g, + 0, 1, 2, 0, 5, 1, 0, 1, 3, 2, 3, 0, 4, 3, 1, 2, + 2, 0, 4, 2, 3, 1, 1, 2, 3, 4, 1, 1, 0, 0, 5, 2, + 0, 3, 3, 3, 3, 4, 5, 0, 1, 5, 2, 1, 0, 1, 1, 2, + 3, 2, 0, 5, 4, 5, 1, 0, 1, 4, 4, 3, 4, 5, 2, 0); + v ^= 255; + V w = v + g; + U u = ((union { V a; U b; }) w).b + ((union { V a; U b; }) w).b[1]; + return u; +} + +/* { dg-final { scan-assembler-not "\.byte\[ \t\]+-1\n" } } */