Message ID | 871t8vjh3l.fsf@e105548-lin.cambridge.arm.com |
---|---|
State | New |
Headers | show |
Hi Richard, On 02/02/16 14:56, Richard Sandiford wrote: > In PR 69577 we have: > > A: (set (reg:V2TI X) ...) > B: (set (subreg:TI (reg:V2TI X) 0) ...) > > X gets allocated to an AVX register, as usual for V2TI. The problem is > that the movti for B doesn't then preserve the other half of X, even > though the subreg semantics are supposed to guarantee that. > > If instead the same value had been set by: > > A': (set (subreg:TI (reg:V2TI X) 16) ...) > B: (set (subreg:TI (reg:V2TI X) 0) ...) > > the subreg in A' would have prevented the use of AVX registers for X, > since you can't directly access the high part. > > IMO these are really the same thing. An alternative way to view it > is that the original sequence is equivalent to: > > A: (set (reg:V2TI X) ...) > B1: (set (subreg:TI (reg:V2TI X) 0) ...) > B2: (set (subreg:TI (reg:V2TI X) 16) (subreg:TI (reg:V2TI X) 16)) > > in which B2 is a no-op and therefore implicit. The handling ought > to be the same regardless of whether there is an rtl insn that > explicitly assigns to (subreg:TI (reg:V2TI X) 16). > > This patch implements that idea. Hopefully the comments explain > what's going on. > > Tested on x86_64-linux-gnu so far. Will test on aarch64-linux-gnu and > arm-linux-gnueabihf as well. OK to install if the additional testing > succeeds? For me this patch causes an ICE when building libgcc during an aarch64-none-elf build. It's a segfault with the trace: 0xb0ac2a crash_signal $SRC/gcc/toplev.c:335 0xa7cfd7 init_subregs_of_mode() $SRC/gcc/reginfo.c:1345 0x96fc4b init_costs $SRC/gcc/ira-costs.c:2187 0x97419e ira_set_pseudo_classes(bool, _IO_FILE*) $SRC/gcc/ira-costs.c:2237 0x106fd1e alloc_global_sched_pressure_data $SRC/gcc/haifa-sched.c:7244 0x106fd1e sched_init() $SRC/gcc/haifa-sched.c:7394 0x107109a haifa_sched_init() $SRC/gcc/haifa-sched.c:7406 0xab37ac schedule_insns() $SRC/gcc/sched-rgn.c:3504 0xab3f5b rest_of_handle_sched $SRC/gcc/sched-rgn.c:3717 0xab3f5b execute $SRC/gcc/sched-rgn.c:3825 Thanks, Kyrill > Thanks, > Richard > > > diff --git a/gcc/reginfo.c b/gcc/reginfo.c > index 6814eed..afb36aa 100644 > --- a/gcc/reginfo.c > +++ b/gcc/reginfo.c > @@ -1244,8 +1244,16 @@ simplifiable_subregs (const subreg_shape &shape) > static HARD_REG_SET **valid_mode_changes; > static obstack valid_mode_changes_obstack; > > +/* Restrict the choice of register for SUBREG_REG (SUBREG) based > + on information about SUBREG. > + > + If PARTIAL_DEF, SUBREG is a partial definition of a multipart inner > + register and we want to ensure that the other parts of the inner > + register are correctly preserved. If !PARTIAL_DEF we need to > + ensure that SUBREG itself can be formed. */ > + > static void > -record_subregs_of_mode (rtx subreg) > +record_subregs_of_mode (rtx subreg, bool partial_def) > { > unsigned int regno; > > @@ -1256,15 +1264,41 @@ record_subregs_of_mode (rtx subreg) > if (regno < FIRST_PSEUDO_REGISTER) > return; > > + subreg_shape shape (shape_of_subreg (subreg)); > + if (partial_def) > + { > + /* The number of independently-accessible SHAPE.outer_mode values > + in SHAPE.inner_mode is GET_MODE_SIZE (SHAPE.inner_mode) / SIZE. > + We need to check that the assignment will preserve all the other > + SIZE-byte chunks in the inner register besides the one that > + includes SUBREG. > + > + In practice it is enough to check whether an equivalent > + SHAPE.inner_mode value in an adjacent SIZE-byte chunk can be formed. > + If the underlying registers are small enough, both subregs will > + be valid. If the underlying registers are too large, one of the > + subregs will be invalid. > + > + This relies on the fact that we've already been passed > + SUBREG with PARTIAL_DEF set to false. */ > + unsigned int size = MAX (REGMODE_NATURAL_SIZE (shape.inner_mode), > + GET_MODE_SIZE (shape.outer_mode)); > + gcc_checking_assert (size < GET_MODE_SIZE (shape.inner_mode)); > + if (shape.offset >= size) > + shape.offset -= size; > + else > + shape.offset += size; > + } > + > if (valid_mode_changes[regno]) > AND_HARD_REG_SET (*valid_mode_changes[regno], > - simplifiable_subregs (shape_of_subreg (subreg))); > + simplifiable_subregs (shape)); > else > { > valid_mode_changes[regno] > = XOBNEW (&valid_mode_changes_obstack, HARD_REG_SET); > COPY_HARD_REG_SET (*valid_mode_changes[regno], > - simplifiable_subregs (shape_of_subreg (subreg))); > + simplifiable_subregs (shape)); > } > } > > @@ -1277,7 +1311,7 @@ find_subregs_of_mode (rtx x) > int i; > > if (code == SUBREG) > - record_subregs_of_mode (x); > + record_subregs_of_mode (x, false); > > /* Time for some deep diving. */ > for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) > @@ -1304,8 +1338,15 @@ init_subregs_of_mode (void) > > FOR_EACH_BB_FN (bb, cfun) > FOR_BB_INSNS (bb, insn) > - if (NONDEBUG_INSN_P (insn)) > - find_subregs_of_mode (PATTERN (insn)); > + { > + if (NONDEBUG_INSN_P (insn)) > + find_subregs_of_mode (PATTERN (insn)); > + df_ref def; > + FOR_EACH_INSN_DEF (def, insn) > + if (DF_REF_FLAGS_IS_SET (def, DF_REF_PARTIAL) > + && df_read_modify_subreg_p (DF_REF_REG (def))) > + record_subregs_of_mode (DF_REF_REG (def), true); > + } > } > > const HARD_REG_SET * > diff --git a/gcc/testsuite/gcc.target/i386/pr69577.c b/gcc/testsuite/gcc.target/i386/pr69577.c > new file mode 100644 > index 0000000..d680539 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr69577.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-O -fno-forward-propagate -fno-split-wide-types -mavx" } */ > + > +typedef unsigned int u32; > +typedef unsigned __int128 u128; > +typedef unsigned __int128 v32u128 __attribute__ ((vector_size (32))); > + > +u128 __attribute__ ((noinline, noclone)) > +foo (u32 u32_0, v32u128 v32u128_0) > +{ > + v32u128_0[0] >>= u32_0; > + v32u128_0 += (v32u128) {u32_0, 0}; > + return u32_0 + v32u128_0[0] + v32u128_0[1]; > +} > + > +int > +main() > +{ > + u128 x = foo (1, (v32u128) {1, 4}); > + if (x != 6) > + __builtin_abort (); > + return 0; > +} >
On Tue, Feb 2, 2016 at 5:54 PM, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote: > Hi Richard, > > > On 02/02/16 14:56, Richard Sandiford wrote: >> >> In PR 69577 we have: >> >> A: (set (reg:V2TI X) ...) >> B: (set (subreg:TI (reg:V2TI X) 0) ...) >> >> X gets allocated to an AVX register, as usual for V2TI. The problem is >> that the movti for B doesn't then preserve the other half of X, even >> though the subreg semantics are supposed to guarantee that. >> >> If instead the same value had been set by: >> >> A': (set (subreg:TI (reg:V2TI X) 16) ...) >> B: (set (subreg:TI (reg:V2TI X) 0) ...) >> >> the subreg in A' would have prevented the use of AVX registers for X, >> since you can't directly access the high part. >> >> IMO these are really the same thing. An alternative way to view it >> is that the original sequence is equivalent to: >> >> A: (set (reg:V2TI X) ...) >> B1: (set (subreg:TI (reg:V2TI X) 0) ...) >> B2: (set (subreg:TI (reg:V2TI X) 16) (subreg:TI (reg:V2TI X) 16)) >> >> in which B2 is a no-op and therefore implicit. The handling ought >> to be the same regardless of whether there is an rtl insn that >> explicitly assigns to (subreg:TI (reg:V2TI X) 16). >> >> This patch implements that idea. Hopefully the comments explain >> what's going on. >> >> Tested on x86_64-linux-gnu so far. Will test on aarch64-linux-gnu and >> arm-linux-gnueabihf as well. OK to install if the additional testing >> succeeds? > > > For me this patch causes an ICE when building libgcc during an > aarch64-none-elf build. > It's a segfault with the trace: > 0xb0ac2a crash_signal > $SRC/gcc/toplev.c:335 > 0xa7cfd7 init_subregs_of_mode() > $SRC/gcc/reginfo.c:1345 > 0x96fc4b init_costs > $SRC/gcc/ira-costs.c:2187 > 0x97419e ira_set_pseudo_classes(bool, _IO_FILE*) > $SRC/gcc/ira-costs.c:2237 > 0x106fd1e alloc_global_sched_pressure_data > $SRC/gcc/haifa-sched.c:7244 > 0x106fd1e sched_init() > $SRC/gcc/haifa-sched.c:7394 > 0x107109a haifa_sched_init() > $SRC/gcc/haifa-sched.c:7406 > 0xab37ac schedule_insns() > $SRC/gcc/sched-rgn.c:3504 > 0xab3f5b rest_of_handle_sched > $SRC/gcc/sched-rgn.c:3717 > 0xab3f5b execute > $SRC/gcc/sched-rgn.c:3825 Also on x86_64-linux-gnu when building -m32 multilib: Program received signal SIGSEGV, Segmentation fault. 0x0000000000d28264 in init_subregs_of_mode () at /home/uros/gcc-svn/trunk/gcc/reginfo.c:1345 1345 FOR_EACH_INSN_DEF (def, insn) (gdb) p insn $1 = (rtx_insn *) 0x7fffef9f4d40 (gdb) p debug_rtx (insn) (code_label 60 31 39 10 9 "" [3 uses]) $2 = void (gdb) p def $3 = (df_ref) 0x0 Uros.
diff --git a/gcc/reginfo.c b/gcc/reginfo.c index 6814eed..afb36aa 100644 --- a/gcc/reginfo.c +++ b/gcc/reginfo.c @@ -1244,8 +1244,16 @@ simplifiable_subregs (const subreg_shape &shape) static HARD_REG_SET **valid_mode_changes; static obstack valid_mode_changes_obstack; +/* Restrict the choice of register for SUBREG_REG (SUBREG) based + on information about SUBREG. + + If PARTIAL_DEF, SUBREG is a partial definition of a multipart inner + register and we want to ensure that the other parts of the inner + register are correctly preserved. If !PARTIAL_DEF we need to + ensure that SUBREG itself can be formed. */ + static void -record_subregs_of_mode (rtx subreg) +record_subregs_of_mode (rtx subreg, bool partial_def) { unsigned int regno; @@ -1256,15 +1264,41 @@ record_subregs_of_mode (rtx subreg) if (regno < FIRST_PSEUDO_REGISTER) return; + subreg_shape shape (shape_of_subreg (subreg)); + if (partial_def) + { + /* The number of independently-accessible SHAPE.outer_mode values + in SHAPE.inner_mode is GET_MODE_SIZE (SHAPE.inner_mode) / SIZE. + We need to check that the assignment will preserve all the other + SIZE-byte chunks in the inner register besides the one that + includes SUBREG. + + In practice it is enough to check whether an equivalent + SHAPE.inner_mode value in an adjacent SIZE-byte chunk can be formed. + If the underlying registers are small enough, both subregs will + be valid. If the underlying registers are too large, one of the + subregs will be invalid. + + This relies on the fact that we've already been passed + SUBREG with PARTIAL_DEF set to false. */ + unsigned int size = MAX (REGMODE_NATURAL_SIZE (shape.inner_mode), + GET_MODE_SIZE (shape.outer_mode)); + gcc_checking_assert (size < GET_MODE_SIZE (shape.inner_mode)); + if (shape.offset >= size) + shape.offset -= size; + else + shape.offset += size; + } + if (valid_mode_changes[regno]) AND_HARD_REG_SET (*valid_mode_changes[regno], - simplifiable_subregs (shape_of_subreg (subreg))); + simplifiable_subregs (shape)); else { valid_mode_changes[regno] = XOBNEW (&valid_mode_changes_obstack, HARD_REG_SET); COPY_HARD_REG_SET (*valid_mode_changes[regno], - simplifiable_subregs (shape_of_subreg (subreg))); + simplifiable_subregs (shape)); } } @@ -1277,7 +1311,7 @@ find_subregs_of_mode (rtx x) int i; if (code == SUBREG) - record_subregs_of_mode (x); + record_subregs_of_mode (x, false); /* Time for some deep diving. */ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) @@ -1304,8 +1338,15 @@ init_subregs_of_mode (void) FOR_EACH_BB_FN (bb, cfun) FOR_BB_INSNS (bb, insn) - if (NONDEBUG_INSN_P (insn)) - find_subregs_of_mode (PATTERN (insn)); + { + if (NONDEBUG_INSN_P (insn)) + find_subregs_of_mode (PATTERN (insn)); + df_ref def; + FOR_EACH_INSN_DEF (def, insn) + if (DF_REF_FLAGS_IS_SET (def, DF_REF_PARTIAL) + && df_read_modify_subreg_p (DF_REF_REG (def))) + record_subregs_of_mode (DF_REF_REG (def), true); + } } const HARD_REG_SET * diff --git a/gcc/testsuite/gcc.target/i386/pr69577.c b/gcc/testsuite/gcc.target/i386/pr69577.c new file mode 100644 index 0000000..d680539 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr69577.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-O -fno-forward-propagate -fno-split-wide-types -mavx" } */ + +typedef unsigned int u32; +typedef unsigned __int128 u128; +typedef unsigned __int128 v32u128 __attribute__ ((vector_size (32))); + +u128 __attribute__ ((noinline, noclone)) +foo (u32 u32_0, v32u128 v32u128_0) +{ + v32u128_0[0] >>= u32_0; + v32u128_0 += (v32u128) {u32_0, 0}; + return u32_0 + v32u128_0[0] + v32u128_0[1]; +} + +int +main() +{ + u128 x = foo (1, (v32u128) {1, 4}); + if (x != 6) + __builtin_abort (); + return 0; +}