Message ID | 555D6F64.3040300@redhat.com |
---|---|
State | New |
Headers | show |
On Thu, May 21, 2015 at 7:38 AM, Jeff Law <law@redhat.com> wrote: > > find_split_point will tend to favor splitting complex insns in such a way as > to encourage multiply-add insns. It does this by splitting an > unrecognizable insn at the (plus (mult)). > > Now that many MULTs are canonicalized as ASHIFT, that code to prefer the > multiply-add is no longer triggering when it could/should. This ultimately > results in splitting at the ASHIFT rather than the containing PLUS and thus > we generate distinct shift and add insns rather than a single shadd insn on > the PA (and probably other architectures). > > This patch will treat (plus (ashift)) just like (plus (mult)) which > encourages creation of shift-add insns. > > This has been bootstrapped and regression tested on x86-unknown-linux-gnu > and with an hppa2.0w-hp-hpux11.00 cross compiler on the hppa.exp testsuite > (full disclosure -- hppa.exp only has two tests, so it's far from > extensive). > > I've also verified this is one of the changes ultimately necessary to > resolve the code generation regressions caused by Venkat's combine.c change > on the PA across my 300+ testfiles for a PA cross compiler. > > OK for the trunk? Sounds reasonable. Thanks, Richard. > > > Jeff > > > > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 490386e..250fa0a 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,5 +1,8 @@ > 2015-05-20 Jeff Law <law@redhat.com> > > + * combine.c (find_split_point): Handle ASHIFT like MULT to encourage > + multiply-accumulate/shift-add insn generation. > + > * config/pa/pa.c (pa_print_operand): New 'o' output modifier. > (pa_mem_shadd_constant_p): Renamed from pa_shadd_constant_p. > (pa_shadd_constant_p): Allow constants for shadd insns rather > diff --git a/gcc/combine.c b/gcc/combine.c > index a90849e..ab6de3a 100644 > --- a/gcc/combine.c > +++ b/gcc/combine.c > @@ -5145,7 +5163,9 @@ find_split_point (rtx *loc, rtx_insn *insn, bool > set_src) > /* Split at a multiply-accumulate instruction. However if this is > the SET_SRC, we likely do not have such an instruction and it's > worthless to try this split. */ > - if (!set_src && GET_CODE (XEXP (x, 0)) == MULT) > + if (!set_src > + && (GET_CODE (XEXP (x, 0)) == MULT > + || GET_CODE (XEXP (x, 0)) == ASHIFT)) > return loc; > > default: > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > index f20a131..bac0973 100644 > --- a/gcc/testsuite/ChangeLog > +++ b/gcc/testsuite/ChangeLog > @@ -1,5 +1,7 @@ > 2015-05-20 Jeff Law <law@redhat.com> > > + * gcc.target/hppa/shadd-2.c: New test. > + > * gcc.target/hppa/hppa.exp: New target test driver. > * gcc.target/hppa/shadd-1.c: New test. > > diff --git a/gcc/testsuite/gcc.target/hppa/shadd-2.c > b/gcc/testsuite/gcc.target/hppa/shadd-2.c > new file mode 100644 > index 0000000..34708e5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/hppa/shadd-2.c > @@ -0,0 +1,49 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-times "sh.add" 2 } } */ > + > +typedef struct rtx_def *rtx; > +typedef const struct rtx_def *const_rtx; > +enum machine_mode > +{ > + VOIDmode, BLKmode, CCmode, CCGCmode, CCGOCmode, CCNOmode, CCAmode, > CCCmode, > + CCOmode, CCSmode, CCZmode, CCFPmode, CCFPUmode, BImode, QImode, HImode, > + SImode, DImode, TImode, OImode, QQmode, HQmode, SQmode, DQmode, TQmode, > + UQQmode, UHQmode, USQmode, UDQmode, UTQmode, HAmode, SAmode, DAmode, > + TAmode, UHAmode, USAmode, UDAmode, UTAmode, SFmode, DFmode, XFmode, > + TFmode, SDmode, DDmode, TDmode, CQImode, CHImode, CSImode, CDImode, > + CTImode, COImode, SCmode, DCmode, XCmode, TCmode, V2QImode, V4QImode, > + V2HImode, V1SImode, V8QImode, V4HImode, V2SImode, V1DImode, V16QImode, > + V8HImode, V4SImode, V2DImode, V1TImode, V32QImode, V16HImode, V8SImode, > + V4DImode, V2TImode, V64QImode, V32HImode, V16SImode, V8DImode, > V4TImode, > + V2SFmode, V4SFmode, V2DFmode, V8SFmode, V4DFmode, V2TFmode, V16SFmode, > + V8DFmode, V4TFmode, MAX_MACHINE_MODE, NUM_MACHINE_MODES = > MAX_MACHINE_MODE > +}; > +struct rtx_def > +{ > + __extension__ enum machine_mode mode:8; > +}; > +struct target_regs > +{ > + unsigned char x_hard_regno_nregs[53][MAX_MACHINE_MODE]; > +}; > +extern void oof (void); > +extern int rhs_regno (rtx); > + > +extern struct target_regs default_target_regs; > +__inline__ unsigned int > +end_hard_regno (enum machine_mode mode, unsigned int regno) > +{ > + return regno + > + ((&default_target_regs)->x_hard_regno_nregs)[regno][(int) mode]; > +} > + > +void > +note_btr_set (rtx dest, const_rtx set > + __attribute__ ((__unused__)), void *data) > +{ > + int regno, end_regno; > + end_regno = end_hard_regno (((dest)->mode), (rhs_regno (dest))); > + for (; regno < end_regno; regno++) > + oof (); > +} >
On Wed, May 20, 2015 at 11:38:44PM -0600, Jeff Law wrote: > I've also verified this is one of the changes ultimately necessary to > resolve the code generation regressions caused by Venkat's combine.c > change on the PA across my 300+ testfiles for a PA cross compiler. How much does it help, do you know? > OK for the trunk? Yes, please commit. Thanks. (One tiny comment below). Segher > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 490386e..250fa0a 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,5 +1,8 @@ > 2015-05-20 Jeff Law <law@redhat.com> > > + * combine.c (find_split_point): Handle ASHIFT like MULT to encourage > + multiply-accumulate/shift-add insn generation. > + > * config/pa/pa.c (pa_print_operand): New 'o' output modifier. > (pa_mem_shadd_constant_p): Renamed from pa_shadd_constant_p. > (pa_shadd_constant_p): Allow constants for shadd insns rather > diff --git a/gcc/combine.c b/gcc/combine.c > index a90849e..ab6de3a 100644 > --- a/gcc/combine.c > +++ b/gcc/combine.c > @@ -5145,7 +5163,9 @@ find_split_point (rtx *loc, rtx_insn *insn, bool set_src) > /* Split at a multiply-accumulate instruction. However if this is > the SET_SRC, we likely do not have such an instruction and it's > worthless to try this split. */ > - if (!set_src && GET_CODE (XEXP (x, 0)) == MULT) > + if (!set_src > + && (GET_CODE (XEXP (x, 0)) == MULT > + || GET_CODE (XEXP (x, 0)) == ASHIFT)) > return loc; It might be better to also check if it is shifting by a CONST_INT. I doubt it matters much, but it is closer to the original. Segher
On 05/21/2015 07:40 AM, Segher Boessenkool wrote: > On Wed, May 20, 2015 at 11:38:44PM -0600, Jeff Law wrote: >> I've also verified this is one of the changes ultimately necessary to >> resolve the code generation regressions caused by Venkat's combine.c >> change on the PA across my 300+ testfiles for a PA cross compiler. > > How much does it help, do you know? It resolves the remaining missed opportunities to create shadd insns across those 300+ files. There's one more combine.c patch on the way to canonicalize in one more place -- which fixes a missed CSE due to a MULT in one context and ASHIFT in another. Then it's strictly cleanup on the PA port to kill the old MULT patterns. > > It might be better to also check if it is shifting by a CONST_INT. > I doubt it matters much, but it is closer to the original. Sure, that's not a problem at all. Will do after the usual testing. jeff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 490386e..250fa0a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,8 @@ 2015-05-20 Jeff Law <law@redhat.com> + * combine.c (find_split_point): Handle ASHIFT like MULT to encourage + multiply-accumulate/shift-add insn generation. + * config/pa/pa.c (pa_print_operand): New 'o' output modifier. (pa_mem_shadd_constant_p): Renamed from pa_shadd_constant_p. (pa_shadd_constant_p): Allow constants for shadd insns rather diff --git a/gcc/combine.c b/gcc/combine.c index a90849e..ab6de3a 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -5145,7 +5163,9 @@ find_split_point (rtx *loc, rtx_insn *insn, bool set_src) /* Split at a multiply-accumulate instruction. However if this is the SET_SRC, we likely do not have such an instruction and it's worthless to try this split. */ - if (!set_src && GET_CODE (XEXP (x, 0)) == MULT) + if (!set_src + && (GET_CODE (XEXP (x, 0)) == MULT + || GET_CODE (XEXP (x, 0)) == ASHIFT)) return loc; default: diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f20a131..bac0973 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,7 @@ 2015-05-20 Jeff Law <law@redhat.com> + * gcc.target/hppa/shadd-2.c: New test. + * gcc.target/hppa/hppa.exp: New target test driver. * gcc.target/hppa/shadd-1.c: New test. diff --git a/gcc/testsuite/gcc.target/hppa/shadd-2.c b/gcc/testsuite/gcc.target/hppa/shadd-2.c new file mode 100644 index 0000000..34708e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/hppa/shadd-2.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "sh.add" 2 } } */ + +typedef struct rtx_def *rtx; +typedef const struct rtx_def *const_rtx; +enum machine_mode +{ + VOIDmode, BLKmode, CCmode, CCGCmode, CCGOCmode, CCNOmode, CCAmode, CCCmode, + CCOmode, CCSmode, CCZmode, CCFPmode, CCFPUmode, BImode, QImode, HImode, + SImode, DImode, TImode, OImode, QQmode, HQmode, SQmode, DQmode, TQmode, + UQQmode, UHQmode, USQmode, UDQmode, UTQmode, HAmode, SAmode, DAmode, + TAmode, UHAmode, USAmode, UDAmode, UTAmode, SFmode, DFmode, XFmode, + TFmode, SDmode, DDmode, TDmode, CQImode, CHImode, CSImode, CDImode, + CTImode, COImode, SCmode, DCmode, XCmode, TCmode, V2QImode, V4QImode, + V2HImode, V1SImode, V8QImode, V4HImode, V2SImode, V1DImode, V16QImode, + V8HImode, V4SImode, V2DImode, V1TImode, V32QImode, V16HImode, V8SImode, + V4DImode, V2TImode, V64QImode, V32HImode, V16SImode, V8DImode, V4TImode, + V2SFmode, V4SFmode, V2DFmode, V8SFmode, V4DFmode, V2TFmode, V16SFmode, + V8DFmode, V4TFmode, MAX_MACHINE_MODE, NUM_MACHINE_MODES = MAX_MACHINE_MODE +}; +struct rtx_def +{ + __extension__ enum machine_mode mode:8; +}; +struct target_regs +{ + unsigned char x_hard_regno_nregs[53][MAX_MACHINE_MODE]; +}; +extern void oof (void); +extern int rhs_regno (rtx); + +extern struct target_regs default_target_regs; +__inline__ unsigned int +end_hard_regno (enum machine_mode mode, unsigned int regno) +{ + return regno + + ((&default_target_regs)->x_hard_regno_nregs)[regno][(int) mode]; +} + +void +note_btr_set (rtx dest, const_rtx set + __attribute__ ((__unused__)), void *data) +{ + int regno, end_regno; + end_regno = end_hard_regno (((dest)->mode), (rhs_regno (dest))); + for (; regno < end_regno; regno++) + oof (); +}