Message ID | m3aalhzif0.fsf@redhat.com |
---|---|
State | New |
Headers | show |
On Wed, Nov 10, 2010 at 9:34 AM, Nick Clifton <nickc@redhat.com> wrote: > Hi Guys, > > As Richard pointed out my previous patch (for LIW/SETLB) included a > separate feature which should have been submitted on its own. So here > it is - a machine reorg pass to eliminate redundant compares. > > The scan could be improved to look further back through the > instruction stream for insns that set the EPSW register, but I am > leaving that for a future patch. > > Tested without regressions on an mn10300-elf toolchain. > > OK to apply ? Not a single testcase? Function comment is missing. Richard. > Cheers > Nick > > gcc/ChangeLog > 2010-11-10 Nick Clifton <nickc@redhat.com> > > * config/mn10300/mn10300.c (scan_for_redundant_compares): New > function. > (mn10300_reorg): New function. > (TARGET_MACHINE_DEPENDENT_REORG): Define. > > > Index: gcc/config/mn10300/mn10300.c > =================================================================== > --- gcc/config/mn10300/mn10300.c (revision 166474) > +++ gcc/config/mn10300/mn10300.c (working copy) > @@ -2403,6 +2403,128 @@ > /* Extract the latency value from the timings attribute. */ > return timings < 100 ? (timings % 10) : (timings % 100); > } > + > +static void > +scan_for_redundant_compares (void) > +{ > + rtx cur_insn; > + > + /* Look for this sequence: > + > + (set (reg X) (arith_op (...))) > + (set (reg CC) (compare (reg X) (const_int 0))) > + (set (pc) (if_then_else (EQ|NE (...)) (...) (...))) > + > + And remove the compare as the flags in the > + EPSW register will already be correctly set. */ > + for (cur_insn = get_insns (); cur_insn != NULL; cur_insn = NEXT_INSN (cur_insn)) > + { > + rtx pattern; > + > + if (! 
INSN_P (cur_insn)) > + continue; > + > + pattern = PATTERN (cur_insn); > + > + if (GET_CODE (pattern) == SET > + && GET_CODE (SET_SRC (pattern)) == COMPARE > + /* Paranoia checks: */ > + && REG_P (SET_DEST (pattern)) > + && REGNO (SET_DEST (pattern)) == CC_REG > + && REG_P (XEXP (SET_SRC (pattern), 0)) > + /* Normal checks: */ > + && CONST_INT_P (XEXP (SET_SRC (pattern), 1)) > + && INTVAL (XEXP (SET_SRC (pattern), 1)) == 0) > + { > + rtx prev_insn, branch, condition; > + unsigned int compare_reg; > + > + /* FIXME: We should scan backwards until the first ESPW > + setter or clobber insn is found (or the beginning of > + the block). At the moment we just look back one insn. */ > + prev_insn = prev_nonnote_insn (cur_insn); > + > + if (prev_insn == NULL || ! INSN_P (prev_insn)) > + continue; > + > + /* An UNSPEC might be an LIW insn which will not set the > + condition code flags in a way that we currently expect. */ > + if (GET_CODE (PATTERN (prev_insn)) == UNSPEC) > + continue; > + > + if (GET_CODE (PATTERN (prev_insn)) != PARALLEL > + || XVECLEN (PATTERN (prev_insn), 0) != 2 > + || GET_CODE (XVECEXP (PATTERN (prev_insn), 0, 0)) != SET) > + continue; > + > + compare_reg = REGNO (XEXP (SET_SRC (pattern), 0)); > + pattern = XVECEXP (PATTERN (prev_insn), 0, 0); > + > + if (! REG_P (SET_DEST (pattern)) > + || REGNO (SET_DEST (pattern)) != compare_reg) > + continue; > + > + branch = next_nonnote_insn (cur_insn); > + if (branch == NULL || ! JUMP_P (branch) > + || GET_CODE (PATTERN (branch)) != SET > + || GET_CODE (SET_SRC (PATTERN (branch))) != IF_THEN_ELSE) > + continue; > + condition = XEXP (SET_SRC (PATTERN (branch)), 0); > + > + switch (GET_CODE (condition)) > + { > + case EQ: > + case NE: > + break; > + default: > + continue; > + } > + > + /* Adding 1 or 4 to an address register results in an > + INC/INC4 instruction that doesn't set the flags. 
*/ > + if (GET_CODE (SET_SRC (pattern)) == PLUS > + && REG_P (SET_DEST (pattern)) > + && REGNO (SET_DEST (pattern)) >= FIRST_ADDRESS_REGNUM > + && REGNO (SET_DEST (pattern)) <= LAST_ADDRESS_REGNUM > + && REG_P (XEXP (SET_SRC (pattern), 0)) > + && REGNO (XEXP (SET_SRC (pattern), 0)) == REGNO (SET_DEST (pattern)) > + && CONST_INT_P (XEXP (SET_SRC (pattern), 1)) > + && (INTVAL (XEXP (SET_SRC (pattern), 1)) == 1 > + || INTVAL (XEXP (SET_SRC (pattern), 1)) == 4)) > + continue; > + > + switch (GET_CODE (SET_SRC (pattern))) > + { > + case PLUS: > + case MINUS: > + case MULT: > +#if 0 > + /* Some alternatives in the AND pattern use EXTBU which does > + not set the flags. Hence a CMP following an AND might be > + needed. */ > + case AND: > +#endif > + case XOR: > + case NOT: > + case ASHIFT: > + case LSHIFTRT: > + case ASHIFTRT: > + delete_insn (cur_insn); > + break; > + default: > + break; > + } > + } > + } > +} > + > +/* Implements TARGET_MACHINE_DEPENDENT_REORG. */ > + > +static void > +mn10300_reorg (void) > +{ > + scan_for_redundant_compares (); > +} > > /* Initialize the GCC target structure. */ > > @@ -2481,4 +2603,7 @@ > #undef TARGET_SCHED_ADJUST_COST > #define TARGET_SCHED_ADJUST_COST mn10300_adjust_sched_cost > > +#undef TARGET_MACHINE_DEPENDENT_REORG > +#define TARGET_MACHINE_DEPENDENT_REORG mn10300_reorg > + > struct gcc_target targetm = TARGET_INITIALIZER; >
On 11/10/2010 09:34 AM, Nick Clifton wrote: > Hi Guys, > > As Richard pointed out my previous patch (for LIW/SETLB) included a > separate feature which should have been submitted on its own. So here > it is - a machine reorg pass to eliminate redundant compares. > > The scan could be improved to look further back through the > instruction stream for insns that set the EPSW register, but I am > leaving that for a future patch. > > Tested without regressions on an mn10300-elf toolchain. > > OK to apply ? > > Cheers > Nick > > gcc/ChangeLog > 2010-11-10 Nick Clifton<nickc@redhat.com> > > * config/mn10300/mn10300.c (scan_for_redundant_compares): New > function. > (mn10300_reorg): New function. > (TARGET_MACHINE_DEPENDENT_REORG): Define. > > > Index: gcc/config/mn10300/mn10300.c > =================================================================== > --- gcc/config/mn10300/mn10300.c (revision 166474) > +++ gcc/config/mn10300/mn10300.c (working copy) > @@ -2403,6 +2403,128 @@ > /* Extract the latency value from the timings attribute. */ > return timings< 100 ? (timings % 10) : (timings % 100); > } > + > +static void > +scan_for_redundant_compares (void) > +{ > + rtx cur_insn; > + > + /* Look for this sequence: > + > + (set (reg X) (arith_op (...))) > + (set (reg CC) (compare (reg X) (const_int 0))) > + (set (pc) (if_then_else (EQ|NE (...)) (...) (...))) > + > + And remove the compare as the flags in the > + EPSW register will already be correctly set. */ This pass is only necessary because you are not modeling MODE_CC modes correctly. You should use a specific mode in SELECT_CC_MODE for EQ/NE comparisons (e.g. CCZmode) and add patterns like (parallel [ (set (reg X) (arith_op (...)) (set (reg:CCZ CC) (compare:CCZ (arith_op (...)) (const_int 0)))]) Then combine will be able to change the mode of the COMPARE to CCZmode (also in the use), and merge the two instructions. 
I guess it's okay to do it in mdreorg, but you should at least add a comment saying that the above would allow a more precise representation of the instruction stream, and also it would allow better register allocation in case X dies after the comparison. Paolo
On 11/10/10 01:34, Nick Clifton wrote: > Hi Guys, > > As Richard pointed out my previous patch (for LIW/SETLB) included a > separate feature which should have been submitted on its own. So here > it is - a machine reorg pass to eliminate redundant compares. > > The scan could be improved to look further back through the > instruction stream for insns that set the EPSW register, but I am > leaving that for a future patch. > > Tested without regressions on an mn10300-elf toolchain. > > OK to apply ? > > Cheers > Nick > > gcc/ChangeLog > 2010-11-10 Nick Clifton<nickc@redhat.com> > > * config/mn10300/mn10300.c (scan_for_redundant_compares): New > function. > (mn10300_reorg): New function. > (TARGET_MACHINE_DEPENDENT_REORG): Define. My first thought is isn't this handled in a target independent way elsewhere? How do the x86 and other targets that were converted away from cc0 handle this stuff? jeff
Hi Jeff, >> it is - a machine reorg pass to eliminate redundant compares. > My first thought is isn't this handled in a target independent way > elsewhere? How do the x86 and other targets that were converted away > from cc0 handle this stuff? Paolo has pointed out that I ought to be using SELECT_CC_MODE to return a new CC mode and then add patterns to combine the arithmetic and comparison instructions using this mode. He is right of course, but I would rather get this patch in and at least remove some redundant compares, and then work on a better patch at some point in the future. But if you feel we should be "doing the right thing" right from the start then I will withdraw the patch and try to work on a new one. In my copious free time of course... :-) Cheers Nick
On 11/10/10 07:48, Nick Clifton wrote: > Hi Jeff, > >>> it is - a machine reorg pass to eliminate redundant compares. > >> My first thought is isn't this handled in a target independent way >> elsewhere? How do the x86 and other targets that were converted away >> from cc0 handle this stuff? > > Paolo has pointed out that I ought to be using SELECT_CC_MODE to > return a new CC mode and then add patterns to combine the arithmetic > and comparison instructions using this mode. > > He is right of course, but I would rather get this patch in and at > least remove some redundant compares, and then work on a better patch > at some point in the future. But if you feel we should be "doing the > right thing" right from the start then I will withdraw the patch and > try to work on a new one. In my copious free time of course... :-) I'd like to see us DTRT, otherwise I suspect this will get dropped on the floor. This really should have been taken care of during the transition away from cc0, particularly since we had gone through the trouble to make sure this optimization was working in the cc0 world :-) ISTM you probably need just a few patterns to catch the vast majority of useful cases -- using match_operator liberally of course. I'd look to catch add/sub and the logicals; anything beyond those probably isn't worth the headache. jeff
On 11/10/2010 04:39 AM, Paolo Bonzini wrote: > Then combine will be able to change the mode of the COMPARE to > CCZmode (also in the use), and merge the two instructions. The biggest problem here is that the MN10300 has no general purpose addition instruction that doesn't modify the flags. Which means that he can't break up the compare-and-{branch,test} insns before reload is complete. Which means that he can't use combine to perform this optimization without creating branch insns with output reloads. And I think we'll agree that is a horrible thing to do, failing a new version of reload that actually handles such things. Which then begs the question of how *do* we go about performing this optimization after reload? One mechanism would be to do it all with peepholes. Another is a scan during md-reorg. Am I missing anything? -- Perhaps I'm mistaken about the requirement for a general-purpose addition instruction. I see that (An + imm32) is a valid addressing mode, apparently for all processor revisions, which might mean that reload will never need to generate an addition in order to fix up an address. That theory should be relatively easy to verify, with an assert added to the addsi3 expander. r~
On 11/10/2010 05:55 PM, Richard Henderson wrote: > On 11/10/2010 04:39 AM, Paolo Bonzini wrote: >> Then combine will be able to change the mode of the COMPARE to >> CCZmode (also in the use), and merge the two instructions. > > The biggest problem here is that the MN10300 has no general purpose > addition instruction that doesn't modify the flags. Which means that > he can't break up the compare-and-{branch,test} insns before reload is > complete. Which means that he can't use combine to perform this > optimization without creating branch insns with output reloads. Ouch, that changes the picture indeed. It may work if the predicates are strict enough (in particular, subregs of mem could be a problem) so that the only reloads on compare-and-branch are for spills. However I'm not sure it's good, as you lie to reload about what clobbers CC. > Which then begs the question of how *do* we go about performing this > optimization after reload? One mechanism would be to do it all with > peepholes. Another is a scan during md-reorg. > > Am I missing anything? No, I don't think so. A peephole should work right now since Nick is only looking at adjacent instruction, and has the advantage of modeling liveness properly (e.g. for regrename). However if he plans to change this, it would not work anymore. Paolo
On 11/10/10 09:55, Richard Henderson wrote: > On 11/10/2010 04:39 AM, Paolo Bonzini wrote: >> Then combine will be able to change the mode of the COMPARE to >> CCZmode (also in the use), and merge the two instructions. > The biggest problem here is that the MN10300 has no general purpose > addition instruction that doesn't modify the flags. Yea, that sounds familiar. In fact, I vaguely recall that it's exceedingly difficult to issue a nop-copy because the ISA can't encode such a thing and just about everything else you could use to generate a nop mucks up the flags. > Which means that > he can't break up the compare-and-{branch,test} insns before reload is > complete. Which means that he can't use combine to perform this > optimization without creating branch insns with output reloads. ARGGH. On a positive note this lame problem with no output reloads on branch insns is something that is fixable once we move away from our current reload implementation. Or more correctly, it's fixable for cases where there's not an abnormal critical edge. > And I think we'll agree that is a horrible thing to do, failing a new > version of reload that actually handles such things. In the queue, but we shouldn't hold everything up on that. > Which then begs the question of how *do* we go about performing this > optimization after reload? One mechanism would be to do it all with > peepholes. Another is a scan during md-reorg. Is this the first time we've converted a target from cc0 which didn't have a way to perform arithmetic without clobbering the flags? If not, then how did we handle it before. > -- > > Perhaps I'm mistaken about the requirement for a general-purpose > addition instruction. I see that (An + imm32) is a valid addressing > mode, apparently for all processor revisions, which might mean that > reload will never need to generate an addition in order to fix up an > address. That theory should be relatively easy to verify, with an > assert added to the addsi3 expander. 
The concern would be dreg = sp + const_int, which needs an intermediate register. Jeff
On 11/10/2010 09:49 AM, Jeff Law wrote: > Is this the first time we've converted a target from cc0 which didn't > have a way to perform arithmetic without clobbering the flags? If > not, then how did we handle it before. Sort-of. Nick did convert the RX port away from cc0, and it also doesn't have a non-flags-clobbering add. But he makes no attempt to tidy up redundant flag settings there. r~
On 11/10/10 11:59, Richard Henderson wrote: > On 11/10/2010 09:49 AM, Jeff Law wrote: >> Is this the first time we've converted a target from cc0 which didn't >> have a way to perform arithmetic without clobbering the flags? If >> not, then how did we handle it before. > Sort-of. Nick did convert the RX port away from cc0, and it also > doesn't have a non-flags-clobbering add. But he makes no attempt > to tidy up redundant flag settings there. I think I'm going to withdraw my original objection. I think there's a few things that are reasonably unique here. jeff
On Wed, 10 Nov 2010, Nick Clifton wrote: > Hi Guys, > > As Richard pointed out my previous patch (for LIW/SETLB) included a > separate feature which should have been submitted on its own. So here > it is - a machine reorg pass to eliminate redundant compares. > + /* Look for this sequence: > + > + (set (reg X) (arith_op (...))) > + (set (reg CC) (compare (reg X) (const_int 0))) > + (set (pc) (if_then_else (EQ|NE (...)) (...) (...))) > + > + And remove the compare as the flags in the > + EPSW register will already be correctly set. */ I see parts of this may have gone moot, but shouldn't that set of (reg CC) instead be moved into a parallell with the arith_op setting? Or else the use of (reg CC) may be (at least theoretically) moved in later passes? Right, there may be no such "later passes" at the moment, but still... brgds, H-P
Index: gcc/config/mn10300/mn10300.c =================================================================== --- gcc/config/mn10300/mn10300.c (revision 166474) +++ gcc/config/mn10300/mn10300.c (working copy) @@ -2403,6 +2403,128 @@ /* Extract the latency value from the timings attribute. */ return timings < 100 ? (timings % 10) : (timings % 100); } + +static void +scan_for_redundant_compares (void) +{ + rtx cur_insn; + + /* Look for this sequence: + + (set (reg X) (arith_op (...))) + (set (reg CC) (compare (reg X) (const_int 0))) + (set (pc) (if_then_else (EQ|NE (...)) (...) (...))) + + And remove the compare as the flags in the + EPSW register will already be correctly set. */ + for (cur_insn = get_insns (); cur_insn != NULL; cur_insn = NEXT_INSN (cur_insn)) + { + rtx pattern; + + if (! INSN_P (cur_insn)) + continue; + + pattern = PATTERN (cur_insn); + + if (GET_CODE (pattern) == SET + && GET_CODE (SET_SRC (pattern)) == COMPARE + /* Paranoia checks: */ + && REG_P (SET_DEST (pattern)) + && REGNO (SET_DEST (pattern)) == CC_REG + && REG_P (XEXP (SET_SRC (pattern), 0)) + /* Normal checks: */ + && CONST_INT_P (XEXP (SET_SRC (pattern), 1)) + && INTVAL (XEXP (SET_SRC (pattern), 1)) == 0) + { + rtx prev_insn, branch, condition; + unsigned int compare_reg; + + /* FIXME: We should scan backwards until the first ESPW + setter or clobber insn is found (or the beginning of + the block). At the moment we just look back one insn. */ + prev_insn = prev_nonnote_insn (cur_insn); + + if (prev_insn == NULL || ! INSN_P (prev_insn)) + continue; + + /* An UNSPEC might be an LIW insn which will not set the + condition code flags in a way that we currently expect. 
*/ + if (GET_CODE (PATTERN (prev_insn)) == UNSPEC) + continue; + + if (GET_CODE (PATTERN (prev_insn)) != PARALLEL + || XVECLEN (PATTERN (prev_insn), 0) != 2 + || GET_CODE (XVECEXP (PATTERN (prev_insn), 0, 0)) != SET) + continue; + + compare_reg = REGNO (XEXP (SET_SRC (pattern), 0)); + pattern = XVECEXP (PATTERN (prev_insn), 0, 0); + + if (! REG_P (SET_DEST (pattern)) + || REGNO (SET_DEST (pattern)) != compare_reg) + continue; + + branch = next_nonnote_insn (cur_insn); + if (branch == NULL || ! JUMP_P (branch) + || GET_CODE (PATTERN (branch)) != SET + || GET_CODE (SET_SRC (PATTERN (branch))) != IF_THEN_ELSE) + continue; + condition = XEXP (SET_SRC (PATTERN (branch)), 0); + + switch (GET_CODE (condition)) + { + case EQ: + case NE: + break; + default: + continue; + } + + /* Adding 1 or 4 to an address register results in an + INC/INC4 instruction that doesn't set the flags. */ + if (GET_CODE (SET_SRC (pattern)) == PLUS + && REG_P (SET_DEST (pattern)) + && REGNO (SET_DEST (pattern)) >= FIRST_ADDRESS_REGNUM + && REGNO (SET_DEST (pattern)) <= LAST_ADDRESS_REGNUM + && REG_P (XEXP (SET_SRC (pattern), 0)) + && REGNO (XEXP (SET_SRC (pattern), 0)) == REGNO (SET_DEST (pattern)) + && CONST_INT_P (XEXP (SET_SRC (pattern), 1)) + && (INTVAL (XEXP (SET_SRC (pattern), 1)) == 1 + || INTVAL (XEXP (SET_SRC (pattern), 1)) == 4)) + continue; + + switch (GET_CODE (SET_SRC (pattern))) + { + case PLUS: + case MINUS: + case MULT: +#if 0 + /* Some alternatives in the AND pattern use EXTBU which does + not set the flags. Hence a CMP following an AND might be + needed. */ + case AND: +#endif + case XOR: + case NOT: + case ASHIFT: + case LSHIFTRT: + case ASHIFTRT: + delete_insn (cur_insn); + break; + default: + break; + } + } + } +} + +/* Implements TARGET_MACHINE_DEPENDENT_REORG. */ + +static void +mn10300_reorg (void) +{ + scan_for_redundant_compares (); +} /* Initialize the GCC target structure. 
*/ @@ -2481,4 +2603,7 @@ #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST mn10300_adjust_sched_cost +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG mn10300_reorg + struct gcc_target targetm = TARGET_INITIALIZER;