Message ID | 20190327134006.GA3194@bubble.grove.modra.org |
---|---|
State | New |
Headers | show |
Series | [RS6000] PR89271, gcc.target/powerpc/vsx-simode2.c | expand |
On Thu, Mar 28, 2019 at 12:10:06AM +1030, Alan Modra wrote: > Also, using a register move cost of 2 for for power9 direct moves > gives these fails, even with the .md file tweaks: > +FAIL: gcc.target/powerpc/vec-set-char.c scan-assembler-not mtvsrwz > +FAIL: gcc.target/powerpc/vec-set-char.c scan-assembler vspltisb > +FAIL: gcc.target/powerpc/vec-set-char.c scan-assembler xxspltib > +FAIL: gcc.target/powerpc/vec-set-int.c scan-assembler-not mtvsrwz > +FAIL: gcc.target/powerpc/vec-set-int.c scan-assembler vspltisw > +FAIL: gcc.target/powerpc/vec-set-short.c scan-assembler-not mtvsrwz > +FAIL: gcc.target/powerpc/vec-set-short.c scan-assembler vspltish > +FAIL: gcc.target/powerpc/vec-set-short.c scan-assembler xxspltib > +FAIL: gcc.target/powerpc/vsx-himode3.c scan-assembler lxsihzx > +FAIL: gcc.target/powerpc/vsx-qimode3.c scan-assembler lxsibzx > These can be all be fixed by removing "?"s disparaging vector > alternatives in movsi_internal1 and mov<mode>_internal. Like this. Bootstrapped and regression tested powerpc64le-linux. OK for stage1? * config/rs6000/rs6000.c (rs6000_register_move_cost): Reduce power9 direct move cost. * config/rs6000/rs6000.md (movsi_internal1): Don't disparage vector alternatives. (mov<mode>_internal): Likewise, excepting alternative that will be split. diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index fbc16c65de4..a9e27b356df 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -35010,7 +35010,7 @@ rs6000_register_move_cost (machine_mode mode, if (rs6000_tune == PROCESSOR_POWER8) ret = 4 * hard_regno_nregs (0, mode); else - ret = 3 * hard_regno_nregs (0, mode); + ret = 2 * hard_regno_nregs (0, mode); /* SFmode requires a conversion when moving between gprs and vsx. */ if (mode == SFmode) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d383504c600..2fbab973907 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6832,10 +6832,10 @@ (define_insn "movsi_low" ;; MF%1 MT%0 NOP (define_insn "*movsi_internal1" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r, r, r, ?wI, ?wH, - m, ?Z, ?Z, r, r, - r, ?wIwH, ?wJwK, ?wJwK, ?wu, - ?wJwK, ?wH, ?wK, ?wIwH, ?r, + "=r, r, r, wI, wH, + m, Z, Z, r, r, + r, wIwH, wJwK, wJwK, wu, + wJwK, wH, wK, wIwH, r, r, *h, *h") (match_operand:SI 1 "input_operand" @@ -7106,13 +7106,13 @@ (define_expand "mov<mode>" ;; MTVSRWZ MF%1 MT%1 NOP (define_insn "*mov<mode>_internal" [(set (match_operand:QHI 0 "nonimmediate_operand" - "=r, r, ?wJwK, m, Z, r, - ?wJwK, ?wJwK, ?wJwK, ?wK, ?wK, r, - ?wJwK, r, *c*l, *h") + "=r, r, wJwK, m, Z, r, + wJwK, wJwK, wJwK, wK, ?wK, r, + wJwK, r, *c*l, *h") (match_operand:QHI 1 "input_operand" "r, m, Z, r, wJwK, i, - wJwK, O, wM, wB, wS, ?wJwK, + wJwK, O, wM, wB, wS, wJwK, r, *h, r, 0"))] "gpc_reg_operand (operands[0], <MODE>mode)
Hi! On Thu, Mar 28, 2019 at 12:10:06AM +1030, Alan Modra wrote: > This patch makes a number of corrections to rs6000_register_move_cost, > adds a new register union class, GEN_OR_VSX_REGS, and adjusts insn > alternative to suit. Cool beans. [ snip various great explanations, thanks! ] > TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS is needed for rs6000 in order > to fix the 20% cactus_adm spec regression when using GEN_OR_VSX_REGS > as an allocno class. It is similar to the aarch64 version but without > any selection by regno mode if the best class is a union class. It would be nice if we could do without such hacks. Alas. > OK assuming Pat's latest spec test run doesn't contain any nasty > surprises? Not for stage 4, no. Sorry. But it should be great in stage 1 :-) > diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h > index 9fb36e41e7d..20c59f89c8f 100644 > --- a/gcc/config/rs6000/darwin.h > +++ b/gcc/config/rs6000/darwin.h > @@ -346,7 +346,8 @@ extern int darwin_emit_branch_islands; > && reg_class_subset_p (BASE_REGS, (CLASS))) \ > ? BASE_REGS \ > : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \ > - && (CLASS) == GEN_OR_FLOAT_REGS) \ > + && ((CLASS) == GEN_OR_FLOAT_REGS \ > + || (CLASS) == GEN_OR_VSX_REGS)) \ > ? GENERAL_REGS \ > : (CLASS)) Darwin doesn't do VSX at all... But maybe there is something that can get allocated to both FPRs and VRs, sure. And GPRs. This PREFERRED_RELOAD_CLASS, but it makes me worried if there are places where we should care about this change for correctness. > @@ -20236,7 +20239,8 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) > return NO_REGS; > } > > - if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS) > + if (GET_MODE_CLASS (mode) == MODE_INT > + && (rclass == GEN_OR_FLOAT_REGS || rclass == GEN_OR_VSX_REGS)) > return GENERAL_REGS; Maybe do this whenever rclass contains the GPRs? > @@ -34966,22 +34970,56 @@ rs6000_register_move_cost (machine_mode mode, > reg_class_t from, reg_class_t to) > { > int ret; > + reg_class_t rclass; > > if (TARGET_DEBUG_COST) > dbg_cost_ctrl++; > > + /* If we have VSX, we can easily move between FPR or Altivec registers, > + otherwise we can only easily move within classes. > + Do this first so we give best-case answers for union classes > + containing both gprs and vsx regs. */ > + HARD_REG_SET to_vsx, from_vsx; > + COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]); > + AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]); > + COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]); > + AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]); This is a bit expensive to run at every call of rs6000_register_move_cost. Can it be precomputed easily? > + { > + if (TARGET_DIRECT_MOVE > + && (rs6000_tune == PROCESSOR_POWER8 > + || rs6000_tune == PROCESSOR_POWER9)) TARGET_DIRECT_MOVE is always on for these CPUs. Should this also use the m*vsr* cost with say -mcpu=power7 -mtune=power9? If so you can simplify this condition. Or maybe it give problems elsewhere? It's not really worth spending to much time on, separate -mtune isn't used much at all. > +static reg_class_t > +rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED, > + reg_class_t allocno_class, > + reg_class_t best_class) > +{ > + switch (allocno_class) > + { > + default: > + break; Please put default cases at the end. > + gcc_checking_assert (best_class == GEN_OR_VSX_REGS > + || best_class == GEN_OR_FLOAT_REGS > + || best_class == VSX_REGS > + || best_class == ALTIVEC_REGS > + || best_class == FLOAT_REGS > + || best_class == GENERAL_REGS > + || best_class == BASE_REGS); This list makes me think... Should there be a GEN_OR_ALTIVEC as well? Segher
On Thu, Mar 28, 2019 at 09:15:54PM +1030, Alan Modra wrote: > On Thu, Mar 28, 2019 at 12:10:06AM +1030, Alan Modra wrote: > > Also, using a register move cost of 2 for for power9 direct moves > > gives these fails, even with the .md file tweaks: > > +FAIL: gcc.target/powerpc/vec-set-char.c scan-assembler-not mtvsrwz > > +FAIL: gcc.target/powerpc/vec-set-char.c scan-assembler vspltisb > > +FAIL: gcc.target/powerpc/vec-set-char.c scan-assembler xxspltib > > +FAIL: gcc.target/powerpc/vec-set-int.c scan-assembler-not mtvsrwz > > +FAIL: gcc.target/powerpc/vec-set-int.c scan-assembler vspltisw > > +FAIL: gcc.target/powerpc/vec-set-short.c scan-assembler-not mtvsrwz > > +FAIL: gcc.target/powerpc/vec-set-short.c scan-assembler vspltish > > +FAIL: gcc.target/powerpc/vec-set-short.c scan-assembler xxspltib > > +FAIL: gcc.target/powerpc/vsx-himode3.c scan-assembler lxsihzx > > +FAIL: gcc.target/powerpc/vsx-qimode3.c scan-assembler lxsibzx > > These can be all be fixed by removing "?"s disparaging vector > > alternatives in movsi_internal1 and mov<mode>_internal. > > Like this. Bootstrapped and regression tested powerpc64le-linux. > OK for stage1? Yup, together with the previous patch. Thanks, Segher
> On 28 Mar 2019, at 18:08, Segher Boessenkool <segher@kernel.crashing.org> wrote: > >> diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h >> index 9fb36e41e7d..20c59f89c8f 100644 >> --- a/gcc/config/rs6000/darwin.h >> +++ b/gcc/config/rs6000/darwin.h >> @@ -346,7 +346,8 @@ extern int darwin_emit_branch_islands; >> && reg_class_subset_p (BASE_REGS, (CLASS))) \ >> ? BASE_REGS \ >> : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \ >> - && (CLASS) == GEN_OR_FLOAT_REGS) \ >> + && ((CLASS) == GEN_OR_FLOAT_REGS \ >> + || (CLASS) == GEN_OR_VSX_REGS)) \ >> ? GENERAL_REGS \ >> : (CLASS)) > > Darwin doesn't do VSX at all… Well.. Darwin doesn’t currently run on any CPU with VSX hardware… However, in their wisdom, the folks who were implementing it way back when made Darwin have a soft implementation of V2DF and V2DI (in case that matters, which seems unlikely in this context). > But maybe there is something that can get > allocated to both FPRs and VRs, sure. And GPRs. not sure what is being asked or stated here - is there a question about ABI, or just the assertion that some quantities could be placed in GPRs, FPRs or VRs? (the latter seems reasonable to me, the former I’d need to think some more about). Iain
On Thu, Mar 28, 2019 at 01:08:55PM -0500, Segher Boessenkool wrote: > > TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS is needed for rs6000 in order > > to fix the 20% cactus_adm spec regression when using GEN_OR_VSX_REGS > > as an allocno class. It is similar to the aarch64 version but without > > any selection by regno mode if the best class is a union class. > > It would be nice if we could do without such hacks. Alas. Yeah. We have some serious problems with register pressure calculations in sched1 and ira. This is merely a workaround for the regression. I intend to poke a little more at the scheduler to see whether I can figure out a proper fix, but for now this is the best I have. > > OK assuming Pat's latest spec test run doesn't contain any nasty > > surprises? > > Not for stage 4, no. Sorry. But it should be great in stage 1 :-) Good. I'm happy to leave this until next stage 1. > > diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h > > index 9fb36e41e7d..20c59f89c8f 100644 > > --- a/gcc/config/rs6000/darwin.h > > +++ b/gcc/config/rs6000/darwin.h > > @@ -346,7 +346,8 @@ extern int darwin_emit_branch_islands; > > && reg_class_subset_p (BASE_REGS, (CLASS))) \ > > ? BASE_REGS \ > > : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \ > > - && (CLASS) == GEN_OR_FLOAT_REGS) \ > > + && ((CLASS) == GEN_OR_FLOAT_REGS \ > > + || (CLASS) == GEN_OR_VSX_REGS)) \ > > ? GENERAL_REGS \ > > : (CLASS)) > > Darwin doesn't do VSX at all... But maybe there is something that can get > allocated to both FPRs and VRs, sure. And GPRs. > > This PREFERRED_RELOAD_CLASS, but it makes me worried if there are places > where we should care about this change for correctness. For sure there are other places. A new union register class trips all those places where union classes fail. For example, ira.c:setup_class_translate_array doesn't give useful answers for GEN_OR_VSX_REGS, or GEN_OR_FLOAT_REGS for that matter. That makes uses of ira_allocno_class_translate and ira_pressure_class_translate for pseudos suspect whenever one of the union classes is involved. rs6000_ira_change_pseudo_allocno_class helps of course, in that GEN_OR_VSX_REGS mostly won't be used. > > @@ -20236,7 +20239,8 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) > > return NO_REGS; > > } > > > > - if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS) > > + if (GET_MODE_CLASS (mode) == MODE_INT > > + && (rclass == GEN_OR_FLOAT_REGS || rclass == GEN_OR_VSX_REGS)) > > return GENERAL_REGS; > > Maybe do this whenever rclass contains the GPRs? Well, you caught me out here. Like the darwin.h change I made this change early on in the process of fixing register_move_cost, on the grounds that whatever we did for GEN_OR_FLOAT_REGS probably should be done for GEN_OR_VSX_REGS. That's not a really good reason particularly since this code is so old (git a99459e46d, svn r35162). Maybe it's just a reload hack? So you might be better questioning the need for this change at all. I'll see how the test results look if I remove it. Int modes can live in VSX regs, after all. > > @@ -34966,22 +34970,56 @@ rs6000_register_move_cost (machine_mode mode, > > reg_class_t from, reg_class_t to) > > { > > int ret; > > + reg_class_t rclass; > > > > if (TARGET_DEBUG_COST) > > dbg_cost_ctrl++; > > > > + /* If we have VSX, we can easily move between FPR or Altivec registers, > > + otherwise we can only easily move within classes. > > + Do this first so we give best-case answers for union classes > > + containing both gprs and vsx regs. */ > > + HARD_REG_SET to_vsx, from_vsx; > > + COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]); > > + AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]); > > + COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]); > > + AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]); > > This is a bit expensive to run at every call of rs6000_register_move_cost. > Can it be precomputed easily? register_move_cost tends to be cached by callers of this function, so I'm inclined to go for simple and correct rather than fast and complicated. > > + { > > + if (TARGET_DIRECT_MOVE > > + && (rs6000_tune == PROCESSOR_POWER8 > > + || rs6000_tune == PROCESSOR_POWER9)) > > TARGET_DIRECT_MOVE is always on for these CPUs. Should this also use the > m*vsr* cost with say -mcpu=power7 -mtune=power9? No, because if we don't generate m*vsr*, and we shouldn't, then that would be telling a lie. > This list makes me think... Should there be a GEN_OR_ALTIVEC as well? That might make sense for pre-vsx processors, if you can find a testcase where the GENERAL_REGS cost is the same as ALTIVEC_REGS cost. Extending rs6000_ira_change_pseudo_allocno_class to handle GEN_OR_FLOAT_REGS and VSX_REGS for !TARGET_VSX might be an idea too. Where the cost for moves between float and altivec is low, you'll find that VSX_REGS is always used as the allocno class whenever ALTIVEC_REGS is preferred. So for power7 and later I expect GEN_OR_ALTIVEC_REGS wouldn't ever be used as an allocno class.
Hi Iain, On Fri, Mar 29, 2019 at 01:32:28AM +0000, Iain Sandoe wrote: > > > On 28 Mar 2019, at 18:08, Segher Boessenkool <segher@kernel.crashing.org> wrote: > >> diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h > >> index 9fb36e41e7d..20c59f89c8f 100644 > >> --- a/gcc/config/rs6000/darwin.h > >> +++ b/gcc/config/rs6000/darwin.h > >> @@ -346,7 +346,8 @@ extern int darwin_emit_branch_islands; > >> && reg_class_subset_p (BASE_REGS, (CLASS))) \ > >> ? BASE_REGS \ > >> : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \ > >> - && (CLASS) == GEN_OR_FLOAT_REGS) \ > >> + && ((CLASS) == GEN_OR_FLOAT_REGS \ > >> + || (CLASS) == GEN_OR_VSX_REGS)) \ > >> ? GENERAL_REGS \ > >> : (CLASS)) > > > > Darwin doesn't do VSX at all… > > Well.. Darwin doesn’t currently run on any CPU with VSX hardware… "Currently"? Do you have plans to change that? :-) > However, in their wisdom, the folks who were implementing it way back when > made Darwin have a soft implementation of V2DF and V2DI (in case that > matters, which seems unlikely in this context). > > > But maybe there is something that can get > > allocated to both FPRs and VRs, sure. And GPRs. > > not sure what is being asked or stated here - is there a question about ABI, or > just the assertion that some quantities could be placed in GPRs, FPRs or VRs? > > (the latter seems reasonable to me, the former I’d need to think some more > about). It is mostly questioning if Darwin should have any VSX code. The change here seems to be harmless, but does it make much sense? Segher
Hi Segher, > On 29 Mar 2019, at 09:33, Segher Boessenkool <segher@kernel.crashing.org> wrote: > On Fri, Mar 29, 2019 at 01:32:28AM +0000, Iain Sandoe wrote: >> >>> On 28 Mar 2019, at 18:08, Segher Boessenkool <segher@kernel.crashing.org> wrote: >>>> diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h >>>> index 9fb36e41e7d..20c59f89c8f 100644 >>>> --- a/gcc/config/rs6000/darwin.h >>>> +++ b/gcc/config/rs6000/darwin.h >>>> @@ -346,7 +346,8 @@ extern int darwin_emit_branch_islands; >>>> && reg_class_subset_p (BASE_REGS, (CLASS))) \ >>>> ? BASE_REGS \ >>>> : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \ >>>> - && (CLASS) == GEN_OR_FLOAT_REGS) \ >>>> + && ((CLASS) == GEN_OR_FLOAT_REGS \ >>>> + || (CLASS) == GEN_OR_VSX_REGS)) \ >>>> ? GENERAL_REGS \ >>>> : (CLASS)) >>> >>> Darwin doesn't do VSX at all… >> >> Well.. Darwin doesn’t currently run on any CPU with VSX hardware… > > "Currently"? Do you have plans to change that? :-) Well .. we all have long-term objectives, right? :-) >> However, in their wisdom, the folks who were implementing it way back when >> made Darwin have a soft implementation of V2DF and V2DI (in case that >> matters, which seems unlikely in this context). >> >>> But maybe there is something that can get >>> allocated to both FPRs and VRs, sure. And GPRs. >> >> not sure what is being asked or stated here - is there a question about ABI, or >> just the assertion that some quantities could be placed in GPRs, FPRs or VRs? >> >> (the latter seems reasonable to me, the former I’d need to think some more >> about). > > It is mostly questioning if Darwin should have any VSX code. The change > here seems to be harmless, but does it make much sense? I don’t think it makes any sense to add any more overt VSX support (the oddity of soft support for the two VSX vector sizes mentioned is a pain when dealing with other compilers), thanks Iain
Hi! On Fri, Mar 29, 2019 at 04:00:55PM +1030, Alan Modra wrote: > On Thu, Mar 28, 2019 at 01:08:55PM -0500, Segher Boessenkool wrote: > > Darwin doesn't do VSX at all... But maybe there is something that can get > > allocated to both FPRs and VRs, sure. And GPRs. > > > > This PREFERRED_RELOAD_CLASS, but it makes me worried if there are places > > where we should care about this change for correctness. > > For sure there are other places. A new union register class trips all > those places where union classes fail. For example, > ira.c:setup_class_translate_array doesn't give useful answers for > GEN_OR_VSX_REGS, or GEN_OR_FLOAT_REGS for that matter. That makes > uses of ira_allocno_class_translate and ira_pressure_class_translate > for pseudos suspect whenever one of the union classes is involved. And not all of those places will fail in an obviouswayfail. Well, stage 1, we'll find them all in time :-) > rs6000_ira_change_pseudo_allocno_class helps of course, in that > GEN_OR_VSX_REGS mostly won't be used. Heh. > > > @@ -34966,22 +34970,56 @@ rs6000_register_move_cost (machine_mode mode, > > > reg_class_t from, reg_class_t to) > > > { > > > int ret; > > > + reg_class_t rclass; > > > > > > if (TARGET_DEBUG_COST) > > > dbg_cost_ctrl++; > > > > > > + /* If we have VSX, we can easily move between FPR or Altivec registers, > > > + otherwise we can only easily move within classes. > > > + Do this first so we give best-case answers for union classes > > > + containing both gprs and vsx regs. */ > > > + HARD_REG_SET to_vsx, from_vsx; > > > + COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]); > > > + AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]); > > > + COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]); > > > + AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]); > > > > This is a bit expensive to run at every call of rs6000_register_move_cost. > > Can it be precomputed easily? > > register_move_cost tends to be cached by callers of this function, so > I'm inclined to go for simple and correct rather than fast and > complicated. The main problem is that every op on HARD_REG_SET can be *very* expensive. It isn't obvious to a reader how big it is. But this is target code of course, and for us it is only 128 bits these days. Okay. > > > + { > > > + if (TARGET_DIRECT_MOVE > > > + && (rs6000_tune == PROCESSOR_POWER8 > > > + || rs6000_tune == PROCESSOR_POWER9)) > > > > TARGET_DIRECT_MOVE is always on for these CPUs. Should this also use the > > m*vsr* cost with say -mcpu=power7 -mtune=power9? > > No, because if we don't generate m*vsr*, and we shouldn't, then that > would be telling a lie. Then should we have those "tune" things in this conditional? Just do it for any direct move target? > > This list makes me think... Should there be a GEN_OR_ALTIVEC as well? > > That might make sense for pre-vsx processors, if you can find a > testcase where the GENERAL_REGS cost is the same as ALTIVEC_REGS cost. > Extending rs6000_ira_change_pseudo_allocno_class to handle > GEN_OR_FLOAT_REGS and VSX_REGS for !TARGET_VSX might be an idea too. > > Where the cost for moves between float and altivec is low, you'll find > that VSX_REGS is always used as the allocno class whenever > ALTIVEC_REGS is preferred. So for power7 and later I expect > GEN_OR_ALTIVEC_REGS wouldn't ever be used as an allocno class. I was thinking for those instructions that work only on VRs. But those already had variants that work on float (like lxssp), or they are insn for quad precision float (ieee128), and those don't go in GPRs anyway. So nothing to worry about :-) Segher
On Fri, Mar 29, 2019 at 04:47:21AM -0500, Segher Boessenkool wrote: > On Fri, Mar 29, 2019 at 04:00:55PM +1030, Alan Modra wrote: > > On Thu, Mar 28, 2019 at 01:08:55PM -0500, Segher Boessenkool wrote: > > > TARGET_DIRECT_MOVE is always on for these CPUs. Should this also use the > > > m*vsr* cost with say -mcpu=power7 -mtune=power9? > > > > No, because if we don't generate m*vsr*, and we shouldn't, then that > > would be telling a lie. > > Then should we have those "tune" things in this conditional? Just do it > for any direct move target? Oh, right. I think we still need to "tune" the cost to reflect actual hardware, but that shouldn't be in the outer condition. So if (TARGET_DIRECT_MOVE) { if (rs6000_tune != PROCESSOR_POWER9) ret = 4 * hard_regno_nregs (0, mode); else ret = 2 * hard_regno_nregs (0, mode); ... should be good enough.
On Fri, Mar 29, 2019 at 10:50:04PM +1030, Alan Modra wrote: > On Fri, Mar 29, 2019 at 04:47:21AM -0500, Segher Boessenkool wrote: > > On Fri, Mar 29, 2019 at 04:00:55PM +1030, Alan Modra wrote: > > > On Thu, Mar 28, 2019 at 01:08:55PM -0500, Segher Boessenkool wrote: > > > > TARGET_DIRECT_MOVE is always on for these CPUs. Should this also use the > > > > m*vsr* cost with say -mcpu=power7 -mtune=power9? > > > > > > No, because if we don't generate m*vsr*, and we shouldn't, then that > > > would be telling a lie. > > > > Then should we have those "tune" things in this conditional? Just do it > > for any direct move target? > > Oh, right. I think we still need to "tune" the cost to reflect > actual hardware, but that shouldn't be in the outer condition. So > > if (TARGET_DIRECT_MOVE) > { > if (rs6000_tune != PROCESSOR_POWER9) > ret = 4 * hard_regno_nregs (0, mode); > else > ret = 2 * hard_regno_nregs (0, mode); > ... > > should be good enough. Yes exactly. Segher
This is https://gcc.gnu.org/ml/gcc-patches/2019-03/msg01299.html with the fixes Segher requested, plus a few more: - delete PREFERRED_RELOAD_CLASS changes - adjust for recent register renumbering - use defines rather than hard coding register numbers - flip altivec/float test when dealing with moves within vsx regs, so that the altivec hard reg count is preferred over the fp hard reg count when both reg types are possible. - use 2 for power9 direct move cost, and remove more '?'s from insns. - use reg_class_subset_p in the test for slow LR/CTR moves Bootstrapped and regression tested powerpc64le-linux. OK for mainline? PR target/89271 * config/rs6000/rs6000.h (enum reg_class, REG_CLASS_NAMES), (REG_CLASS_CONTENTS): Add GEN_OR_VSX_REGS class. * config/rs6000/rs6000.c (rs6000_register_move_cost): Correct cost for general <-> vsx when direct moves are available. Cost union classes at minimal cost for any reg in the class. Correct calculation for moves between vsx, float, and altivec. Don't return a low cost for moves between special regs. Don't use hard coded register numbers. (TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS): Define. (rs6000_ira_change_pseudo_allocno_class): New function. * config/rs6000/rs6000.md (movsi_internal1, mov<mode>_internal), (movdi_internal32, movdi_internal64): Remove '*' from vsx register alternatives. (movsi_internal1): Don't disparage vector alternatives. (mov<mode>_internal): Likewise, excepting alternative that will be split. * config/rs6000/vsx.md (vsx_splat_<mode>_reg): Don't disparage we <- b alternative. diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 5d5765d89b2..e7c63c263ae 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1729,6 +1729,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost #undef TARGET_MEMORY_MOVE_COST #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost +#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS +#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \ + rs6000_ira_change_pseudo_allocno_class #undef TARGET_CANNOT_COPY_INSN_P #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p #undef TARGET_RTX_COSTS @@ -34648,22 +34651,54 @@ rs6000_register_move_cost (machine_mode mode, reg_class_t from, reg_class_t to) { int ret; + reg_class_t rclass; if (TARGET_DEBUG_COST) dbg_cost_ctrl++; + /* If we have VSX, we can easily move between FPR or Altivec registers, + otherwise we can only easily move within classes. + Do this first so we give best-case answers for union classes + containing both gprs and vsx regs. */ + HARD_REG_SET to_vsx, from_vsx; + COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]); + AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]); + COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]); + AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]); + if (!hard_reg_set_empty_p (to_vsx) + && !hard_reg_set_empty_p (from_vsx) + && (TARGET_VSX + || hard_reg_set_intersect_p (to_vsx, from_vsx))) + { + int reg = FIRST_FPR_REGNO; + if (TARGET_VSX + || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO) + && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO))) + reg = FIRST_ALTIVEC_REGNO; + ret = 2 * hard_regno_nregs (reg, mode); + } + /* Moves from/to GENERAL_REGS. */ - if (reg_classes_intersect_p (to, GENERAL_REGS) - || reg_classes_intersect_p (from, GENERAL_REGS)) + else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS)) + || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS))) { - reg_class_t rclass = from; - - if (! reg_classes_intersect_p (to, GENERAL_REGS)) - rclass = to; - if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS) - ret = (rs6000_memory_move_cost (mode, rclass, false) - + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); + { + if (TARGET_DIRECT_MOVE) + { + if (rs6000_tune != PROCESSOR_POWER9) + ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode); + else + ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode); + /* SFmode requires a conversion when moving between gprs + and vsx. */ + if (mode == SFmode) + ret += 2; + } + else + ret = (rs6000_memory_move_cost (mode, rclass, false) + + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); + } /* It's more expensive to move CR_REGS than CR0_REGS because of the shift. */ @@ -34676,24 +34711,14 @@ rs6000_register_move_cost (machine_mode mode, || rs6000_tune == PROCESSOR_POWER7 || rs6000_tune == PROCESSOR_POWER8 || rs6000_tune == PROCESSOR_POWER9) - && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS)) - ret = 6 * hard_regno_nregs (0, mode); + && reg_class_subset_p (rclass, SPECIAL_REGS)) + ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode); else /* A move will cost one instruction per GPR moved. */ - ret = 2 * hard_regno_nregs (0, mode); + ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode); } - /* If we have VSX, we can easily move between FPR or Altivec registers. */ - else if (VECTOR_MEM_VSX_P (mode) - && reg_classes_intersect_p (to, VSX_REGS) - && reg_classes_intersect_p (from, VSX_REGS)) - ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode); - - /* Moving between two similar registers is just one instruction. */ - else if (reg_classes_intersect_p (to, from)) - ret = (FLOAT128_2REG_P (mode)) ? 4 : 2; - /* Everything else has to go through GENERAL_REGS. */ else ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to) @@ -34746,6 +34771,64 @@ rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass, return ret; } +/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. + + The register allocator chooses GEN_OR_VSX_REGS for the allocno + class if GENERAL_REGS and VSX_REGS cost is lower than the memory + cost. This happens a lot when TARGET_DIRECT_MOVE makes the register + move cost between GENERAL_REGS and VSX_REGS low. + + It might seem reasonable to use a union class. After all, if usage + of vsr is low and gpr high, it might make sense to spill gpr to vsr + rather than memory. However, in cases where register pressure of + both is high, like the cactus_adm spec test, allowing + GEN_OR_VSX_REGS as the allocno class results in bad decisions in + the first scheduling pass. This is partly due to an allocno of + GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure + class, which gives too high a pressure for GENERAL_REGS and too low + for VSX_REGS. So, force a choice of the subclass here. + + The best class is also the union if GENERAL_REGS and VSX_REGS have + the same cost. In that case we do use GEN_OR_VSX_REGS as the + allocno class, since trying to narrow down the class by regno mode + is prone to error. For example, SImode is allowed in VSX regs and + in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect) + it would be wrong to choose an allocno of GENERAL_REGS based on + SImode. */ + +static reg_class_t +rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED, + reg_class_t allocno_class, + reg_class_t best_class) +{ + switch (allocno_class) + { + case GEN_OR_VSX_REGS: + /* best_class must be a subset of allocno_class. */ + gcc_checking_assert (best_class == GEN_OR_VSX_REGS + || best_class == GEN_OR_FLOAT_REGS + || best_class == VSX_REGS + || best_class == ALTIVEC_REGS + || best_class == FLOAT_REGS + || best_class == GENERAL_REGS + || best_class == BASE_REGS); + /* Use best_class but choose wider classes when copying from the + wider class to best_class is cheap. This mimics IRA choice + of allocno class. */ + if (best_class == BASE_REGS) + return GENERAL_REGS; + if (TARGET_VSX + && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS)) + return VSX_REGS; + return best_class; + + default: + break; + } + + return allocno_class; +} + /* Returns a code for a target-specific builtin that implements reciprocal of the function, or NULL_TREE if not available. */ diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 14a9e199bc8..30a72dd5e55 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1141,6 +1141,7 @@ enum reg_class VRSAVE_REGS, VSCR_REGS, GEN_OR_FLOAT_REGS, + GEN_OR_VSX_REGS, LINK_REGS, CTR_REGS, LINK_OR_CTR_REGS, @@ -1169,6 +1170,7 @@ enum reg_class "VRSAVE_REGS", \ "VSCR_REGS", \ "GEN_OR_FLOAT_REGS", \ + "GEN_OR_VSX_REGS", \ "LINK_REGS", \ "CTR_REGS", \ "LINK_OR_CTR_REGS", \ @@ -1205,6 +1207,8 @@ enum reg_class { 0x00000000, 0x00000000, 0x00000000, 0x00002000 }, \ /* GEN_OR_FLOAT_REGS. */ \ { 0xffffffff, 0xffffffff, 0x00000000, 0x00004008 }, \ + /* GEN_OR_VSX_REGS. */ \ + { 0xffffffff, 0xffffffff, 0xffffffff, 0x00004008 }, \ /* LINK_REGS. */ \ { 0x00000000, 0x00000000, 0x00000000, 0x00000001 }, \ /* CTR_REGS. */ \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 411d7f0d352..8da7aba4080 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6830,10 +6830,10 @@ (define_insn "movsi_low" ;; MF%1 MT%0 NOP (define_insn "*movsi_internal1" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r, r, r, ?*wI, ?*wH, - m, ?Z, ?Z, r, r, - r, ?*wIwH, ?*wJwK, ?*wJwK, ?*wu, - ?*wJwK, ?*wH, ?*wK, ?*wIwH, ?r, + "=r, r, r, wI, wH, + m, Z, Z, r, r, + r, wIwH, wJwK, wJwK, wu, + wJwK, wH, wK, wIwH, r, r, *h, *h") (match_operand:SI 1 "input_operand" @@ -7104,13 +7104,13 @@ (define_expand "mov<mode>" ;; MTVSRWZ MF%1 MT%1 NOP (define_insn "*mov<mode>_internal" [(set (match_operand:QHI 0 "nonimmediate_operand" - "=r, r, ?*wJwK, m, Z, r, - ?*wJwK, ?*wJwK, ?*wJwK, ?*wK, ?*wK, r, - ?*wJwK, r, *c*l, *h") + "=r, r, wJwK, m, Z, r, + wJwK, wJwK, wJwK, wK, ?wK, r, + wJwK, r, *c*l, *h") (match_operand:QHI 1 "input_operand" "r, m, Z, r, wJwK, i, - wJwK, O, wM, wB, wS, ?*wJwK, + wJwK, O, wM, wB, wS, wJwK, r, *h, r, 0"))] "gpc_reg_operand (operands[0], <MODE>mode) @@ -8671,8 +8671,8 @@ (define_insn "*movdi_internal32" [(set (match_operand:DI 0 "nonimmediate_operand" "=Y, r, r, m, ^d, ^d, r, wY, Z, ^wb, $wv, ^wi, - *wo, *wo, *wv, *wi, *wi, *wv, - *wv") + wo, wo, wv, wi, *i, wv, + wv") (match_operand:DI 1 "input_operand" "r, Y, r, ^d, m, ^d, @@ -8751,9 +8751,9 @@ (define_insn "*movdi_internal64" [(set (match_operand:DI 0 "nonimmediate_operand" "=YZ, r, r, r, r, r, m, ^d, ^d, wY, Z, $wb, - $wv, ^wi, *wo, *wo, *wv, *wi, - *wi, *wv, *wv, r, *h, *h, - ?*r, ?*wg, ?*r, ?*wj") + $wv, ^wi, wo, wo, wv, wi, + wi, wv, wv, r, *h, *h, + ?r, ?wg, ?r, ?wj") (match_operand:DI 1 "input_operand" "r, YZ, r, I, L, nF, diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 607c0cd33f2..80434d10247 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4106,7 +4106,7 @@ (define_expand "vsx_splat_<mode>" }) (define_insn "vsx_splat_<mode>_reg" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we") + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,we") (vec_duplicate:VSX_D (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))] "VECTOR_MEM_VSX_P (<MODE>mode)"
Hi Alan, On Wed, May 08, 2019 at 03:02:48PM +0930, Alan Modra wrote: > This is https://gcc.gnu.org/ml/gcc-patches/2019-03/msg01299.html with > the fixes Segher requested, plus a few more: > - delete PREFERRED_RELOAD_CLASS changes > - adjust for recent register renumbering > - use defines rather than hard coding register numbers Did you need to adjust for the renumbering _at all_ after that? (edit: Ah, the reg sets). > - flip altivec/float test when dealing with moves within vsx regs, > so that the altivec hard reg count is preferred over the fp hard reg > count when both reg types are possible. > - use 2 for power9 direct move cost, and remove more '?'s from insns. > - use reg_class_subset_p in the test for slow LR/CTR moves Super minor: > + if (TARGET_DIRECT_MOVE) > + { > + if (rs6000_tune != PROCESSOR_POWER9) > + ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode); > + else > + ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode); Please write that the other way around, positive conditions are easier to read? And newer stuff first is nice, too. Thanks for the patch! Please apply to trunk. And watch for fallout... It is kind of inevitable I'm afraid. But it's stage 1 now. Segher
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h index 9fb36e41e7d..20c59f89c8f 100644 --- a/gcc/config/rs6000/darwin.h +++ b/gcc/config/rs6000/darwin.h @@ -346,7 +346,8 @@ extern int darwin_emit_branch_islands; && reg_class_subset_p (BASE_REGS, (CLASS))) \ ? BASE_REGS \ : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \ - && (CLASS) == GEN_OR_FLOAT_REGS) \ + && ((CLASS) == GEN_OR_FLOAT_REGS \ + || (CLASS) == GEN_OR_VSX_REGS)) \ ? GENERAL_REGS \ : (CLASS)) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index cc8dc941537..fbc16c65de4 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1737,6 +1737,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost #undef TARGET_MEMORY_MOVE_COST #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost +#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS +#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \ + rs6000_ira_change_pseudo_allocno_class #undef TARGET_CANNOT_COPY_INSN_P #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p #undef TARGET_RTX_COSTS @@ -20236,7 +20239,8 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) return NO_REGS; } - if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS) + if (GET_MODE_CLASS (mode) == MODE_INT + && (rclass == GEN_OR_FLOAT_REGS || rclass == GEN_OR_VSX_REGS)) return GENERAL_REGS; return rclass; @@ -34966,22 +34970,56 @@ rs6000_register_move_cost (machine_mode mode, reg_class_t from, reg_class_t to) { int ret; + reg_class_t rclass; if (TARGET_DEBUG_COST) dbg_cost_ctrl++; + /* If we have VSX, we can easily move between FPR or Altivec registers, + otherwise we can only easily move within classes. + Do this first so we give best-case answers for union classes + containing both gprs and vsx regs. */ + HARD_REG_SET to_vsx, from_vsx; + COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]); + AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]); + COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]); + AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]); + if (!hard_reg_set_empty_p (to_vsx) + && !hard_reg_set_empty_p (from_vsx) + && (TARGET_VSX + || hard_reg_set_intersect_p (to_vsx, from_vsx))) + { + int reg = FIRST_ALTIVEC_REGNO; + if (TARGET_VSX + || (TEST_HARD_REG_BIT (to_vsx, FIRST_FPR_REGNO) + && TEST_HARD_REG_BIT (from_vsx, FIRST_FPR_REGNO))) + reg = FIRST_FPR_REGNO; + ret = 2 * hard_regno_nregs (reg, mode); + } + /* Moves from/to GENERAL_REGS. */ - if (reg_classes_intersect_p (to, GENERAL_REGS) - || reg_classes_intersect_p (from, GENERAL_REGS)) + else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS)) + || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS))) { - reg_class_t rclass = from; - - if (! reg_classes_intersect_p (to, GENERAL_REGS)) - rclass = to; - if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS) - ret = (rs6000_memory_move_cost (mode, rclass, false) - + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); + { + if (TARGET_DIRECT_MOVE + && (rs6000_tune == PROCESSOR_POWER8 + || rs6000_tune == PROCESSOR_POWER9)) + { + if (rs6000_tune == PROCESSOR_POWER8) + ret = 4 * hard_regno_nregs (0, mode); + else + ret = 3 * hard_regno_nregs (0, mode); + /* SFmode requires a conversion when moving between gprs + and vsx. */ + if (mode == SFmode) + ret += 2; + } + else + ret = (rs6000_memory_move_cost (mode, rclass, false) + + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); + } /* It's more expensive to move CR_REGS than CR0_REGS because of the shift. */ @@ -34994,7 +35032,8 @@ rs6000_register_move_cost (machine_mode mode, || rs6000_tune == PROCESSOR_POWER7 || rs6000_tune == PROCESSOR_POWER8 || rs6000_tune == PROCESSOR_POWER9) - && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS)) + && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS) + && !reg_classes_intersect_p (rclass, GENERAL_REGS)) ret = 6 * hard_regno_nregs (0, mode); else @@ -35002,16 +35041,6 @@ rs6000_register_move_cost (machine_mode mode, ret = 2 * hard_regno_nregs (0, mode); } - /* If we have VSX, we can easily move between FPR or Altivec registers. */ - else if (VECTOR_MEM_VSX_P (mode) - && reg_classes_intersect_p (to, VSX_REGS) - && reg_classes_intersect_p (from, VSX_REGS)) - ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode); - - /* Moving between two similar registers is just one instruction. */ - else if (reg_classes_intersect_p (to, from)) - ret = (FLOAT128_2REG_P (mode)) ? 4 : 2; - /* Everything else has to go through GENERAL_REGS. */ else ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to) @@ -35064,6 +35093,64 @@ rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass, return ret; } +/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. + + The register allocator chooses GEN_OR_VSX_REGS for the allocno + class if GENERAL_REGS and VSX_REGS cost is lower than the memory + cost. This happens a lot when TARGET_DIRECT_MOVE makes the register + move cost between GENERAL_REGS and VSX_REGS low. + + It might seem reasonable to use a union class. After all, if usage + of vsr is low and gpr high, it might make sense to spill gpr to vsr + rather than memory. However, in cases where register pressure of + both is high, like the cactus_adm spec test, allowing + GEN_OR_VSX_REGS as the allocno class results in bad decisions in + the first scheduling pass. This is partly due to an allocno of + GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure + class, which gives too high a pressure for GENERAL_REGS and too low + for VSX_REGS. So, force a choice of the subclass here. + + The best class is also the union if GENERAL_REGS and VSX_REGS have + the same cost. In that case we do use GEN_OR_VSX_REGS as the + allocno class, since trying to narrow down the class by regno mode + is prone to error. For example, SImode is allowed in VSX regs and + in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect) + it would be wrong to choose an allocno of GENERAL_REGS based on + SImode. */ + +static reg_class_t +rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED, + reg_class_t allocno_class, + reg_class_t best_class) +{ + switch (allocno_class) + { + default: + break; + + case GEN_OR_VSX_REGS: + /* best_class must be a subset of allocno_class. */ + gcc_checking_assert (best_class == GEN_OR_VSX_REGS + || best_class == GEN_OR_FLOAT_REGS + || best_class == VSX_REGS + || best_class == ALTIVEC_REGS + || best_class == FLOAT_REGS + || best_class == GENERAL_REGS + || best_class == BASE_REGS); + /* Use best_class but choose wider classes when copying from the + wider class to best_class is cheap. This mimics IRA choice + of allocno class. */ + if (best_class == BASE_REGS) + return GENERAL_REGS; + if (TARGET_VSX + && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS)) + return VSX_REGS; + return best_class; + } + + return allocno_class; +} + /* Returns a code for a target-specific builtin that implements reciprocal of the function, or NULL_TREE if not available. */ diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index ff9449c2d45..f720368cfb4 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1139,6 +1139,7 @@ enum reg_class VSCR_REGS, SPR_REGS, GEN_OR_FLOAT_REGS, + GEN_OR_VSX_REGS, LINK_REGS, CTR_REGS, LINK_OR_CTR_REGS, @@ -1168,6 +1169,7 @@ enum reg_class "VSCR_REGS", \ "SPR_REGS", \ "GEN_OR_FLOAT_REGS", \ + "GEN_OR_VSX_REGS", \ "LINK_REGS", \ "CTR_REGS", \ "LINK_OR_CTR_REGS", \ @@ -1206,6 +1208,8 @@ enum reg_class { 0x00000000, 0x00000000, 0x00000000, 0x00010000 }, \ /* GEN_OR_FLOAT_REGS. */ \ { 0xffffffff, 0xffffffff, 0x00000008, 0x00008000 }, \ + /* GEN_OR_VSX_REGS. */ \ + { 0xffffffff, 0xffffffff, 0xffffe008, 0x00009fff }, \ /* LINK_REGS. */ \ { 0x00000000, 0x00000000, 0x00000002, 0x00000000 }, \ /* CTR_REGS. */ \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b8dd85905f3..c4fddbe6f35 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6832,10 +6832,10 @@ (define_insn "movsi_low" ;; MF%1 MT%0 NOP (define_insn "*movsi_internal1" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r, r, r, ?*wI, ?*wH, + "=r, r, r, ?wI, ?wH, m, ?Z, ?Z, r, r, - r, ?*wIwH, ?*wJwK, ?*wJwK, ?*wu, - ?*wJwK, ?*wH, ?*wK, ?*wIwH, ?r, + r, ?wIwH, ?wJwK, ?wJwK, ?wu, + ?wJwK, ?wH, ?wK, ?wIwH, ?r, r, *h, *h") (match_operand:SI 1 "input_operand" @@ -7106,13 +7106,13 @@ (define_expand "mov<mode>" ;; MTVSRWZ MF%1 MT%1 NOP (define_insn "*mov<mode>_internal" [(set (match_operand:QHI 0 "nonimmediate_operand" - "=r, r, ?*wJwK, m, Z, r, - ?*wJwK, ?*wJwK, ?*wJwK, ?*wK, ?*wK, r, - ?*wJwK, r, *c*l, *h") + "=r, r, ?wJwK, m, Z, r, + ?wJwK, ?wJwK, ?wJwK, ?wK, ?wK, r, + ?wJwK, r, *c*l, *h") (match_operand:QHI 1 "input_operand" "r, m, Z, r, wJwK, i, - wJwK, O, wM, wB, wS, ?*wJwK, + wJwK, O, wM, wB, wS, ?wJwK, r, *h, r, 0"))] "gpc_reg_operand (operands[0], <MODE>mode) @@ -8673,8 +8673,8 @@ (define_insn "*movdi_internal32" [(set (match_operand:DI 0 "nonimmediate_operand" "=Y, r, r, m, ^d, ^d, r, wY, Z, ^wb, $wv, ^wi, - *wo, *wo, *wv, *wi, *wi, *wv, - *wv") + wo, wo, wv, wi, *i, wv, + wv") (match_operand:DI 1 "input_operand" "r, Y, r, ^d, m, ^d, @@ -8753,9 +8753,9 @@ (define_insn "*movdi_internal64" [(set (match_operand:DI 0 "nonimmediate_operand" "=YZ, r, r, r, r, r, m, ^d, ^d, wY, Z, $wb, - $wv, ^wi, *wo, *wo, *wv, *wi, - *wi, *wv, *wv, r, *h, *h, - ?*r, ?*wg, ?*r, ?*wj") + $wv, ^wi, wo, wo, wv, wi, + wi, wv, wv, r, *h, *h, + ?r, ?wg, ?r, ?wj") (match_operand:DI 1 "input_operand" "r, YZ, r, I, L, nF, diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f81d5fb1009..0d3d902cb86 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4106,7 +4106,7 @@ (define_expand "vsx_splat_<mode>" }) (define_insn "vsx_splat_<mode>_reg" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we") + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,we") (vec_duplicate:VSX_D (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))] "VECTOR_MEM_VSX_P (<MODE>mode)"