Message ID | VI1PR0801MB2127BA15FECEB9AB870CC07083B90@VI1PR0801MB2127.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | [ARM] Remove support for MULS | expand |
ping Remove various MULS/MLAS patterns which are enabled when optimizing for size. However the codesize gain from these patterns is so minimal that there is no point in keeping them. Bootstrap OK on armhf, regress passes. ChangeLog: 2019-09-03 Wilco Dijkstra <wdijkstr@arm.com> * config/arm/arm.md (mulsi3_compare0): Remove pattern. (mulsi3_compare0_v6): Likewise. (mulsi_compare0_scratch): Likewise. (mulsi_compare0_scratch_v6): Likewise. (mulsi3addsi_compare0): Likewise. (mulsi3addsi_compare0_v6): Likewise. (mulsi3addsi_compare0_scratch): Likewise. (mulsi3addsi_compare0_scratch_v6): Likewise. * config/arm/thumb2.md (thumb2_mulsi_short_compare0): Remove pattern. (thumb2_mulsi_short_compare0_scratch): Likewise. -- diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 738d42fd164f117f1dec1108a824d984ccd70d09..66dafdc47b7cfc37c131764e482d47bcaab90538 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -1618,60 +1618,6 @@ (define_insn "*arm_mulsi3_v6" (set_attr "predicable_short_it" "yes,yes,no")] ) -(define_insn "*mulsi3_compare0" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV (mult:SI - (match_operand:SI 2 "s_register_operand" "r,r") - (match_operand:SI 1 "s_register_operand" "%0,r")) - (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=&r,&r") - (mult:SI (match_dup 2) (match_dup 1)))] - "TARGET_ARM && !arm_arch6" - "muls%?\\t%0, %2, %1" - [(set_attr "conds" "set") - (set_attr "type" "muls")] -) - -(define_insn "*mulsi3_compare0_v6" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV (mult:SI - (match_operand:SI 2 "s_register_operand" "r") - (match_operand:SI 1 "s_register_operand" "r")) - (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=r") - (mult:SI (match_dup 2) (match_dup 1)))] - "TARGET_ARM && arm_arch6 && optimize_size" - "muls%?\\t%0, %2, %1" - [(set_attr "conds" "set") - (set_attr "type" "muls")] -) - -(define_insn "*mulsi_compare0_scratch" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV (mult:SI - (match_operand:SI 2 "s_register_operand" "r,r") - (match_operand:SI 1 "s_register_operand" "%0,r")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=&r,&r"))] - "TARGET_ARM && !arm_arch6" - "muls%?\\t%0, %2, %1" - [(set_attr "conds" "set") - (set_attr "type" "muls")] -) - -(define_insn "*mulsi_compare0_scratch_v6" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV (mult:SI - (match_operand:SI 2 "s_register_operand" "r") - (match_operand:SI 1 "s_register_operand" "r")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=r"))] - "TARGET_ARM && arm_arch6 && optimize_size" - "muls%?\\t%0, %2, %1" - [(set_attr "conds" "set") - (set_attr "type" "muls")] -) - ;; Unnamed templates to match MLA instruction. (define_insn "*mulsi3addsi" @@ -1698,70 +1644,6 @@ (define_insn "*mulsi3addsi_v6" (set_attr "predicable" "yes")] ) -(define_insn "*mulsi3addsi_compare0" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (plus:SI (mult:SI - (match_operand:SI 2 "s_register_operand" "r,r,r,r") - (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) - (match_operand:SI 3 "s_register_operand" "r,r,0,0")) - (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") - (plus:SI (mult:SI (match_dup 2) (match_dup 1)) - (match_dup 3)))] - "TARGET_ARM && arm_arch6" - "mlas%?\\t%0, %2, %1, %3" - [(set_attr "conds" "set") - (set_attr "type" "mlas")] -) - -(define_insn "*mulsi3addsi_compare0_v6" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (plus:SI (mult:SI - (match_operand:SI 2 "s_register_operand" "r") - (match_operand:SI 1 "s_register_operand" "r")) - (match_operand:SI 3 "s_register_operand" "r")) - (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (mult:SI (match_dup 2) (match_dup 1)) - (match_dup 3)))] - "TARGET_ARM && arm_arch6 && optimize_size" - "mlas%?\\t%0, %2, %1, %3" - [(set_attr "conds" "set") - (set_attr "type" "mlas")] -) - -(define_insn "*mulsi3addsi_compare0_scratch" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (plus:SI (mult:SI - (match_operand:SI 2 "s_register_operand" "r,r,r,r") - (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) - (match_operand:SI 3 "s_register_operand" "?r,r,0,0")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))] - "TARGET_ARM && !arm_arch6" - "mlas%?\\t%0, %2, %1, %3" - [(set_attr "conds" "set") - (set_attr "type" "mlas")] -) - -(define_insn "*mulsi3addsi_compare0_scratch_v6" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (plus:SI (mult:SI - (match_operand:SI 2 "s_register_operand" "r") - (match_operand:SI 1 "s_register_operand" "r")) - (match_operand:SI 3 "s_register_operand" "r")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=r"))] - "TARGET_ARM && arm_arch6 && optimize_size" - "mlas%?\\t%0, %2, %1, %3" - [(set_attr "conds" "set") - (set_attr "type" "mlas")] -) - (define_insn "*mulsi3subsi" [(set (match_operand:SI 0 "s_register_operand" "=r") (minus:SI diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 6ccc875e2b4e7b8ce256e52da966dfe220c6f5d6..8e26689b66263e7304a0da6163ceccfb4483d3e7 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -1381,31 +1381,6 @@ (define_insn "*thumb2_mulsi_short" (set_attr "length" "2") (set_attr "type" "muls")]) -(define_insn "*thumb2_mulsi_short_compare0" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (mult:SI (match_operand:SI 1 "register_operand" "%0") - (match_operand:SI 2 "register_operand" "l")) - (const_int 0))) - (set (match_operand:SI 0 "register_operand" "=l") - (mult:SI (match_dup 1) (match_dup 2)))] - "TARGET_THUMB2 && optimize_size" - "muls\\t%0, %2, %0" - [(set_attr "length" "2") - (set_attr "type" "muls")]) - -(define_insn "*thumb2_mulsi_short_compare0_scratch" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (mult:SI (match_operand:SI 1 "register_operand" "%0") - (match_operand:SI 2 "register_operand" "l")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=l"))] - "TARGET_THUMB2 && optimize_size" - "muls\\t%0, %2, %0" - [(set_attr "length" "2") - (set_attr "type" "muls")]) - (define_insn "*thumb2_cbz" [(set (pc) (if_then_else (eq (match_operand:SI 0 "s_register_operand" "l,?r")
Hi Wilco, On 9/9/19 6:07 PM, Wilco Dijkstra wrote: > ping > > > Remove various MULS/MLAS patterns which are enabled when optimizing for > size. However the codesize gain from these patterns is so minimal that > there is no point in keeping them. > I disagree. If they still trigger and generate better code than without we should keep them. What kind of code is *common* varies greatly from user to user. Thanks, Kyrill > Bootstrap OK on armhf, regress passes. > > ChangeLog: > 2019-09-03 Wilco Dijkstra <wdijkstr@arm.com> > > * config/arm/arm.md (mulsi3_compare0): Remove pattern. > (mulsi3_compare0_v6): Likewise. > (mulsi_compare0_scratch): Likewise. > (mulsi_compare0_scratch_v6): Likewise. > (mulsi3addsi_compare0): Likewise. > (mulsi3addsi_compare0_v6): Likewise. > (mulsi3addsi_compare0_scratch): Likewise. > (mulsi3addsi_compare0_scratch_v6): Likewise. > * config/arm/thumb2.md (thumb2_mulsi_short_compare0): Remove > pattern. > (thumb2_mulsi_short_compare0_scratch): Likewise. > > -- > diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md > index > 738d42fd164f117f1dec1108a824d984ccd70d09..66dafdc47b7cfc37c131764e482d47bcaab90538 > 100644 > --- a/gcc/config/arm/arm.md > +++ b/gcc/config/arm/arm.md > @@ -1618,60 +1618,6 @@ (define_insn "*arm_mulsi3_v6" > (set_attr "predicable_short_it" "yes,yes,no")] > ) > > -(define_insn "*mulsi3_compare0" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV (mult:SI > - (match_operand:SI 2 "s_register_operand" "r,r") > - (match_operand:SI 1 "s_register_operand" > "%0,r")) > - (const_int 0))) > - (set (match_operand:SI 0 "s_register_operand" "=&r,&r") > - (mult:SI (match_dup 2) (match_dup 1)))] > - "TARGET_ARM && !arm_arch6" > - "muls%?\\t%0, %2, %1" > - [(set_attr "conds" "set") > - (set_attr "type" "muls")] > -) > - > -(define_insn "*mulsi3_compare0_v6" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV (mult:SI > - (match_operand:SI 2 "s_register_operand" "r") > - (match_operand:SI 1 "s_register_operand" "r")) > - (const_int 0))) > - (set (match_operand:SI 0 "s_register_operand" "=r") > - (mult:SI (match_dup 2) (match_dup 1)))] > - "TARGET_ARM && arm_arch6 && optimize_size" > - "muls%?\\t%0, %2, %1" > - [(set_attr "conds" "set") > - (set_attr "type" "muls")] > -) > - > -(define_insn "*mulsi_compare0_scratch" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV (mult:SI > - (match_operand:SI 2 "s_register_operand" "r,r") > - (match_operand:SI 1 "s_register_operand" > "%0,r")) > - (const_int 0))) > - (clobber (match_scratch:SI 0 "=&r,&r"))] > - "TARGET_ARM && !arm_arch6" > - "muls%?\\t%0, %2, %1" > - [(set_attr "conds" "set") > - (set_attr "type" "muls")] > -) > - > -(define_insn "*mulsi_compare0_scratch_v6" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV (mult:SI > - (match_operand:SI 2 "s_register_operand" "r") > - (match_operand:SI 1 "s_register_operand" "r")) > - (const_int 0))) > - (clobber (match_scratch:SI 0 "=r"))] > - "TARGET_ARM && arm_arch6 && optimize_size" > - "muls%?\\t%0, %2, %1" > - [(set_attr "conds" "set") > - (set_attr "type" "muls")] > -) > - > ;; Unnamed templates to match MLA instruction. > > (define_insn "*mulsi3addsi" > @@ -1698,70 +1644,6 @@ (define_insn "*mulsi3addsi_v6" > (set_attr "predicable" "yes")] > ) > > -(define_insn "*mulsi3addsi_compare0" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV > - (plus:SI (mult:SI > - (match_operand:SI 2 "s_register_operand" "r,r,r,r") > - (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) > - (match_operand:SI 3 "s_register_operand" "r,r,0,0")) > - (const_int 0))) > - (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") > - (plus:SI (mult:SI (match_dup 2) (match_dup 1)) > - (match_dup 3)))] > - "TARGET_ARM && arm_arch6" > - "mlas%?\\t%0, %2, %1, %3" > - [(set_attr "conds" "set") > - (set_attr "type" "mlas")] > -) > - > -(define_insn "*mulsi3addsi_compare0_v6" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV > - (plus:SI (mult:SI > - (match_operand:SI 2 "s_register_operand" "r") > - (match_operand:SI 1 "s_register_operand" "r")) > - (match_operand:SI 3 "s_register_operand" "r")) > - (const_int 0))) > - (set (match_operand:SI 0 "s_register_operand" "=r") > - (plus:SI (mult:SI (match_dup 2) (match_dup 1)) > - (match_dup 3)))] > - "TARGET_ARM && arm_arch6 && optimize_size" > - "mlas%?\\t%0, %2, %1, %3" > - [(set_attr "conds" "set") > - (set_attr "type" "mlas")] > -) > - > -(define_insn "*mulsi3addsi_compare0_scratch" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV > - (plus:SI (mult:SI > - (match_operand:SI 2 "s_register_operand" "r,r,r,r") > - (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) > - (match_operand:SI 3 "s_register_operand" "?r,r,0,0")) > - (const_int 0))) > - (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))] > - "TARGET_ARM && !arm_arch6" > - "mlas%?\\t%0, %2, %1, %3" > - [(set_attr "conds" "set") > - (set_attr "type" "mlas")] > -) > - > -(define_insn "*mulsi3addsi_compare0_scratch_v6" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV > - (plus:SI (mult:SI > - (match_operand:SI 2 "s_register_operand" "r") > - (match_operand:SI 1 "s_register_operand" "r")) > - (match_operand:SI 3 "s_register_operand" "r")) > - (const_int 0))) > - (clobber (match_scratch:SI 0 "=r"))] > - "TARGET_ARM && arm_arch6 && optimize_size" > - "mlas%?\\t%0, %2, %1, %3" > - [(set_attr "conds" "set") > - (set_attr "type" "mlas")] > -) > - > (define_insn "*mulsi3subsi" > [(set (match_operand:SI 0 "s_register_operand" "=r") > (minus:SI > diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md > index > 6ccc875e2b4e7b8ce256e52da966dfe220c6f5d6..8e26689b66263e7304a0da6163ceccfb4483d3e7 > 100644 > --- a/gcc/config/arm/thumb2.md > +++ b/gcc/config/arm/thumb2.md > @@ -1381,31 +1381,6 @@ (define_insn "*thumb2_mulsi_short" > (set_attr "length" "2") > (set_attr "type" "muls")]) > > -(define_insn "*thumb2_mulsi_short_compare0" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV > - (mult:SI (match_operand:SI 1 "register_operand" "%0") > - (match_operand:SI 2 "register_operand" "l")) > - (const_int 0))) > - (set (match_operand:SI 0 "register_operand" "=l") > - (mult:SI (match_dup 1) (match_dup 2)))] > - "TARGET_THUMB2 && optimize_size" > - "muls\\t%0, %2, %0" > - [(set_attr "length" "2") > - (set_attr "type" "muls")]) > - > -(define_insn "*thumb2_mulsi_short_compare0_scratch" > - [(set (reg:CC_NOOV CC_REGNUM) > - (compare:CC_NOOV > - (mult:SI (match_operand:SI 1 "register_operand" "%0") > - (match_operand:SI 2 "register_operand" "l")) > - (const_int 0))) > - (clobber (match_scratch:SI 0 "=l"))] > - "TARGET_THUMB2 && optimize_size" > - "muls\\t%0, %2, %0" > - [(set_attr "length" "2") > - (set_attr "type" "muls")]) > - > (define_insn "*thumb2_cbz" > [(set (pc) (if_then_else > (eq (match_operand:SI 0 "s_register_operand" "l,?r")
On 18/09/2019 17:31, Kyrill Tkachov wrote: > Hi Wilco, > > On 9/9/19 6:07 PM, Wilco Dijkstra wrote: >> ping >> >> >> Remove various MULS/MLAS patterns which are enabled when optimizing for >> size. However the codesize gain from these patterns is so minimal that >> there is no point in keeping them. >> > I disagree. If they still trigger and generate better code than without > we should keep them. > > What kind of code is *common* varies greatly from user to user. Also, the main reason for restricting their use was that in the 'olden days', when we had multi-cycle implementations of the multiply instructions with short-circuit fast termination when the result was completed, the flag setting variants would never short-circuit. These days we have fixed cycle counts for multiply instructions, so this is no-longer a penalty. In the thumb2 case in particular we can often reduce mul-cmp (6 bytes) to muls (2 bytes), that's a 66% saving on this sequence and definitely worth exploiting when we can, even if it's not all that common. In fact, give the latest architectural implementations, I think we should look again at re-enabling these when doing normal optimization as well. R. > > Thanks, > > Kyrill > > >> Bootstrap OK on armhf, regress passes. >> >> ChangeLog: >> 2019-09-03 Wilco Dijkstra <wdijkstr@arm.com> >> >> * config/arm/arm.md (mulsi3_compare0): Remove pattern. >> (mulsi3_compare0_v6): Likewise. >> (mulsi_compare0_scratch): Likewise. >> (mulsi_compare0_scratch_v6): Likewise. >> (mulsi3addsi_compare0): Likewise. >> (mulsi3addsi_compare0_v6): Likewise. >> (mulsi3addsi_compare0_scratch): Likewise. >> (mulsi3addsi_compare0_scratch_v6): Likewise. >> * config/arm/thumb2.md (thumb2_mulsi_short_compare0): Remove >> pattern. >> (thumb2_mulsi_short_compare0_scratch): Likewise. >> >> -- >> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md >> index >> 738d42fd164f117f1dec1108a824d984ccd70d09..66dafdc47b7cfc37c131764e482d47bcaab90538 >> 100644 >> --- a/gcc/config/arm/arm.md >> +++ b/gcc/config/arm/arm.md >> @@ -1618,60 +1618,6 @@ (define_insn "*arm_mulsi3_v6" >> (set_attr "predicable_short_it" "yes,yes,no")] >> ) >> >> -(define_insn "*mulsi3_compare0" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV (mult:SI >> - (match_operand:SI 2 "s_register_operand" >> "r,r") >> - (match_operand:SI 1 "s_register_operand" >> "%0,r")) >> - (const_int 0))) >> - (set (match_operand:SI 0 "s_register_operand" "=&r,&r") >> - (mult:SI (match_dup 2) (match_dup 1)))] >> - "TARGET_ARM && !arm_arch6" >> - "muls%?\\t%0, %2, %1" >> - [(set_attr "conds" "set") >> - (set_attr "type" "muls")] >> -) >> - >> -(define_insn "*mulsi3_compare0_v6" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV (mult:SI >> - (match_operand:SI 2 "s_register_operand" "r") >> - (match_operand:SI 1 "s_register_operand" "r")) >> - (const_int 0))) >> - (set (match_operand:SI 0 "s_register_operand" "=r") >> - (mult:SI (match_dup 2) (match_dup 1)))] >> - "TARGET_ARM && arm_arch6 && optimize_size" >> - "muls%?\\t%0, %2, %1" >> - [(set_attr "conds" "set") >> - (set_attr "type" "muls")] >> -) >> - >> -(define_insn "*mulsi_compare0_scratch" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV (mult:SI >> - (match_operand:SI 2 "s_register_operand" >> "r,r") >> - (match_operand:SI 1 "s_register_operand" >> "%0,r")) >> - (const_int 0))) >> - (clobber (match_scratch:SI 0 "=&r,&r"))] >> - "TARGET_ARM && !arm_arch6" >> - "muls%?\\t%0, %2, %1" >> - [(set_attr "conds" "set") >> - (set_attr "type" "muls")] >> -) >> - >> -(define_insn "*mulsi_compare0_scratch_v6" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV (mult:SI >> - (match_operand:SI 2 "s_register_operand" "r") >> - (match_operand:SI 1 "s_register_operand" "r")) >> - (const_int 0))) >> - (clobber (match_scratch:SI 0 "=r"))] >> - "TARGET_ARM && arm_arch6 && optimize_size" >> - "muls%?\\t%0, %2, %1" >> - [(set_attr "conds" "set") >> - (set_attr "type" "muls")] >> -) >> - >> ;; Unnamed templates to match MLA instruction. >> >> (define_insn "*mulsi3addsi" >> @@ -1698,70 +1644,6 @@ (define_insn "*mulsi3addsi_v6" >> (set_attr "predicable" "yes")] >> ) >> >> -(define_insn "*mulsi3addsi_compare0" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV >> - (plus:SI (mult:SI >> - (match_operand:SI 2 "s_register_operand" "r,r,r,r") >> - (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) >> - (match_operand:SI 3 "s_register_operand" "r,r,0,0")) >> - (const_int 0))) >> - (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") >> - (plus:SI (mult:SI (match_dup 2) (match_dup 1)) >> - (match_dup 3)))] >> - "TARGET_ARM && arm_arch6" >> - "mlas%?\\t%0, %2, %1, %3" >> - [(set_attr "conds" "set") >> - (set_attr "type" "mlas")] >> -) >> - >> -(define_insn "*mulsi3addsi_compare0_v6" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV >> - (plus:SI (mult:SI >> - (match_operand:SI 2 "s_register_operand" "r") >> - (match_operand:SI 1 "s_register_operand" "r")) >> - (match_operand:SI 3 "s_register_operand" "r")) >> - (const_int 0))) >> - (set (match_operand:SI 0 "s_register_operand" "=r") >> - (plus:SI (mult:SI (match_dup 2) (match_dup 1)) >> - (match_dup 3)))] >> - "TARGET_ARM && arm_arch6 && optimize_size" >> - "mlas%?\\t%0, %2, %1, %3" >> - [(set_attr "conds" "set") >> - (set_attr "type" "mlas")] >> -) >> - >> -(define_insn "*mulsi3addsi_compare0_scratch" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV >> - (plus:SI (mult:SI >> - (match_operand:SI 2 "s_register_operand" "r,r,r,r") >> - (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) >> - (match_operand:SI 3 "s_register_operand" "?r,r,0,0")) >> - (const_int 0))) >> - (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))] >> - "TARGET_ARM && !arm_arch6" >> - "mlas%?\\t%0, %2, %1, %3" >> - [(set_attr "conds" "set") >> - (set_attr "type" "mlas")] >> -) >> - >> -(define_insn "*mulsi3addsi_compare0_scratch_v6" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV >> - (plus:SI (mult:SI >> - (match_operand:SI 2 "s_register_operand" "r") >> - (match_operand:SI 1 "s_register_operand" "r")) >> - (match_operand:SI 3 "s_register_operand" "r")) >> - (const_int 0))) >> - (clobber (match_scratch:SI 0 "=r"))] >> - "TARGET_ARM && arm_arch6 && optimize_size" >> - "mlas%?\\t%0, %2, %1, %3" >> - [(set_attr "conds" "set") >> - (set_attr "type" "mlas")] >> -) >> - >> (define_insn "*mulsi3subsi" >> [(set (match_operand:SI 0 "s_register_operand" "=r") >> (minus:SI >> diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md >> index >> 6ccc875e2b4e7b8ce256e52da966dfe220c6f5d6..8e26689b66263e7304a0da6163ceccfb4483d3e7 >> 100644 >> --- a/gcc/config/arm/thumb2.md >> +++ b/gcc/config/arm/thumb2.md >> @@ -1381,31 +1381,6 @@ (define_insn "*thumb2_mulsi_short" >> (set_attr "length" "2") >> (set_attr "type" "muls")]) >> >> -(define_insn "*thumb2_mulsi_short_compare0" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV >> - (mult:SI (match_operand:SI 1 "register_operand" "%0") >> - (match_operand:SI 2 "register_operand" "l")) >> - (const_int 0))) >> - (set (match_operand:SI 0 "register_operand" "=l") >> - (mult:SI (match_dup 1) (match_dup 2)))] >> - "TARGET_THUMB2 && optimize_size" >> - "muls\\t%0, %2, %0" >> - [(set_attr "length" "2") >> - (set_attr "type" "muls")]) >> - >> -(define_insn "*thumb2_mulsi_short_compare0_scratch" >> - [(set (reg:CC_NOOV CC_REGNUM) >> - (compare:CC_NOOV >> - (mult:SI (match_operand:SI 1 "register_operand" "%0") >> - (match_operand:SI 2 "register_operand" "l")) >> - (const_int 0))) >> - (clobber (match_scratch:SI 0 "=l"))] >> - "TARGET_THUMB2 && optimize_size" >> - "muls\\t%0, %2, %0" >> - [(set_attr "length" "2") >> - (set_attr "type" "muls")]) >> - >> (define_insn "*thumb2_cbz" >> [(set (pc) (if_then_else >> (eq (match_operand:SI 0 "s_register_operand" "l,?r")
Hi Richard, Kyrill, >> I disagree. If they still trigger and generate better code than without >> we should keep them. > >> What kind of code is *common* varies greatly from user to user. Not really - doing a multiply and checking whether the result is zero is exceedingly rare. I found only 3 cases out of 7300 mul/mla in all of SPEC2006... Overall codesize effect with -Os: 28 bytes or 0.00045%. So we really should not even consider wasting any more time on maintaining such useless patterns. > Also, the main reason for restricting their use was that in the 'olden > days', when we had multi-cycle implementations of the multiply > instructions with short-circuit fast termination when the result was > completed, the flag setting variants would never short-circuit. That only applied to conditional multiplies IIRC, some implementations would not early-terminate if the condition failed. Today there are serious penalties for conditional multiplies - but that's something to address in a different patch. > These days we have fixed cycle counts for multiply instructions, so this > is no-longer a penalty. No, there is a large overhead on modern cores when you set the flags, and there are other penalties due to the extra micro-ops. > In the thumb2 case in particular we can often > reduce mul-cmp (6 bytes) to muls (2 bytes), that's a 66% saving on this > sequence and definitely worth exploiting when we can, even if it's not > all that common. Using muls+cbz is equally small. With my patch we generate this with -Os: void g(void); int f(int x) { if (x * x != 0) g(); } f: muls r0, r0, r0 push {r3, lr} cbz r0, .L9 bl g .L9: pop {r3, pc} Wilco
Any further comments? Note GCC doesn't support S/UMULLS either since it is equally useless. It's no surprise that Thumb-2 removed support for flag-setting 64-bit multiplies, while AArch64 didn't add flag-setting multiplies. So there is no argument that these instructions are in any way useful to compilers. Wilco Hi Richard, Kyrill, >> I disagree. If they still trigger and generate better code than without >> we should keep them. > >> What kind of code is *common* varies greatly from user to user. Not really - doing a multiply and checking whether the result is zero is exceedingly rare. I found only 3 cases out of 7300 mul/mla in all of SPEC2006... Overall codesize effect with -Os: 28 bytes or 0.00045%. So we really should not even consider wasting any more time on maintaining such useless patterns. > Also, the main reason for restricting their use was that in the 'olden > days', when we had multi-cycle implementations of the multiply > instructions with short-circuit fast termination when the result was > completed, the flag setting variants would never short-circuit. That only applied to conditional multiplies IIRC, some implementations would not early-terminate if the condition failed. Today there are serious penalties for conditional multiplies - but that's something to address in a different patch. > These days we have fixed cycle counts for multiply instructions, so this > is no-longer a penalty. No, there is a large overhead on modern cores when you set the flags, and there are other penalties due to the extra micro-ops. > In the thumb2 case in particular we can often > reduce mul-cmp (6 bytes) to muls (2 bytes), that's a 66% saving on this > sequence and definitely worth exploiting when we can, even if it's not > all that common. Using muls+cbz is equally small. With my patch we generate this with -Os: void g(void); int f(int x) { if (x * x != 0) g(); } f: muls r0, r0, r0 push {r3, lr} cbz r0, .L9 bl g .L9: pop {r3, pc} Wilco
Any further comments? Note GCC doesn't support S/UMULLS either since it is equally useless. It's no surprise that Thumb-2 removed support for flag-setting 64-bit multiplies, while AArch64 didn't add flag-setting multiplies. So there is no argument that these instructions are in any way useful to compilers. Wilco Hi Richard, Kyrill, >> I disagree. If they still trigger and generate better code than without >> we should keep them. > >> What kind of code is *common* varies greatly from user to user. Not really - doing a multiply and checking whether the result is zero is exceedingly rare. I found only 3 cases out of 7300 mul/mla in all of SPEC2006... Overall codesize effect with -Os: 28 bytes or 0.00045%. So we really should not even consider wasting any more time on maintaining such useless patterns. > Also, the main reason for restricting their use was that in the 'olden > days', when we had multi-cycle implementations of the multiply > instructions with short-circuit fast termination when the result was > completed, the flag setting variants would never short-circuit. That only applied to conditional multiplies IIRC, some implementations would not early-terminate if the condition failed. Today there are serious penalties for conditional multiplies - but that's something to address in a different patch. > These days we have fixed cycle counts for multiply instructions, so this > is no-longer a penalty. No, there is a large overhead on modern cores when you set the flags, and there are other penalties due to the extra micro-ops. > In the thumb2 case in particular we can often > reduce mul-cmp (6 bytes) to muls (2 bytes), that's a 66% saving on this > sequence and definitely worth exploiting when we can, even if it's not > all that common. Using muls+cbz is equally small. With my patch we generate this with -Os: void g(void); int f(int x) { if (x * x != 0) g(); } f: muls r0, r0, r0 push {r3, lr} cbz r0, .L9 bl g .L9: pop {r3, pc} Wilco
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 738d42fd164f117f1dec1108a824d984ccd70d09..66dafdc47b7cfc37c131764e482d47bcaab90538 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -1618,60 +1618,6 @@ (define_insn "*arm_mulsi3_v6" (set_attr "predicable_short_it" "yes,yes,no")] ) -(define_insn "*mulsi3_compare0" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV (mult:SI - (match_operand:SI 2 "s_register_operand" "r,r") - (match_operand:SI 1 "s_register_operand" "%0,r")) - (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=&r,&r") - (mult:SI (match_dup 2) (match_dup 1)))] - "TARGET_ARM && !arm_arch6" - "muls%?\\t%0, %2, %1" - [(set_attr "conds" "set") - (set_attr "type" "muls")] -) - -(define_insn "*mulsi3_compare0_v6" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV (mult:SI - (match_operand:SI 2 "s_register_operand" "r") - (match_operand:SI 1 "s_register_operand" "r")) - (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=r") - (mult:SI (match_dup 2) (match_dup 1)))] - "TARGET_ARM && arm_arch6 && optimize_size" - "muls%?\\t%0, %2, %1" - [(set_attr "conds" "set") - (set_attr "type" "muls")] -) - -(define_insn "*mulsi_compare0_scratch" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV (mult:SI - (match_operand:SI 2 "s_register_operand" "r,r") - (match_operand:SI 1 "s_register_operand" "%0,r")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=&r,&r"))] - "TARGET_ARM && !arm_arch6" - "muls%?\\t%0, %2, %1" - [(set_attr "conds" "set") - (set_attr "type" "muls")] -) - -(define_insn "*mulsi_compare0_scratch_v6" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV (mult:SI - (match_operand:SI 2 "s_register_operand" "r") - (match_operand:SI 1 "s_register_operand" "r")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=r"))] - "TARGET_ARM && arm_arch6 && optimize_size" - "muls%?\\t%0, %2, %1" - [(set_attr "conds" "set") - (set_attr "type" "muls")] -) - ;; Unnamed templates to match MLA instruction. (define_insn "*mulsi3addsi" @@ -1698,70 +1644,6 @@ (define_insn "*mulsi3addsi_v6" (set_attr "predicable" "yes")] ) -(define_insn "*mulsi3addsi_compare0" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (plus:SI (mult:SI - (match_operand:SI 2 "s_register_operand" "r,r,r,r") - (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) - (match_operand:SI 3 "s_register_operand" "r,r,0,0")) - (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") - (plus:SI (mult:SI (match_dup 2) (match_dup 1)) - (match_dup 3)))] - "TARGET_ARM && arm_arch6" - "mlas%?\\t%0, %2, %1, %3" - [(set_attr "conds" "set") - (set_attr "type" "mlas")] -) - -(define_insn "*mulsi3addsi_compare0_v6" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (plus:SI (mult:SI - (match_operand:SI 2 "s_register_operand" "r") - (match_operand:SI 1 "s_register_operand" "r")) - (match_operand:SI 3 "s_register_operand" "r")) - (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=r") - (plus:SI (mult:SI (match_dup 2) (match_dup 1)) - (match_dup 3)))] - "TARGET_ARM && arm_arch6 && optimize_size" - "mlas%?\\t%0, %2, %1, %3" - [(set_attr "conds" "set") - (set_attr "type" "mlas")] -) - -(define_insn "*mulsi3addsi_compare0_scratch" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (plus:SI (mult:SI - (match_operand:SI 2 "s_register_operand" "r,r,r,r") - (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) - (match_operand:SI 3 "s_register_operand" "?r,r,0,0")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))] - "TARGET_ARM && !arm_arch6" - "mlas%?\\t%0, %2, %1, %3" - [(set_attr "conds" "set") - (set_attr "type" "mlas")] -) - -(define_insn "*mulsi3addsi_compare0_scratch_v6" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (plus:SI (mult:SI - (match_operand:SI 2 "s_register_operand" "r") - (match_operand:SI 1 "s_register_operand" "r")) - (match_operand:SI 3 "s_register_operand" "r")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=r"))] - "TARGET_ARM && arm_arch6 && optimize_size" - "mlas%?\\t%0, %2, %1, %3" - [(set_attr "conds" "set") - (set_attr "type" "mlas")] -) - (define_insn "*mulsi3subsi" [(set (match_operand:SI 0 "s_register_operand" "=r") (minus:SI diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 6ccc875e2b4e7b8ce256e52da966dfe220c6f5d6..8e26689b66263e7304a0da6163ceccfb4483d3e7 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -1381,31 +1381,6 @@ (define_insn "*thumb2_mulsi_short" (set_attr "length" "2") (set_attr "type" "muls")]) -(define_insn "*thumb2_mulsi_short_compare0" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (mult:SI (match_operand:SI 1 "register_operand" "%0") - (match_operand:SI 2 "register_operand" "l")) - (const_int 0))) - (set (match_operand:SI 0 "register_operand" "=l") - (mult:SI (match_dup 1) (match_dup 2)))] - "TARGET_THUMB2 && optimize_size" - "muls\\t%0, %2, %0" - [(set_attr "length" "2") - (set_attr "type" "muls")]) - -(define_insn "*thumb2_mulsi_short_compare0_scratch" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV - (mult:SI (match_operand:SI 1 "register_operand" "%0") - (match_operand:SI 2 "register_operand" "l")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=l"))] - "TARGET_THUMB2 && optimize_size" - "muls\\t%0, %2, %0" - [(set_attr "length" "2") - (set_attr "type" "muls")]) - (define_insn "*thumb2_cbz" [(set (pc) (if_then_else (eq (match_operand:SI 0 "s_register_operand" "l,?r")