Message ID | e6e25beda56618622711f8bf4a06e3d10058b2cf.1624025450.git.julian@codesourcery.com |
---|---|
State | New |
Headers | show |
Series | amdgcn: Improve TImode support | expand |
On 18/06/2021 15:19, Julian Brown wrote: > This patch improves 64-bit multiplication for AMD GCN: patterns for > unsigned and signed 32x32->64 bit multiplication have been added, and > also 64x64->64 bit multiplication is now open-coded rather than calling > a library function (which may be a win for code size as well as speed: > the function calling sequence isn't particularly concise for GCN). > > The <su>mulsi3_highpart pattern has also been extended for GCN5+, since > that ISA version supports high-part result multiply instructions with > SGPR operands. > > The DImode multiply implementation is lost from libgcc if we build it > for DImode/TImode rather than SImode/DImode, a change we make in a later > patch in this series. > > I can probably self-approve this, but I'll give Andrew Stubbs a chance > to comment. > > Thanks, > > Julian > > 2021-06-18 Julian Brown <julian@codesourcery.com> > > gcc/ > * config/gcn/gcn.md (<su>mulsi3_highpart): Add SGPR alternatives for > GCN5+. > (<su>mulsidi3, muldi3): Add expanders. > --- > gcc/config/gcn/gcn.md | 55 ++++++++++++++++++++++++++++++++++++++----- > 1 file changed, 49 insertions(+), 6 deletions(-) > > diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md > index b5f895a93e2..70655ca4b8b 100644 > --- a/gcc/config/gcn/gcn.md > +++ b/gcc/config/gcn/gcn.md > @@ -1392,19 +1392,62 @@ > (define_code_attr e [(sign_extend "e") (zero_extend "")]) > > (define_insn "<su>mulsi3_highpart" > - [(set (match_operand:SI 0 "register_operand" "= v") > + [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v") > (truncate:SI > (lshiftrt:DI > (mult:DI > (any_extend:DI > - (match_operand:SI 1 "register_operand" "% v")) > + (match_operand:SI 1 "register_operand" "%SgA,SgA, v")) > (any_extend:DI > - (match_operand:SI 2 "register_operand" "vSv"))) > + (match_operand:SI 2 "register_operand" "SgA, B,vSv"))) > (const_int 32))))] > "" > - "v_mul_hi<sgnsuffix>0\t%0, %2, %1" > - [(set_attr "type" "vop3a") > - (set_attr "length" "8")]) > + "@ > + s_mul_hi<sgnsuffix>0\t%0, %1, %2 > + s_mul_hi<sgnsuffix>0\t%0, %1, %2 > + v_mul_hi<sgnsuffix>0\t%0, %2, %1" > + [(set_attr "type" "sop2,sop2,vop3a") > + (set_attr "length" "4,8,8") > + (set_attr "gcn_version" "gcn5,gcn5,*")]) > + > +(define_expand "<su>mulsidi3" > + [(set (match_operand:DI 0 "register_operand" "") > + (mult:DI > + (any_extend:DI (match_operand:SI 1 "register_operand" "")) > + (any_extend:DI (match_operand:SI 2 "register_operand" ""))))] > + "" > + { > + rtx dst = gen_reg_rtx (DImode); > + rtx dstlo = gen_lowpart (SImode, dst); > + rtx dsthi = gen_highpart_mode (SImode, DImode, dst); > + emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2])); > + emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2])); > + emit_move_insn (operands[0], dst); > + DONE; > + }) > + > +(define_expand "muldi3" > + [(set (match_operand:DI 0 "register_operand" "") > + (mult:DI (match_operand:DI 1 "register_operand" "") > + (match_operand:DI 2 "register_operand" "")))] > + "" > + { > + rtx tmp0 = gen_reg_rtx (SImode); > + rtx tmp1 = gen_reg_rtx (SImode); > + rtx dst = gen_reg_rtx (DImode); > + rtx dsthi = gen_highpart_mode (SImode, DImode, dst); > + rtx op1lo = gen_lowpart (SImode, operands[1]); > + rtx op1hi = gen_highpart_mode (SImode, DImode, operands[1]); > + rtx op2lo = gen_lowpart (SImode, operands[2]); > + rtx op2hi = gen_highpart_mode (SImode, DImode, operands[2]); > + emit_insn (gen_umulsidi3 (dst, op1lo, op2lo)); > + emit_insn (gen_mulsi3 (tmp0, op1lo, op2hi)); > + emit_insn (gen_addsi3 (dsthi, dsthi, tmp0)); > + emit_insn (gen_mulsi3 (tmp1, op1hi, op2lo)); > + emit_insn (gen_addsi3 (dsthi, dsthi, tmp1)); > + emit_move_insn (operands[0], dst); > + DONE; > + }) > > (define_insn "<u>mulhisi3" > [(set (match_operand:SI 0 "register_operand" "=v") > Most of the rest of the backend expands 64-bit operations to 32-bit pairs much later, using define_insn_and_split, because there were lots of issues with splitting it early. I don't recall exactly what right now, unfortunately. (It might have been related to spilling only half the value to the stack?) It also makes it hard to debug, I think. Andrew
On Fri, 18 Jun 2021 15:55:09 +0100 Andrew Stubbs <ams@codesourcery.com> wrote: > On 18/06/2021 15:19, Julian Brown wrote: > > This patch improves 64-bit multiplication for AMD GCN: patterns for > > unsigned and signed 32x32->64 bit multiplication have been added, > > and also 64x64->64 bit multiplication is now open-coded rather than > > calling a library function (which may be a win for code size as > > well as speed: the function calling sequence isn't particularly > > concise for GCN). > > > > The <su>mulsi3_highpart pattern has also been extended for GCN5+, > > since that ISA version supports high-part result multiply > > instructions with SGPR operands. > > > > The DImode multiply implementation is lost from libgcc if we build > > it for DImode/TImode rather than SImode/DImode, a change we make in > > a later patch in this series. [snip] > Most of the rest of the backend expands 64-bit operations to 32-bit > pairs much later, using define_insn_and_split, because there were > lots of issues with splitting it early. I don't recall exactly what > right now, unfortunately. (It might have been related to spilling > only half the value to the stack?) It also makes it hard to debug, I > think. FTR, I followed up on this here: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573911.html Julian
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index b5f895a93e2..70655ca4b8b 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -1392,19 +1392,62 @@ (define_code_attr e [(sign_extend "e") (zero_extend "")]) (define_insn "<su>mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "= v") + [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v") (truncate:SI (lshiftrt:DI (mult:DI (any_extend:DI - (match_operand:SI 1 "register_operand" "% v")) + (match_operand:SI 1 "register_operand" "%SgA,SgA, v")) (any_extend:DI - (match_operand:SI 2 "register_operand" "vSv"))) + (match_operand:SI 2 "register_operand" "SgA, B,vSv"))) (const_int 32))))] "" - "v_mul_hi<sgnsuffix>0\t%0, %2, %1" - [(set_attr "type" "vop3a") - (set_attr "length" "8")]) + "@ + s_mul_hi<sgnsuffix>0\t%0, %1, %2 + s_mul_hi<sgnsuffix>0\t%0, %1, %2 + v_mul_hi<sgnsuffix>0\t%0, %2, %1" + [(set_attr "type" "sop2,sop2,vop3a") + (set_attr "length" "4,8,8") + (set_attr "gcn_version" "gcn5,gcn5,*")]) + +(define_expand "<su>mulsidi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI + (any_extend:DI (match_operand:SI 1 "register_operand" "")) + (any_extend:DI (match_operand:SI 2 "register_operand" ""))))] + "" + { + rtx dst = gen_reg_rtx (DImode); + rtx dstlo = gen_lowpart (SImode, dst); + rtx dsthi = gen_highpart_mode (SImode, DImode, dst); + emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2])); + emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2])); + emit_move_insn (operands[0], dst); + DONE; + }) + +(define_expand "muldi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" "")))] + "" + { + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + rtx dst = gen_reg_rtx (DImode); + rtx dsthi = gen_highpart_mode (SImode, DImode, dst); + rtx op1lo = gen_lowpart (SImode, operands[1]); + rtx op1hi = gen_highpart_mode (SImode, DImode, operands[1]); + rtx op2lo = gen_lowpart (SImode, operands[2]); + rtx op2hi = gen_highpart_mode (SImode, DImode, operands[2]); + emit_insn (gen_umulsidi3 (dst, op1lo, op2lo)); + emit_insn (gen_mulsi3 (tmp0, op1lo, op2hi)); + emit_insn (gen_addsi3 (dsthi, dsthi, tmp0)); + emit_insn (gen_mulsi3 (tmp1, op1hi, op2lo)); + emit_insn (gen_addsi3 (dsthi, dsthi, tmp1)); + emit_move_insn (operands[0], dst); + DONE; + }) (define_insn "<u>mulhisi3" [(set (match_operand:SI 0 "register_operand" "=v")