Message ID | bcd0a865-2bda-ab01-33de-36d384cb89e3@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [rs6000] Add multiply-add expand pattern [PR103109] | expand |
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598744.html Thanks On 25/7/2022 下午 1:11, HAO CHEN GUI wrote: > Hi, > This patch adds an expand and several insns for multiply-add with > three 64bit operands. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-07-22 Haochen Gui <guihaoc@linux.ibm.com> > > gcc/ > PR target/103109 > * config/rs6000/rs6000.md (<u>maddditi4): New pattern for > multiply-add. > (<u>madddi4_lowpart): New. > (<u>madddi4_lowpart_le): New. > (<u>madddi4_highpart): New. > (<u>madddi4_highpart_le): New. > > gcc/testsuite/ > PR target/103109 > * gcc.target/powerpc/pr103109.c: New. > > patch.diff > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index c55ee7e171a..4f3b56e103e 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -3226,6 +3226,97 @@ (define_insn "*maddld<mode>4" > "maddld %0,%1,%2,%3" > [(set_attr "type" "mul")]) > > +(define_expand "<u>maddditi4" > + [(set (match_operand:TI 0 "gpc_reg_operand") > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand"))))] > + "TARGET_POWERPC64 && TARGET_MADDLD" > +{ > + rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 0); > + rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 8); > + > + if (BYTES_BIG_ENDIAN) > + { > + emit_insn (gen_<u>madddi4_lowpart (op0_lo, operands[1], operands[2], > + operands[3])); > + emit_insn (gen_<u>madddi4_highpart (op0_hi, operands[1], operands[2], > + operands[3])); > + } > + else > + { > + emit_insn (gen_<u>madddi4_lowpart_le (op0_lo, operands[1], operands[2], > + operands[3])); > + emit_insn (gen_<u>madddi4_highpart_le (op0_hi, operands[1], operands[2], > + operands[3])); > + } > + DONE; > +}) > + > +(define_insn "<u>madddi4_lowpart" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 8))] > + "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN" > + "maddld %0,%1,%2,%3" > + [(set_attr "type" "mul")]) > + > +(define_insn "<u>madddi4_lowpart_le" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 0))] > + "TARGET_POWERPC64 && TARGET_MADDLD && !BYTES_BIG_ENDIAN" > + "maddld %0,%1,%2,%3" > + [(set_attr "type" "mul")]) > + > +(define_insn "<u>madddi4_highpart" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 0))] > + "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN" > + "maddhd<u> %0,%1,%2,%3" > + [(set_attr "type" "mul")]) > + > +(define_insn "<u>madddi4_highpart_le" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 8))] > + "TARGET_POWERPC64 && TARGET_MADDLD && !BYTES_BIG_ENDIAN" > + "maddhd<u> %0,%1,%2,%3" > + [(set_attr "type" "mul")]) > + > (define_insn "udiv<mode>3" > [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") > (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c > new file mode 100644 > index 00000000000..256e05d5677 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile { target { lp64 } } } */ > +/* { dg-require-effective-target powerpc_p9modulo_ok } */ > +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ > +/* { dg-final { scan-assembler-times {\mmaddld\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mmaddhd\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mmaddhdu\M} 1 } } */ > + > +__int128 test (long a, long b, long c) > +{ > + return (__int128) a * (__int128) b + (__int128) c; > +} > + > +unsigned __int128 testu (unsigned long a, unsigned long b, unsigned long c) > +{ > + return (unsigned __int128) a * (unsigned __int128) b + (unsigned __int128) c; > +}
Hi Haochen, Thanks for the patch, some comments are inlined. on 2022/7/25 13:11, HAO CHEN GUI wrote: > Hi, > This patch adds an expand and several insns for multiply-add with > three 64bit operands. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-07-22 Haochen Gui <guihaoc@linux.ibm.com> > > gcc/ > PR target/103109 > * config/rs6000/rs6000.md (<u>maddditi4): New pattern for > multiply-add. > (<u>madddi4_lowpart): New. > (<u>madddi4_lowpart_le): New. > (<u>madddi4_highpart): New. > (<u>madddi4_highpart_le): New. > > gcc/testsuite/ > PR target/103109 > * gcc.target/powerpc/pr103109.c: New. > > patch.diff > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index c55ee7e171a..4f3b56e103e 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -3226,6 +3226,97 @@ (define_insn "*maddld<mode>4" > "maddld %0,%1,%2,%3" > [(set_attr "type" "mul")]) > > +(define_expand "<u>maddditi4" > + [(set (match_operand:TI 0 "gpc_reg_operand") > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand"))))] > + "TARGET_POWERPC64 && TARGET_MADDLD" > +{ > + rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 0); > + rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 8); > + > + if (BYTES_BIG_ENDIAN) > + { > + emit_insn (gen_<u>madddi4_lowpart (op0_lo, operands[1], operands[2], > + operands[3])); > + emit_insn (gen_<u>madddi4_highpart (op0_hi, operands[1], operands[2], > + operands[3])); > + } > + else > + { > + emit_insn (gen_<u>madddi4_lowpart_le (op0_lo, operands[1], operands[2], > + operands[3])); > + emit_insn (gen_<u>madddi4_highpart_le (op0_hi, operands[1], operands[2], > + operands[3])); > + } > + DONE; > +}) > + > +(define_insn "<u>madddi4_lowpart" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 8))] > + "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN" > + "maddld %0,%1,%2,%3" > + [(set_attr "type" "mul")]) > + > +(define_insn "<u>madddi4_lowpart_le" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 0))] > + "TARGET_POWERPC64 && TARGET_MADDLD && !BYTES_BIG_ENDIAN" > + "maddld %0,%1,%2,%3" > + [(set_attr "type" "mul")] > + > +(define_insn "<u>madddi4_highpart" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 0))] > + "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN" > + "maddhd<u> %0,%1,%2,%3" > + [(set_attr "type" "mul")]) > + > +(define_insn "<u>madddi4_highpart_le" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 8))] > + "TARGET_POWERPC64 && TARGET_MADDLD && !BYTES_BIG_ENDIAN" > + "maddhd<u> %0,%1,%2,%3" > + [(set_attr "type" "mul")]) > + > (define_insn "udiv<mode>3" > [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") > (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c > new file mode 100644 > index 00000000000..256e05d5677 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile { target { lp64 } } } */ Since the guard is TARGET_POWERPC64, should use has_arch_ppc64? > +/* { dg-require-effective-target powerpc_p9modulo_ok } */ Need effective target int128 as well? > +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ > +/* { dg-final { scan-assembler-times {\mmaddld\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mmaddhd\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mmaddhdu\M} 1 } } */ > + > +__int128 test (long a, long b, long c) > +{ > + return (__int128) a * (__int128) b + (__int128) c; > +} > + > +unsigned __int128 testu (unsigned long a, unsigned long b, unsigned long c) > +{ > + return (unsigned __int128) a * (unsigned __int128) b + (unsigned __int128) c; > +} Not sure there is some coverage for this kind of multiply-add (promoted first then mul and add), if no, it seems better to add one runnable test case. BR, Kewen
Hi! On Mon, Jul 25, 2022 at 01:11:47PM +0800, HAO CHEN GUI wrote: > This patch adds an expand and several insns for multiply-add with > three 64bit operands. > PR target/103109 > * config/rs6000/rs6000.md (<u>maddditi4): New pattern for > multiply-add. Please don't break lines unnecessarily. > +(define_expand "<u>maddditi4" > + [(set (match_operand:TI 0 "gpc_reg_operand") > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand"))))] Broken indentation, should be (define_expand "<u>maddditi4" [(set (match_operand:TI 0 "gpc_reg_operand") (plus:TI (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand")) (any_extend:TI (match_operand:DI 2 "gpc_reg_operand"))) (any_extend:TI (match_operand:DI 3 "gpc_reg_operand"))))] > + "TARGET_POWERPC64 && TARGET_MADDLD" TARGET_MADDLD should itself already include TARGET_POWERPC64. Could you please send a separate patch for that first? > +(define_insn "<u>madddi4_lowpart" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 8))] [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (subreg:DI (plus:TI (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r")) (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r"))) (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))) 8))] (and similar for _le of course). Segher
On Mon, Aug 01, 2022 at 02:19:32PM +0800, Kewen.Lin wrote: > > new file mode 100644 > > index 00000000000..256e05d5677 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c > > @@ -0,0 +1,16 @@ > > +/* { dg-do compile { target { lp64 } } } */ > > Since the guard is TARGET_POWERPC64, should use has_arch_ppc64? Yes, we should almost never have lp64 in testcases anymore, since now we have has_arch_ppc64. Previously we had only has_powerpc64, which is a very different thing unfortunately. > > +/* { dg-require-effective-target powerpc_p9modulo_ok } */ > > Need effective target int128 as well? Yes. This was hidden by the lp64 test, which skips on -m32 already :-) > > +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ > > +/* { dg-final { scan-assembler-times {\mmaddld\M} 2 } } */ > > +/* { dg-final { scan-assembler-times {\mmaddhd\M} 1 } } */ > > +/* { dg-final { scan-assembler-times {\mmaddhdu\M} 1 } } */ > > + > > +__int128 test (long a, long b, long c) > > +{ > > + return (__int128) a * (__int128) b + (__int128) c; > > +} > > + > > +unsigned __int128 testu (unsigned long a, unsigned long b, unsigned long c) > > +{ > > + return (unsigned __int128) a * (unsigned __int128) b + (unsigned __int128) c; > > +} (You only need to cast the first here always, the rest is promoted automatically). > Not sure there is some coverage for this kind of multiply-add (promoted first > then mul and add), if no, it seems better to add one runnable test case. Good point. We won't automatically see it from the compiler build itself for example, int128 isn't used there. Segher
Hi! On Mon, Jul 25, 2022 at 01:11:47PM +0800, HAO CHEN GUI wrote: > +(define_insn "<u>madddi4_lowpart" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (subreg:DI > + (plus:TI > + (mult:TI (any_extend:TI > + (match_operand:DI 1 "gpc_reg_operand" "r")) > + (any_extend:TI > + (match_operand:DI 2 "gpc_reg_operand" "r"))) > + (any_extend:TI > + (match_operand:DI 3 "gpc_reg_operand" "r"))) > + 8))] > + "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN" > + "maddld %0,%1,%2,%3" > + [(set_attr "type" "mul")]) So, hrm. This (as well as the _le version) simplifies to just the :DI ops, without subreg. Not properly simplified patterns like this will not ever match, so most optimisations on this will not work :-( Segher
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index c55ee7e171a..4f3b56e103e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -3226,6 +3226,97 @@ (define_insn "*maddld<mode>4" "maddld %0,%1,%2,%3" [(set_attr "type" "mul")]) +(define_expand "<u>maddditi4" + [(set (match_operand:TI 0 "gpc_reg_operand") + (plus:TI + (mult:TI (any_extend:TI + (match_operand:DI 1 "gpc_reg_operand")) + (any_extend:TI + (match_operand:DI 2 "gpc_reg_operand"))) + (any_extend:TI + (match_operand:DI 3 "gpc_reg_operand"))))] + "TARGET_POWERPC64 && TARGET_MADDLD" +{ + rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 0); + rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 8); + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_<u>madddi4_lowpart (op0_lo, operands[1], operands[2], + operands[3])); + emit_insn (gen_<u>madddi4_highpart (op0_hi, operands[1], operands[2], + operands[3])); + } + else + { + emit_insn (gen_<u>madddi4_lowpart_le (op0_lo, operands[1], operands[2], + operands[3])); + emit_insn (gen_<u>madddi4_highpart_le (op0_hi, operands[1], operands[2], + operands[3])); + } + DONE; +}) + +(define_insn "<u>madddi4_lowpart" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (subreg:DI + (plus:TI + (mult:TI (any_extend:TI + (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI + (match_operand:DI 2 "gpc_reg_operand" "r"))) + (any_extend:TI + (match_operand:DI 3 "gpc_reg_operand" "r"))) + 8))] + "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN" + "maddld %0,%1,%2,%3" + [(set_attr "type" "mul")]) + +(define_insn "<u>madddi4_lowpart_le" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (subreg:DI + (plus:TI + (mult:TI (any_extend:TI + (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI + (match_operand:DI 2 "gpc_reg_operand" "r"))) + (any_extend:TI + (match_operand:DI 3 "gpc_reg_operand" "r"))) + 0))] + "TARGET_POWERPC64 && TARGET_MADDLD && !BYTES_BIG_ENDIAN" + "maddld %0,%1,%2,%3" + [(set_attr "type" "mul")]) + +(define_insn "<u>madddi4_highpart" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (subreg:DI + (plus:TI + (mult:TI (any_extend:TI + (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI + (match_operand:DI 2 "gpc_reg_operand" "r"))) + (any_extend:TI + (match_operand:DI 3 "gpc_reg_operand" "r"))) + 0))] + "TARGET_POWERPC64 && TARGET_MADDLD && BYTES_BIG_ENDIAN" + "maddhd<u> %0,%1,%2,%3" + [(set_attr "type" "mul")]) + +(define_insn "<u>madddi4_highpart_le" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (subreg:DI + (plus:TI + (mult:TI (any_extend:TI + (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI + (match_operand:DI 2 "gpc_reg_operand" "r"))) + (any_extend:TI + (match_operand:DI 3 "gpc_reg_operand" "r"))) + 8))] + "TARGET_POWERPC64 && TARGET_MADDLD && !BYTES_BIG_ENDIAN" + "maddhd<u> %0,%1,%2,%3" + [(set_attr "type" "mul")]) + (define_insn "udiv<mode>3" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c new file mode 100644 index 00000000000..256e05d5677 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { lp64 } } } */ +/* { dg-require-effective-target powerpc_p9modulo_ok } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ +/* { dg-final { scan-assembler-times {\mmaddld\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mmaddhd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mmaddhdu\M} 1 } } */ + +__int128 test (long a, long b, long c) +{ + return (__int128) a * (__int128) b + (__int128) c; +} + +unsigned __int128 testu (unsigned long a, unsigned long b, unsigned long c) +{ + return (unsigned __int128) a * (unsigned __int128) b + (unsigned __int128) c; +}