diff mbox series

rs6000: Support [u]mul<mode>3_highpart for vector

Message ID d9c74ae2-6163-6d9f-5c88-29385e690479@linux.ibm.com
State New
Headers show
Series rs6000: Support [u]mul<mode>3_highpart for vector | expand

Commit Message

Kewen.Lin July 13, 2021, 8:58 a.m. UTC
Hi,

This patch makes the vector multiply high (part) instructions
newly introduced in Power10 usable in vectorized loops by
renaming the existing define_insns to the standard pattern
names.  It depends on the patch that enables the vectorizer
to recognize mul_highpart.

Tested on powerpc64le-linux-gnu P9 with binutils that
supports P10; I will test more once the vectorizer patch
lands.

BR,
Kewen.
-----
gcc/ChangeLog:

	* config/rs6000/vsx.md (mulhs_<mode>): Rename to...
	(smul<mode>3_highpart): ... this.
	(mulhu_<mode>): Rename to...
	(umul<mode>3_highpart): ... this.
	* config/rs6000/rs6000-builtin.def (MULHS_V2DI, MULHS_V4SI,
	MULHU_V2DI, MULHU_V4SI): Adjust.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/mul-vectorize-3.c: New test.
	* gcc.target/powerpc/mul-vectorize-4.c: New test.
---
 gcc/config/rs6000/rs6000-builtin.def          |  8 ++---
 gcc/config/rs6000/vsx.md                      |  4 +--
 .../gcc.target/powerpc/mul-vectorize-3.c      | 32 ++++++++++++++++++
 .../gcc.target/powerpc/mul-vectorize-4.c      | 33 +++++++++++++++++++
 4 files changed, 71 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c

Comments

Segher Boessenkool July 13, 2021, 10:07 p.m. UTC | #1
Hi!

On Tue, Jul 13, 2021 at 04:58:42PM +0800, Kewen.Lin wrote:
> This patch is to make Power10 newly introduced vector
> multiply high (part) instructions exploited in vectorized
> loops, it renames existing define_insns as standard pattern
> names.  It depends on that patch which enables vectorizer
> to recog mul_highpart.

It actually is correct already, it will just not be used yet, right?
But the testcases will fail until the generic support lands.

Okay for trunk.  Thanks!


Segher
Kewen.Lin July 14, 2021, 2:12 a.m. UTC | #2
on 2021/7/14 上午6:07, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, Jul 13, 2021 at 04:58:42PM +0800, Kewen.Lin wrote:
>> This patch is to make Power10 newly introduced vector
>> multiply high (part) instructions exploited in vectorized
>> loops, it renames existing define_insns as standard pattern
>> names.  It depends on that patch which enables vectorizer
>> to recog mul_highpart.
> 
> It actually is correct already, it will just not be used yet, right?

Yes, the names are just not standard.  :)

> But the testcases will fail until the generic support lands.
> 

Yes!

> Okay for trunk.  Thanks!
> 
> 

Thanks!

BR,
Kewen
Segher Boessenkool July 14, 2021, 6:38 p.m. UTC | #3
On Wed, Jul 14, 2021 at 10:12:46AM +0800, Kewen.Lin wrote:
> on 2021/7/14 上午6:07, Segher Boessenkool wrote:
> > Hi!
> > 
> > On Tue, Jul 13, 2021 at 04:58:42PM +0800, Kewen.Lin wrote:
> >> This patch is to make Power10 newly introduced vector
> >> multiply high (part) instructions exploited in vectorized
> >> loops, it renames existing define_insns as standard pattern
> >> names.  It depends on that patch which enables vectorizer
> >> to recog mul_highpart.
> > 
> > It actually is correct already, it will just not be used yet, right?
> 
> Yes, the names are just not standard.  :)

I meant after this patch is applied :-)

Doesn't change much though -- applying it right now is fine, but you can
wait for the generic code to get in first, to make the new tests not
fail.


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 592efe31b04..cbacbc6b785 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -3016,10 +3016,10 @@  BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, modv2di3)
 BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, modv4si3)
 BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, umodv2di3)
 BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, umodv4si3)
-BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, mulhs_v2di)
-BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, mulhs_v4si)
-BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, mulhu_v2di)
-BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, mulhu_v4si)
+BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, smulv2di3_highpart)
+BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, smulv4si3_highpart)
+BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, umulv2di3_highpart)
+BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, umulv4si3_highpart)
 BU_P10V_AV_2 (MULLD_V2DI, "vmulld", CONST, mulv2di3)
 
 BU_P10V_VSX_1 (VXXSPLTIW_V4SI, "vxxspltiw_v4si", CONST, xxspltiw_v4si)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f622873d758..6f6fc0bd835 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6351,7 +6351,7 @@  (define_insn "umod<mode>3"
   [(set_attr "type" "vecdiv")
    (set_attr "size" "<bits>")])
 
-(define_insn "mulhs_<mode>"
+(define_insn "smul<mode>3_highpart"
   [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
 	(mult:VIlong (ashiftrt
 		       (match_operand:VIlong 1 "vsx_register_operand" "v")
@@ -6363,7 +6363,7 @@  (define_insn "mulhs_<mode>"
   "vmulhs<wd> %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "mulhu_<mode>"
+(define_insn "umul<mode>3_highpart"
   [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
 	(us_mult:VIlong (ashiftrt
 			  (match_operand:VIlong 1 "vsx_register_operand" "v")
diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c
new file mode 100644
index 00000000000..2c89c0faec2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c
@@ -0,0 +1,32 @@ 
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test vectorizer can exploit ISA 3.1 instructions Vector Multiply
+   High Signed/Unsigned Word for both signed and unsigned int high part
+   multiplication.  */
+
+#define N 128
+
+extern signed int si_a[N], si_b[N], si_c[N];
+extern unsigned int ui_a[N], ui_b[N], ui_c[N];
+
+typedef signed long long sLL;
+typedef unsigned long long uLL;
+
+__attribute__ ((noipa)) void
+test_si ()
+{
+  for (int i = 0; i < N; i++)
+    si_c[i] = ((sLL) si_a[i] * (sLL) si_b[i]) >> 32;
+}
+
+__attribute__ ((noipa)) void
+test_ui ()
+{
+  for (int i = 0; i < N; i++)
+    ui_c[i] = ((uLL) ui_a[i] * (uLL) ui_b[i]) >> 32;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvmulhsw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulhuw\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c
new file mode 100644
index 00000000000..265e7588bb6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c
@@ -0,0 +1,33 @@ 
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test vectorizer can exploit ISA 3.1 instructions Vector Multiply
+   High Signed/Unsigned Doubleword for both signed and unsigned long
+   long high part multiplication.  */
+
+#define N 128
+
+extern signed long long sll_a[N], sll_b[N], sll_c[N];
+extern unsigned long long ull_a[N], ull_b[N], ull_c[N];
+
+typedef signed __int128 s128;
+typedef unsigned __int128 u128;
+
+__attribute__ ((noipa)) void
+test_sll ()
+{
+  for (int i = 0; i < N; i++)
+    sll_c[i] = ((s128) sll_a[i] * (s128) sll_b[i]) >> 64;
+}
+
+__attribute__ ((noipa)) void
+test_ull ()
+{
+  for (int i = 0; i < N; i++)
+    ull_c[i] = ((u128) ull_a[i] * (u128) ull_b[i]) >> 64;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvmulhsd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulhud\M} 1 } } */