diff mbox series

PR target/108958: Use mtvsrdd to zero extend GPR DImode to VSX TImode

Message ID Zzrj84eBWQGouWYG@cowardly-lion.the-meissners.org
State New
Headers show
Series PR target/108958: Use mtvsrdd to zero extend GPR DImode to VSX TImode | expand

Commit Message

Michael Meissner Nov. 18, 2024, 6:51 a.m. UTC
Previously GCC would zero extend a DImode GPR value to TImode by first zero
extending the DImode value into a GPR TImode value, and then doing a MTVSRDD to
move this value to a VSX register.

This patch does the move directly, since if the middle argument to MTVSRDD is 0,
it does the zero extend.

If the DImode value is already in a vector register, it does a XXSPLTIB and
XXPERMDI to get the value into the bottom 64-bits of the register.

I have built GCC with the patches in this patch set applied on both little and
big endian PowerPC systems and there were no regressions.  Can I apply this
patch to GCC 15?

2024-11-17  Michael Meissner  <meissner@linux.ibm.com>

gcc/

	PR target/108958
	* config/rs6000/rs6000.md (zero_extendditi2): New insn.

gcc/testsuite/

	PR target/108958
	* gcc.target/powerpc/pr108958.c: New test.
---
 gcc/config/rs6000/rs6000.md                 | 46 +++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr108958.c | 27 ++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108958.c

Comments

Michael Meissner Dec. 4, 2024, 8:06 a.m. UTC | #1
Ping patch for PR target/108958, Use mtvsrdd to zero extend GPR DImode to VSX
TImode

Message-ID <Zzrj84eBWQGouWYG@cowardly-lion.the-meissners.org>

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/669242.html
Michael Meissner Jan. 9, 2025, 6:07 p.m. UTC | #2
Ping patch for PR target/108958, Use mtvsrdd to zero extend GPR DImode to VSX
TImode

Message-ID <Zzrj84eBWQGouWYG@cowardly-lion.the-meissners.org>

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/669242.html
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d266f93ff2e..bfb02b07ef4 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1026,6 +1026,52 @@  (define_insn_and_split "*zero_extendsi<mode>2_dot2"
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
+(define_insn_and_split "zero_extendditi2"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=r,wa,&wa")
+	(zero_extend:TI
+	 (match_operand:DI 1 "gpc_reg_operand" "rwa,r,wa")))]
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
+  "@
+  #
+  mtvsrdd %x0,0,%1
+  #"
+  "&& reload_completed
+   && (int_reg_operand (operands[0], TImode)
+       || vsx_register_operand (operands[1], DImode))"
+  [(set (match_dup 2)
+	(match_dup 3))
+   (set (match_dup 4)
+	(match_dup 5))]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  int r = reg_or_subregno (op0);
+
+  if (int_reg_operand (op0, TImode))
+    {
+      int lo = BYTES_BIG_ENDIAN ? 1 : 0;
+      int hi = 1 - lo;
+
+      operands[2] = gen_rtx_REG (DImode, r + lo);
+      operands[3] = op1;
+      operands[4] = gen_rtx_REG (DImode, r + hi);
+      operands[5] = const0_rtx;
+    }
+  else
+    {
+      rtx op0_di = gen_rtx_REG (DImode, r);
+      rtx op0_v2di = gen_rtx_REG (V2DImode, r);
+      rtx lo = WORDS_BIG_ENDIAN ? op1 : op0_di;
+      rtx hi = WORDS_BIG_ENDIAN ? op0_di : op1;
+
+      operands[2] = op0_v2di;
+      operands[3] = CONST0_RTX (V2DImode);
+      operands[4] = op0_v2di;
+      operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo);
+    }
+}
+  [(set_attr "type" "*,mtvsr,vecperm")
+   (set_attr "length" "8,*,8")])
 
 (define_insn "extendqi<mode>2"
   [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c
new file mode 100644
index 00000000000..03eb58d069e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register.  */
+
+void
+gpr_to_vsx (unsigned long long x, __uint128_t *p)
+{
+  /* mtvsrdd vsx,0,gpr.  */
+  __uint128_t y = x;
+  __asm__ (" # %x0" : "+wa" (y));
+  *p = y;
+}
+
+void
+gpr_to_gpr (unsigned long long x, __uint128_t *p)
+{
+  /* mr and li.  */
+  __uint128_t y = x;
+  __asm__ (" # %0" : "+r" (y));
+  *p = y;
+}
+
+/* { dg-final { scan-assembler-times {\mli\M}              1 } } */
+/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */