diff mbox

[rs6000] Use lxvx and stxvx for 128-bit float, etc., with -mcpu=power9

Message ID 1452105298.3351.9.camel@gnopaine
State New
Headers show

Commit Message

Bill Schmidt Jan. 6, 2016, 6:34 p.m. UTC
Hi,

I previously added POWER9 support for lxvx and stxvx to replace the
load-swap and swap-store patterns for POWER8.  However, I missed the
fact that we have different patterns for loads and stores of 128-bit
floats and other scalars.  This patch expands the previous POWER9
override to catch these cases, and disables those other patterns when P9
vector support is available.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Ok for trunk?

Thanks,
Bill


[gcc]

2016-01-06  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/vsx.md (*p9_vecload_<mode>): Replace VSX_M
	mode iterator with VSX_M2.
	(*p9_vecstore_<mode>): Likewise.
	(*vsx_le_permute_<mode>): Restrict to !TARGET_P9_VECTOR.
	(*vsx_le_perm_load_<mode> for VSX_LE_128): Likewise.
	(*vsx_le_perm_store_<mode> for VSX_LE_128): Likewise.
	(define_split for VSX_LE_128 stores): Likewise.
	(define_peephole2 for TImode LE swaps): Likewise.
	(define_split for VSX_LE_128 post-reload stores): Likewise.

[gcc/testsuite]

2016-01-06  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* gcc.target/powerpc/p9-lxvx-stxvx-3.c: New test.

Comments

David Edelsohn Jan. 6, 2016, 6:47 p.m. UTC | #1
On Wed, Jan 6, 2016 at 1:34 PM, Bill Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> Hi,
>
> I previously added POWER9 support for lxvx and stxvx to replace the
> load-swap and swap-store patterns for POWER8.  However, I missed the
> fact that we have different patterns for loads and stores of 128-bit
> floats and other scalars.  This patch expands the previous POWER9
> override to catch these cases, and disables those other patterns when P9
> vector support is available.
>
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> regressions.  Ok for trunk?
>
> Thanks,
> Bill
>
>
> [gcc]
>
> 2015-01-06  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>         * config/rs6000/vsx.md (*p9_vecload_<mode>): Replace VSX_M
>         mode iterator with VSX_M2.
>         (*p9_vecstore_<mode>): Likewise.
>         (*vsx_le_permute_<mode>): Restrict to !TARGET_P9_VECTOR.
>         (*vsx_le_perm_load_<mode> for VSX_LE_128): Likewise.
>         (*vsx_le_perm_store_<mode> for VSX_LE_128): Likewise.
>         (define_split for VSX_LE128 stores): Likewise.
>         (define_peephole2 for TImode LE swaps): Likewise.
>         (define_split for VSX_LE128 post-reload stores): Likewise.
>
> [gcc/testsuite]
>
> 2015-01-06  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>         * gcc.target/powerpc/p9-lxvx-stxvx-3.c: New test.

Okay.

Thanks, David
diff mbox

Patch

Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 232077)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -304,8 +304,8 @@ 
 ;; VSX (P9) moves
 
 (define_insn "*p9_vecload_<mode>"
-  [(set (match_operand:VSX_M 0 "vsx_register_operand" "=<VSa>")
-        (match_operand:VSX_M 1 "memory_operand" "Z"))]
+  [(set (match_operand:VSX_M2 0 "vsx_register_operand" "=<VSa>")
+        (match_operand:VSX_M2 1 "memory_operand" "Z"))]
   "TARGET_P9_VECTOR"
   "lxvx %x0,%y1"
   [(set_attr "type" "vecload")
@@ -312,8 +312,8 @@ 
    (set_attr "length" "4")])
 
 (define_insn "*p9_vecstore_<mode>"
-  [(set (match_operand:VSX_M 0 "memory_operand" "=Z")
-        (match_operand:VSX_M 1 "vsx_register_operand" "<VSa>"))]
+  [(set (match_operand:VSX_M2 0 "memory_operand" "=Z")
+        (match_operand:VSX_M2 1 "vsx_register_operand" "<VSa>"))]
   "TARGET_P9_VECTOR"
   "stxvx %x1,%y0"
   [(set_attr "type" "vecstore")
@@ -680,7 +680,7 @@ 
 	(rotate:VSX_LE_128
 	 (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
 	 (const_int 64)))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "@
    xxpermdi %x0,%x1,%x1,2
    lxvd2x %x0,%y1
@@ -714,9 +714,9 @@ 
 (define_insn_and_split "*vsx_le_perm_load_<mode>"
   [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>")
         (match_operand:VSX_LE_128 1 "memory_operand" "Z"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   [(set (match_dup 2)
 	(rotate:VSX_LE_128 (match_dup 1)
 			   (const_int 64)))
@@ -735,7 +735,7 @@ 
 (define_insn "*vsx_le_perm_store_<mode>"
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   [(set_attr "type" "vecstore")
    (set_attr "length" "12")])
@@ -743,7 +743,7 @@ 
 (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
   [(set (match_dup 2)
 	(rotate:VSX_LE_128 (match_dup 1)
 			   (const_int 64)))
@@ -765,7 +765,7 @@ 
    (set (match_operand:TI 2 "vsx_register_operand" "")
 	(rotate:TI (match_dup 0)
 		   (const_int 64)))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR
    && (rtx_equal_p (operands[0], operands[2])
        || peep2_reg_dead_p (2, operands[0]))"
    [(set (match_dup 2) (match_dup 1))])
@@ -775,7 +775,7 @@ 
 (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
   [(set (match_dup 1)
 	(rotate:VSX_LE_128 (match_dup 1)
 			   (const_int 64)))
Index: gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c	(working copy)
@@ -0,0 +1,30 @@ 
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O3" } */
+/* { dg-final { scan-assembler "lxvx" } } */
+/* { dg-final { scan-assembler "stxvx" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* Verify P9 vector loads and stores are used rather than the
+   load-swap/swap-store workarounds for P8.  */
+#define SIZE (16384/sizeof(__float128))
+
+static __float128 x[SIZE] __attribute__ ((aligned (16)));
+static __float128 y[SIZE] __attribute__ ((aligned (16)));
+static __float128 a;
+
+void obfuscate(void *a, ...);
+
+void __attribute__((noinline)) do_one(void)
+{
+  unsigned long i;
+
+  obfuscate(x, y, &a);
+
+  for (i = 0; i < SIZE; i++)
+    y[i] = a * x[i];
+
+  obfuscate(x, y, &a);
+}