diff mbox series

[committed] aarch64: LD3/LD4 post-modify costs for struct modes

Message ID mptbl2v2aop.fsf@arm.com
State New
Headers show
Series [committed] aarch64: LD3/LD4 post-modify costs for struct modes | expand

Commit Message

Richard Sandiford Nov. 8, 2021, 10:39 a.m. UTC
The LD3/ST3 and LD4/ST4 address cost code had no test coverage (oops).
This patch fixes that and updates it for the new structure modes.
The test only covers Advanced SIMD because SVE doesn't have
post-increment forms.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
	* config/aarch64/aarch64.c (aarch64_ldn_stn_vectors): New function.
	(aarch64_address_cost): Use it instead of testing for CImode and
	XImode directly.

gcc/testsuite/
	* gcc.target/aarch64/neoverse_v1_1.c: New test.
---
 gcc/config/aarch64/aarch64.c                  | 22 +++++++++++++++++--
 .../gcc.target/aarch64/neoverse_v1_1.c        | 15 +++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neoverse_v1_1.c
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fdf05505846..19f67415234 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3108,6 +3108,23 @@  aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags)
   return BYTES_PER_SVE_PRED;
 }
 
+/* If MODE holds an array of vectors, return the number of vectors
+   in the array, otherwise return 1.  */
+
+static unsigned int
+aarch64_ldn_stn_vectors (machine_mode mode)
+{
+  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+  if (vec_flags == (VEC_ADVSIMD | VEC_PARTIAL | VEC_STRUCT))
+    return exact_div (GET_MODE_SIZE (mode), 8).to_constant ();
+  if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
+    return exact_div (GET_MODE_SIZE (mode), 16).to_constant ();
+  if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
+    return exact_div (GET_MODE_SIZE (mode),
+		      BYTES_PER_SVE_VECTOR).to_constant ();
+  return 1;
+}
+
 /* Given an Advanced SIMD vector mode MODE and a tuple size NELEMS, return the
    corresponding vector structure mode.  */
 static opt_machine_mode
@@ -12511,9 +12528,10 @@  aarch64_address_cost (rtx x,
 	  cost += addr_cost->pre_modify;
 	else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
 	  {
-	    if (mode == CImode)
+	    unsigned int nvectors = aarch64_ldn_stn_vectors (mode);
+	    if (nvectors == 3)
 	      cost += addr_cost->post_modify_ld3_st3;
-	    else if (mode == XImode)
+	    else if (nvectors == 4)
 	      cost += addr_cost->post_modify_ld4_st4;
 	    else
 	      cost += addr_cost->post_modify;
diff --git a/gcc/testsuite/gcc.target/aarch64/neoverse_v1_1.c b/gcc/testsuite/gcc.target/aarch64/neoverse_v1_1.c
new file mode 100644
index 00000000000..c1563f01861
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neoverse_v1_1.c
@@ -0,0 +1,15 @@ 
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+
+void
+foo (short *restrict x, short y[restrict][128])
+{
+  for (int i = 0; i < 128; ++i)
+    {
+      y[0][i] = x[i * 3 + 0];
+      y[1][i] = x[i * 3 + 1];
+      y[2][i] = x[i * 3 + 2];
+    }
+}
+
+/* This shouldn't be a post-increment.  */
+/* { dg-final { scan-assembler {ld3\t{[^{}]*}, \[x[0-9]+\]\n} } } */