diff mbox series

[pushed] aarch64: Avoid using mismatched ZERO ZA sizes

Message ID mpt4jc6msyt.fsf@arm.com
State New
Headers show
Series [pushed] aarch64: Avoid using mismatched ZERO ZA sizes | expand

Commit Message

Richard Sandiford April 12, 2024, 4:44 p.m. UTC
The svzero_mask_za intrinsic tried to use the shortest combination
of .b, .h, .s and .d tiles, allowing mixtures of sizes where necessary.
However, Iain S pointed out that LLVM instead requires the tiles to
have the same suffix.  GAS supports both forms, so this patch
generates the LLVM-friendly form.  For example, a mask of 0xab was
previously output as "zero { za1.h, za0.d }" and is now output as
"zero { za0.d, za1.d, za3.d, za5.d, za7.d }".

Tested on aarch64-linux-gnu & pushed.

Please revert the patch if it causes any problems.

Richard


gcc/
	* config/aarch64/aarch64.cc (aarch64_output_sme_zero_za): Require
	all tiles to have the same suffix.

gcc/testsuite/
	* gcc.target/aarch64/sme/acle-asm/zero_mask_za.c (zero_mask_za_ab)
	(zero_mask_za_d7, zero_mask_za_bf): Expect a list of .d tiles instead
	of a mixture.
---
 gcc/config/aarch64/aarch64.cc                 | 20 +++++++++++--------
 .../aarch64/sme/acle-asm/zero_mask_za.c       |  6 +++---
 2 files changed, 15 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a2e3d208d76..1beec94629d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -13210,29 +13210,33 @@  aarch64_output_sme_zero_za (rtx mask)
   /* The last entry in the list has the form "za7.d }", but that's the
      same length as "za7.d, ".  */
   static char buffer[sizeof("zero\t{ ") + sizeof ("za7.d, ") * 8 + 1];
-  unsigned int i = 0;
-  i += snprintf (buffer + i, sizeof (buffer) - i, "zero\t");
-  const char *prefix = "{ ";
   for (auto &tile : tiles)
     {
       unsigned int tile_mask = tile.mask;
       unsigned int tile_index = 0;
+      unsigned int i = snprintf (buffer, sizeof (buffer), "zero\t");
+      const char *prefix = "{ ";
+      auto remaining_mask = mask_val;
       while (tile_mask < 0x100)
 	{
-	  if ((mask_val & tile_mask) == tile_mask)
+	  if ((remaining_mask & tile_mask) == tile_mask)
 	    {
 	      i += snprintf (buffer + i, sizeof (buffer) - i, "%sza%d.%c",
 			     prefix, tile_index, tile.letter);
 	      prefix = ", ";
-	      mask_val &= ~tile_mask;
+	      remaining_mask &= ~tile_mask;
 	    }
 	  tile_mask <<= 1;
 	  tile_index += 1;
 	}
+      if (remaining_mask == 0)
+	{
+	  gcc_assert (i + 3 <= sizeof (buffer));
+	  snprintf (buffer + i, sizeof (buffer) - i, " }");
+	  return buffer;
+	}
     }
-  gcc_assert (mask_val == 0 && i + 3 <= sizeof (buffer));
-  snprintf (buffer + i, sizeof (buffer) - i, " }");
-  return buffer;
+  gcc_unreachable ();
 }
 
 /* Return size in bits of an arithmetic operand which is shifted/scaled and
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
index 9ce7331ebdd..2ba8f8cc332 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c
@@ -103,21 +103,21 @@  PROTO (zero_mask_za_aa, void, ()) { svzero_mask_za (0xaa); }
 
 /*
 ** zero_mask_za_ab:
-**	zero	{ za1\.h, za0\.d }
+**	zero	{ za0\.d, za1\.d, za3\.d, za5\.d, za7\.d }
 **	ret
 */
 PROTO (zero_mask_za_ab, void, ()) { svzero_mask_za (0xab); }
 
 /*
 ** zero_mask_za_d7:
-**	zero	{ za0\.h, za1\.d, za7\.d }
+**	zero	{ za0\.d, za1\.d, za2\.d, za4\.d, za6\.d, za7\.d }
 **	ret
 */
 PROTO (zero_mask_za_d7, void, ()) { svzero_mask_za (0xd7); }
 
 /*
 ** zero_mask_za_bf:
-**	zero	{ za1\.h, za0\.s, za2\.d }
+**	zero	{ za0\.d, za1\.d, za2\.d, za3\.d, za4\.d, za5\.d, za7\.d }
 **	ret
 */
 PROTO (zero_mask_za_bf, void, ()) { svzero_mask_za (0xbf); }