diff mbox series

[1/3] aarch64: Restrict FCLAMP to SME2

Message ID 20241106102641.511909-2-richard.sandiford@arm.com
State New
Headers show
Series aarch64: Fix various issues with the SME support | expand

Commit Message

Richard Sandiford Nov. 6, 2024, 10:26 a.m. UTC
There are two sets of patterns for FCLAMP: one set for single registers
and one set for multiple registers.  The multiple-register set was
correctly gated on SME2, but the single-register set only required SME.
This doesn't matter for ACLE usage, since the intrinsic definitions
are correctly gated.  But it does matter for automatic generation of
FCLAMP from separate minimum and maximum operations (either ACLE
intrinsics or autovectorised code).

gcc/
	* config/aarch64/aarch64-sve2.md (@aarch64_sve_fclamp<mode>)
	(*aarch64_sve_fclamp<mode>_x): Require TARGET_STREAMING_SME2
	rather than TARGET_STREAMING_SME.

gcc/testsuite/
	* gcc.target/aarch64/sme/clamp_3.c: Force sme2.
	* gcc.target/aarch64/sme/clamp_4.c: Likewise.
	* gcc.target/aarch64/sme/clamp_5.c: New test.
---
 gcc/config/aarch64/aarch64-sve2.md            |  4 ++--
 .../gcc.target/aarch64/sme/clamp_3.c          |  2 ++
 .../gcc.target/aarch64/sme/clamp_4.c          |  2 ++
 .../gcc.target/aarch64/sme/clamp_5.c          | 24 +++++++++++++++++++
 4 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 8047f405a17..08f83fc7ca0 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1117,7 +1117,7 @@  (define_insn "@aarch64_sve_fclamp<mode>"
 	     UNSPEC_FMAXNM)
 	   (match_operand:SVE_FULL_F 3 "register_operand")]
 	  UNSPEC_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
      [       w, %0, w, w; *             ] fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
      [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
@@ -1137,7 +1137,7 @@  (define_insn_and_split "*aarch64_sve_fclamp<mode>_x"
 	     UNSPEC_COND_FMAXNM)
 	   (match_operand:SVE_FULL_F 3 "register_operand")]
 	  UNSPEC_COND_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
      [       w, %0, w, w; *             ] #
      [     ?&w,  w, w, w; yes           ] #
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
index 44959f79490..162de6224d5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
@@ -2,6 +2,8 @@ 
 
 #include <arm_sme.h>
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE)							\
   TYPE									\
   tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming			\
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
index 643b2635b90..453c82cd860 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
@@ -2,6 +2,8 @@ 
 
 #include <arm_sme.h>
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE)							\
   TYPE									\
   untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming		\
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
new file mode 100644
index 00000000000..7c5464bdc36
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
@@ -0,0 +1,24 @@ 
+// { dg-options "-O" }
+
+#include <arm_sme.h>
+
+#pragma GCC target "+nosme2"
+
+#define TEST(TYPE)							\
+  TYPE									\
+  tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming			\
+  {									\
+    return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c);	\
+  }									\
+									\
+  TYPE									\
+  tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming			\
+  {									\
+    return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c);	\
+  }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-not {\tfclamp\t} } } */