Message ID | mpth68k8bo9.fsf@arm.com |
---|---|
State | New |
Headers | show |
Series | aarch64: Add support for SVE2.1 | expand |
Richard Sandiford <richard.sandiford@arm.com> writes: > Some instructions that were previously restricted to streaming mode > can also be used in non-streaming mode with SVE2.1. This patch adds > support for those, as well as the usual new-extension boilerplate. > A later patch will add the feature macro. > > gcc/ > * config/aarch64/aarch64-option-extensions.def (sve2p1): New extension. Gah, just realised that I forgot to document this :( Will fix. Richard > * config/aarch64/aarch64-sve-builtins-sve2.def: Mark instructions > that are common to both SVE2p1 and SME. > * config/aarch64/aarch64.h (TARGET_SVE2p1): New macro. > (TARGET_SVE2p1_OR_SME): Likewise. > * config/aarch64/aarch64-sve2.md > (@aarch64_sve_psel<BHSD_BITS>): Require TARGET_SVE2p1_OR_SME > instead of TARGET_STREAMING. > (*aarch64_sve_psel<BHSD_BITS>_plus): Likewise. > (@aarch64_sve_<su>clamp<mode>): Likewise. > (*aarch64_sve_<su>clamp<mode>_x): Likewise. > (@aarch64_pred_<optab><mode>): Likewise. > (@cond_<optab><mode>): Likewise. > > gcc/testsuite/ > * lib/target-supports.exp > (check_effective_target_aarch64_asm_sve2p1_ok): New procedure. > * gcc.target/aarch64/sve/clamp_1.c: New test. > * gcc.target/aarch64/sve/clamp_2.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/clamp_s16.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/clamp_s32.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/clamp_s64.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/clamp_s8.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/clamp_u16.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/clamp_u32.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/clamp_u64.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/clamp_u8.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/psel_lane_b16.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/psel_lane_b32.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/psel_lane_b64.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/psel_lane_b8.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/psel_lane_c16.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/psel_lane_c32.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/psel_lane_c64.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/psel_lane_c8.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_bf16.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_f16.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_f32.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_f64.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_s16.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_s32.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_s64.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_s8.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_u16.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_u32.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_u64.c: Likewise. > * gcc.target/aarch64/sve2/acle/asm/revd_u8.c: Likewise. > --- > .../aarch64/aarch64-option-extensions.def | 2 + > .../aarch64/aarch64-sve-builtins-sve2.def | 2 +- > gcc/config/aarch64/aarch64-sve2.md | 12 +-- > gcc/config/aarch64/aarch64.h | 9 ++ > .../gcc.target/aarch64/sve/clamp_1.c | 40 ++++++++ > .../gcc.target/aarch64/sve/clamp_2.c | 34 +++++++ > .../aarch64/sve2/acle/asm/clamp_s16.c | 46 +++++++++ > .../aarch64/sve2/acle/asm/clamp_s32.c | 46 +++++++++ > .../aarch64/sve2/acle/asm/clamp_s64.c | 46 +++++++++ > .../aarch64/sve2/acle/asm/clamp_s8.c | 46 +++++++++ > .../aarch64/sve2/acle/asm/clamp_u16.c | 46 +++++++++ > .../aarch64/sve2/acle/asm/clamp_u32.c | 46 +++++++++ > .../aarch64/sve2/acle/asm/clamp_u64.c | 46 +++++++++ > .../aarch64/sve2/acle/asm/clamp_u8.c | 46 +++++++++ > .../aarch64/sve2/acle/asm/psel_lane_b16.c | 93 +++++++++++++++++++ > .../aarch64/sve2/acle/asm/psel_lane_b32.c | 93 +++++++++++++++++++ > .../aarch64/sve2/acle/asm/psel_lane_b64.c | 84 +++++++++++++++++ > .../aarch64/sve2/acle/asm/psel_lane_b8.c | 93 +++++++++++++++++++ > .../aarch64/sve2/acle/asm/psel_lane_c16.c | 93 +++++++++++++++++++ > .../aarch64/sve2/acle/asm/psel_lane_c32.c | 93 +++++++++++++++++++ > .../aarch64/sve2/acle/asm/psel_lane_c64.c | 84 +++++++++++++++++ > .../aarch64/sve2/acle/asm/psel_lane_c8.c | 93 +++++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_bf16.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_f16.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_f32.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_f64.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_s16.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_s32.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_s64.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_s8.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_u16.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_u32.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_u64.c | 80 ++++++++++++++++ > .../aarch64/sve2/acle/asm/revd_u8.c | 80 ++++++++++++++++ > gcc/testsuite/lib/target-supports.exp | 10 ++ > 35 files changed, 2156 insertions(+), 7 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s32.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s64.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s8.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u32.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u64.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u8.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b32.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b64.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b8.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c32.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c64.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c8.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_bf16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f32.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f64.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s32.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s64.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s8.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u32.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u64.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u8.c > > diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def > index 8279f5a76ea..c9d419afc8f 100644 > --- a/gcc/config/aarch64/aarch64-option-extensions.def > +++ b/gcc/config/aarch64/aarch64-option-extensions.def > @@ -192,6 +192,8 @@ AARCH64_OPT_EXTENSION("sve2-sm4", SVE2_SM4, (SVE2, SM4), (), (), "svesm4") > > AARCH64_FMV_FEATURE("sve2-sm4", SVE_SM4, (SVE2_SM4)) > > +AARCH64_OPT_EXTENSION("sve2p1", SVE2p1, (SVE2), (), (), "") > + > AARCH64_OPT_FMV_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") > > AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "") > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def > index 12548fe39cb..5cc32aa8871 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def > +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def > @@ -220,7 +220,7 @@ DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none) > DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none) > #undef REQUIRED_EXTENSIONS > > -#define REQUIRED_EXTENSIONS streaming_only (0) > +#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p1, 0) > DEF_SVE_FUNCTION (svclamp, clamp, all_integer, none) > DEF_SVE_FUNCTION (svpsel_lane, select_pred, all_pred_count, none) > DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz) > diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md > index a7b29daeba4..fd4bd42b6d9 100644 > --- a/gcc/config/aarch64/aarch64-sve2.md > +++ b/gcc/config/aarch64/aarch64-sve2.md > @@ -418,7 +418,7 @@ (define_insn "@aarch64_sve_psel<BHSD_BITS>" > (match_operand:SI 3 "register_operand" "Ucj") > (const_int BHSD_BITS)] > UNSPEC_PSEL))] > - "TARGET_STREAMING" > + "TARGET_SVE2p1_OR_SME" > "psel\t%0, %1, %2.<bits_etype>[%w3, 0]" > ) > > @@ -432,7 +432,7 @@ (define_insn "*aarch64_sve_psel<BHSD_BITS>_plus" > (match_operand:SI 4 "const_int_operand")) > (const_int BHSD_BITS)] > UNSPEC_PSEL))] > - "TARGET_STREAMING > + "TARGET_SVE2p1_OR_SME > && UINTVAL (operands[4]) < 128 / <BHSD_BITS>" > "psel\t%0, %1, %2.<bits_etype>[%w3, %4]" > ) > @@ -560,7 +560,7 @@ (define_insn "@aarch64_sve_<su>clamp<mode>" > (match_operand:SVE_FULL_I 1 "register_operand") > (match_operand:SVE_FULL_I 2 "register_operand")) > (match_operand:SVE_FULL_I 3 "register_operand")))] > - "TARGET_STREAMING" > + "TARGET_SVE2p1_OR_SME" > {@ [cons: =0, 1, 2, 3; attrs: movprfx] > [ w, %0, w, w; * ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype> > [ ?&w, w, w, w; yes ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype> > @@ -580,7 +580,7 @@ (define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x" > UNSPEC_PRED_X) > (match_operand:SVE_FULL_I 3 "register_operand"))] > UNSPEC_PRED_X))] > - "TARGET_STREAMING" > + "TARGET_SVE2p1_OR_SME" > {@ [cons: =0, 1, 2, 3; attrs: movprfx] > [ w, %0, w, w; * ] # > [ ?&w, w, w, w; yes ] # > @@ -3182,7 +3182,7 @@ (define_insn "@aarch64_pred_<optab><mode>" > [(match_operand:SVE_FULL 2 "register_operand")] > UNSPEC_REVD_ONLY)] > UNSPEC_PRED_X))] > - "TARGET_STREAMING" > + "TARGET_SVE2p1_OR_SME" > {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] > [ w , Upl , 0 ; * ] revd\t%0.q, %1/m, %2.q > [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q > @@ -3198,7 +3198,7 @@ (define_insn "@cond_<optab><mode>" > UNSPEC_REVD_ONLY) > (match_operand:SVE_FULL 3 "register_operand")] > UNSPEC_SEL))] > - "TARGET_STREAMING" > + "TARGET_SVE2p1_OR_SME" > {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] > [ w , Upl , w , 0 ; * ] revd\t%0.q, %1/m, %2.q > [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index d17f40ce22e..404efa16c28 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -338,6 +338,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED > /* SVE2 SM4 instructions, enabled through +sve2-sm4. */ > #define TARGET_SVE2_SM4 (AARCH64_HAVE_ISA (SVE2_SM4) && TARGET_NON_STREAMING) > > +/* SVE2p1 instructions, enabled through +sve2p1. */ > +#define TARGET_SVE2p1 AARCH64_HAVE_ISA (SVE2p1) > + > /* SME instructions, enabled through +sme. Note that this does not > imply anything about the state of PSTATE.SM; instructions that require > SME and streaming mode should use TARGET_STREAMING instead. */ > @@ -481,6 +484,12 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED > /* fp8 instructions are enabled through +fp8. */ > #define TARGET_FP8 AARCH64_HAVE_ISA (FP8) > > +/* Combinatorial tests. */ > + > +/* There's no need to check TARGET_SME for streaming or streaming-compatible > + functions, since streaming mode itself implies SME. */ > +#define TARGET_SVE2p1_OR_SME (TARGET_SVE2p1 || TARGET_STREAMING) > + > /* Standard register usage. */ > > /* 31 64-bit general purpose registers R0-R30: > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c > new file mode 100644 > index 00000000000..92fef098865 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c > @@ -0,0 +1,40 @@ > +// { dg-options "-O" } > + > +#include <arm_sve.h> > + > +#pragma GCC target "+sve2p1" > + > +#define TEST(TYPE) \ > + TYPE \ > + tied1_##TYPE(TYPE a, TYPE b, TYPE c) \ > + { \ > + return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), a, b), c); \ > + } \ > + \ > + TYPE \ > + tied2_##TYPE(TYPE a, TYPE b, TYPE c) \ > + { \ > + return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, a), c); \ > + } > + > +TEST(svint8_t) > +TEST(svint16_t) > +TEST(svint32_t) > +TEST(svint64_t) > + > +TEST(svuint8_t) > +TEST(svuint16_t) > +TEST(svuint32_t) > +TEST(svuint64_t) > + > +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */ > + > +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c > new file mode 100644 > index 00000000000..f96c0046465 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c > @@ -0,0 +1,34 @@ > +// { dg-options "-O" } > + > +#include <arm_sve.h> > + > +#pragma GCC target "+sve2p1" > + > +#define TEST(TYPE) \ > + TYPE \ > + untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) \ > + { \ > + return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, c), d); \ > + } > + > +TEST(svint8_t) > +TEST(svint16_t) > +TEST(svint32_t) > +TEST(svint64_t) > + > +TEST(svuint8_t) > +TEST(svuint16_t) > +TEST(svuint32_t) > +TEST(svuint64_t) > + > +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */ > + > +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 8 } } */ > diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp > index 75703ddca60..a8833d585c6 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -12100,6 +12100,16 @@ foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" > }] > } > > +proc check_effective_target_aarch64_asm_sve2p1_ok { } { > + if { [istarget aarch64*-*-*] } { > + return [check_no_compiler_messages aarch64_sve2p1_assembler object { > + __asm__ (".arch_extension sve2p1; ld1w {z0.q},p7/z,[x0]"); > + } "-march=armv8-a+sve2p1"] > + } else { > + return 0 > + } > +} > + > proc check_effective_target_aarch64_small { } { > if { [istarget aarch64*-*-*] } { > return [check_no_compiler_messages aarch64_small object {
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 8279f5a76ea..c9d419afc8f 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -192,6 +192,8 @@ AARCH64_OPT_EXTENSION("sve2-sm4", SVE2_SM4, (SVE2, SM4), (), (), "svesm4") AARCH64_FMV_FEATURE("sve2-sm4", SVE_SM4, (SVE2_SM4)) +AARCH64_OPT_EXTENSION("sve2p1", SVE2p1, (SVE2), (), (), "") + AARCH64_OPT_FMV_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "") diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index 12548fe39cb..5cc32aa8871 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -220,7 +220,7 @@ DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none) DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none) #undef REQUIRED_EXTENSIONS -#define REQUIRED_EXTENSIONS streaming_only (0) +#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p1, 0) DEF_SVE_FUNCTION (svclamp, clamp, all_integer, none) DEF_SVE_FUNCTION (svpsel_lane, select_pred, all_pred_count, none) DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index a7b29daeba4..fd4bd42b6d9 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -418,7 +418,7 @@ (define_insn "@aarch64_sve_psel<BHSD_BITS>" (match_operand:SI 3 "register_operand" "Ucj") (const_int BHSD_BITS)] UNSPEC_PSEL))] - "TARGET_STREAMING" + "TARGET_SVE2p1_OR_SME" "psel\t%0, %1, %2.<bits_etype>[%w3, 0]" ) @@ -432,7 +432,7 @@ (define_insn "*aarch64_sve_psel<BHSD_BITS>_plus" (match_operand:SI 4 "const_int_operand")) (const_int BHSD_BITS)] UNSPEC_PSEL))] - "TARGET_STREAMING + "TARGET_SVE2p1_OR_SME && UINTVAL (operands[4]) < 128 / <BHSD_BITS>" "psel\t%0, %1, %2.<bits_etype>[%w3, %4]" ) @@ -560,7 +560,7 @@ (define_insn "@aarch64_sve_<su>clamp<mode>" (match_operand:SVE_FULL_I 1 "register_operand") (match_operand:SVE_FULL_I 2 "register_operand")) (match_operand:SVE_FULL_I 3 "register_operand")))] - "TARGET_STREAMING" + "TARGET_SVE2p1_OR_SME" {@ [cons: =0, 1, 2, 3; attrs: movprfx] [ w, %0, w, w; * ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype> [ ?&w, w, w, w; yes ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype> @@ -580,7 +580,7 @@ (define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x" UNSPEC_PRED_X) (match_operand:SVE_FULL_I 3 "register_operand"))] UNSPEC_PRED_X))] - "TARGET_STREAMING" + "TARGET_SVE2p1_OR_SME" {@ [cons: =0, 1, 2, 3; attrs: movprfx] [ w, %0, w, w; * ] # [ ?&w, w, w, w; yes ] # @@ -3182,7 +3182,7 @@ (define_insn "@aarch64_pred_<optab><mode>" [(match_operand:SVE_FULL 2 "register_operand")] UNSPEC_REVD_ONLY)] UNSPEC_PRED_X))] - "TARGET_STREAMING" + "TARGET_SVE2p1_OR_SME" {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] [ w , Upl , 0 ; * ] revd\t%0.q, %1/m, %2.q [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q @@ -3198,7 +3198,7 @@ (define_insn "@cond_<optab><mode>" UNSPEC_REVD_ONLY) (match_operand:SVE_FULL 3 "register_operand")] UNSPEC_SEL))] - "TARGET_STREAMING" + "TARGET_SVE2p1_OR_SME" {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] [ w , Upl , w , 0 ; * ] revd\t%0.q, %1/m, %2.q [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index d17f40ce22e..404efa16c28 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -338,6 +338,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* SVE2 SM4 instructions, enabled through +sve2-sm4. */ #define TARGET_SVE2_SM4 (AARCH64_HAVE_ISA (SVE2_SM4) && TARGET_NON_STREAMING) +/* SVE2p1 instructions, enabled through +sve2p1. */ +#define TARGET_SVE2p1 AARCH64_HAVE_ISA (SVE2p1) + /* SME instructions, enabled through +sme. Note that this does not imply anything about the state of PSTATE.SM; instructions that require SME and streaming mode should use TARGET_STREAMING instead. */ @@ -481,6 +484,12 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* fp8 instructions are enabled through +fp8. */ #define TARGET_FP8 AARCH64_HAVE_ISA (FP8) +/* Combinatorial tests. */ + +/* There's no need to check TARGET_SME for streaming or streaming-compatible + functions, since streaming mode itself implies SME. */ +#define TARGET_SVE2p1_OR_SME (TARGET_SVE2p1 || TARGET_STREAMING) + /* Standard register usage. */ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c new file mode 100644 index 00000000000..92fef098865 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c @@ -0,0 +1,40 @@ +// { dg-options "-O" } + +#include <arm_sve.h> + +#pragma GCC target "+sve2p1" + +#define TEST(TYPE) \ + TYPE \ + tied1_##TYPE(TYPE a, TYPE b, TYPE c) \ + { \ + return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), a, b), c); \ + } \ + \ + TYPE \ + tied2_##TYPE(TYPE a, TYPE b, TYPE c) \ + { \ + return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, a), c); \ + } + +TEST(svint8_t) +TEST(svint16_t) +TEST(svint32_t) +TEST(svint64_t) + +TEST(svuint8_t) +TEST(svuint16_t) +TEST(svuint32_t) +TEST(svuint64_t) + +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c new file mode 100644 index 00000000000..f96c0046465 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c @@ -0,0 +1,34 @@ +// { dg-options "-O" } + +#include <arm_sve.h> + +#pragma GCC target "+sve2p1" + +#define TEST(TYPE) \ + TYPE \ + untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, c), d); \ + } + +TEST(svint8_t) +TEST(svint16_t) +TEST(svint32_t) +TEST(svint64_t) + +TEST(svuint8_t) +TEST(svuint16_t) +TEST(svuint32_t) +TEST(svuint64_t) + +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 8 } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 75703ddca60..a8833d585c6 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -12100,6 +12100,16 @@ foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" }] } +proc check_effective_target_aarch64_asm_sve2p1_ok { } { + if { [istarget aarch64*-*-*] } { + return [check_no_compiler_messages aarch64_sve2p1_assembler object { + __asm__ (".arch_extension sve2p1; ld1w {z0.q},p7/z,[x0]"); + } "-march=armv8-a+sve2p1"] + } else { + return 0 + } +} + proc check_effective_target_aarch64_small { } { if { [istarget aarch64*-*-*] } { return [check_no_compiler_messages aarch64_small object {