diff mbox series

[12/15] aarch64: Add common subset of SVE2p1 and SME

Message ID: mpth68k8bo9.fsf@arm.com
State: New
Headers: show
Series: aarch64: Add support for SVE2.1 | expand

Commit Message

Richard Sandiford Nov. 6, 2024, 6:22 p.m. UTC
Some instructions that were previously restricted to streaming mode
can also be used in non-streaming mode with SVE2.1.  This patch adds
support for those, as well as the usual new-extension boilerplate.
A later patch will add the feature macro.

gcc/
	* config/aarch64/aarch64-option-extensions.def (sve2p1): New extension.
	* config/aarch64/aarch64-sve-builtins-sve2.def: Mark instructions
	that are common to both SVE2p1 and SME.
	* config/aarch64/aarch64.h (TARGET_SVE2p1): New macro.
	(TARGET_SVE2p1_OR_SME): Likewise.
	* config/aarch64/aarch64-sve2.md
	(@aarch64_sve_psel<BHSD_BITS>): Require TARGET_SVE2p1_OR_SME
	instead of TARGET_STREAMING.
	(*aarch64_sve_psel<BHSD_BITS>_plus): Likewise.
	(@aarch64_sve_<su>clamp<mode>): Likewise.
	(*aarch64_sve_<su>clamp<mode>_x): Likewise.
	(@aarch64_pred_<optab><mode>): Likewise.
	(@cond_<optab><mode>): Likewise.

gcc/testsuite/
	* lib/target-supports.exp
	(check_effective_target_aarch64_asm_sve2p1_ok): New procedure.
	* gcc.target/aarch64/sve/clamp_1.c: New test.
	* gcc.target/aarch64/sve/clamp_2.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/clamp_s16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/clamp_s32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/clamp_s64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/clamp_s8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/clamp_u16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/clamp_u32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/clamp_u64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/clamp_u8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/psel_lane_b16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/psel_lane_b32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/psel_lane_b64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/psel_lane_b8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/psel_lane_c16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/psel_lane_c32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/psel_lane_c64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/psel_lane_c8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_bf16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_f16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_f32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_f64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_s16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_s32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_s64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_s8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_u16.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_u32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_u64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/revd_u8.c: Likewise.
---
 .../aarch64/aarch64-option-extensions.def     |  2 +
 .../aarch64/aarch64-sve-builtins-sve2.def     |  2 +-
 gcc/config/aarch64/aarch64-sve2.md            | 12 +--
 gcc/config/aarch64/aarch64.h                  |  9 ++
 .../gcc.target/aarch64/sve/clamp_1.c          | 40 ++++++++
 .../gcc.target/aarch64/sve/clamp_2.c          | 34 +++++++
 .../aarch64/sve2/acle/asm/clamp_s16.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_s32.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_s64.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_s8.c          | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_u16.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_u32.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_u64.c         | 46 +++++++++
 .../aarch64/sve2/acle/asm/clamp_u8.c          | 46 +++++++++
 .../aarch64/sve2/acle/asm/psel_lane_b16.c     | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_b32.c     | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_b64.c     | 84 +++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_b8.c      | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_c16.c     | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_c32.c     | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_c64.c     | 84 +++++++++++++++++
 .../aarch64/sve2/acle/asm/psel_lane_c8.c      | 93 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_bf16.c         | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_f16.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_f32.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_f64.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_s16.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_s32.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_s64.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_s8.c           | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_u16.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_u32.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_u64.c          | 80 ++++++++++++++++
 .../aarch64/sve2/acle/asm/revd_u8.c           | 80 ++++++++++++++++
 gcc/testsuite/lib/target-supports.exp         | 10 ++
 35 files changed, 2156 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u32.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u64.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u8.c

Comments

Richard Sandiford Nov. 6, 2024, 7:32 p.m. UTC | #1
Richard Sandiford <richard.sandiford@arm.com> writes:
> Some instructions that were previously restricted to streaming mode
> can also be used in non-streaming mode with SVE2.1.  This patch adds
> support for those, as well as the usual new-extension boilerplate.
> A later patch will add the feature macro.
>
> gcc/
> 	* config/aarch64/aarch64-option-extensions.def (sve2p1): New extension.

Gah, just realised that I forgot to document this :(  Will fix.

Richard

> 	* config/aarch64/aarch64-sve-builtins-sve2.def: Mark instructions
> 	that are common to both SVE2p1 and SME.
> 	* config/aarch64/aarch64.h (TARGET_SVE2p1): New macro.
> 	(TARGET_SVE2p1_OR_SME): Likewise.
> 	* config/aarch64/aarch64-sve2.md
> 	(@aarch64_sve_psel<BHSD_BITS>): Require TARGET_SVE2p1_OR_SME
> 	instead of TARGET_STREAMING.
> 	(*aarch64_sve_psel<BHSD_BITS>_plus): Likewise.
> 	(@aarch64_sve_<su>clamp<mode>): Likewise.
> 	(*aarch64_sve_<su>clamp<mode>_x): Likewise.
> 	(@aarch64_pred_<optab><mode>): Likewise.
> 	(@cond_<optab><mode>): Likewise.
>
> gcc/testsuite/
> 	* lib/target-supports.exp
> 	(check_effective_target_aarch64_asm_sve2p1_ok): New procedure.
> 	* gcc.target/aarch64/sve/clamp_1.c: New test.
> 	* gcc.target/aarch64/sve/clamp_2.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/clamp_s16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/clamp_s32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/clamp_s64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/clamp_s8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/clamp_u16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/clamp_u32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/clamp_u64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/clamp_u8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/psel_lane_b16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/psel_lane_b32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/psel_lane_b64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/psel_lane_b8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/psel_lane_c16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/psel_lane_c32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/psel_lane_c64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/psel_lane_c8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_bf16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_f16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_f32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_f64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_s16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_s32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_s64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_s8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_u16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_u32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_u64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/revd_u8.c: Likewise.
> ---
>  .../aarch64/aarch64-option-extensions.def     |  2 +
>  .../aarch64/aarch64-sve-builtins-sve2.def     |  2 +-
>  gcc/config/aarch64/aarch64-sve2.md            | 12 +--
>  gcc/config/aarch64/aarch64.h                  |  9 ++
>  .../gcc.target/aarch64/sve/clamp_1.c          | 40 ++++++++
>  .../gcc.target/aarch64/sve/clamp_2.c          | 34 +++++++
>  .../aarch64/sve2/acle/asm/clamp_s16.c         | 46 +++++++++
>  .../aarch64/sve2/acle/asm/clamp_s32.c         | 46 +++++++++
>  .../aarch64/sve2/acle/asm/clamp_s64.c         | 46 +++++++++
>  .../aarch64/sve2/acle/asm/clamp_s8.c          | 46 +++++++++
>  .../aarch64/sve2/acle/asm/clamp_u16.c         | 46 +++++++++
>  .../aarch64/sve2/acle/asm/clamp_u32.c         | 46 +++++++++
>  .../aarch64/sve2/acle/asm/clamp_u64.c         | 46 +++++++++
>  .../aarch64/sve2/acle/asm/clamp_u8.c          | 46 +++++++++
>  .../aarch64/sve2/acle/asm/psel_lane_b16.c     | 93 +++++++++++++++++++
>  .../aarch64/sve2/acle/asm/psel_lane_b32.c     | 93 +++++++++++++++++++
>  .../aarch64/sve2/acle/asm/psel_lane_b64.c     | 84 +++++++++++++++++
>  .../aarch64/sve2/acle/asm/psel_lane_b8.c      | 93 +++++++++++++++++++
>  .../aarch64/sve2/acle/asm/psel_lane_c16.c     | 93 +++++++++++++++++++
>  .../aarch64/sve2/acle/asm/psel_lane_c32.c     | 93 +++++++++++++++++++
>  .../aarch64/sve2/acle/asm/psel_lane_c64.c     | 84 +++++++++++++++++
>  .../aarch64/sve2/acle/asm/psel_lane_c8.c      | 93 +++++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_bf16.c         | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_f16.c          | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_f32.c          | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_f64.c          | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_s16.c          | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_s32.c          | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_s64.c          | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_s8.c           | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_u16.c          | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_u32.c          | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_u64.c          | 80 ++++++++++++++++
>  .../aarch64/sve2/acle/asm/revd_u8.c           | 80 ++++++++++++++++
>  gcc/testsuite/lib/target-supports.exp         | 10 ++
>  35 files changed, 2156 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s16.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s32.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s64.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_s8.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u16.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u32.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u64.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_u8.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b16.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b32.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b64.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_b8.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c16.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c32.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c64.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/psel_lane_c8.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_bf16.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f16.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f32.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_f64.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s16.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s32.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s64.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_s8.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u16.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u32.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u64.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_u8.c
>
> diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
> index 8279f5a76ea..c9d419afc8f 100644
> --- a/gcc/config/aarch64/aarch64-option-extensions.def
> +++ b/gcc/config/aarch64/aarch64-option-extensions.def
> @@ -192,6 +192,8 @@ AARCH64_OPT_EXTENSION("sve2-sm4", SVE2_SM4, (SVE2, SM4), (), (), "svesm4")
>  
>  AARCH64_FMV_FEATURE("sve2-sm4", SVE_SM4, (SVE2_SM4))
>  
> +AARCH64_OPT_EXTENSION("sve2p1", SVE2p1, (SVE2), (), (), "")
> +
>  AARCH64_OPT_FMV_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme")
>  
>  AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "")
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> index 12548fe39cb..5cc32aa8871 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> @@ -220,7 +220,7 @@ DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
>  DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
>  #undef REQUIRED_EXTENSIONS
>  
> -#define REQUIRED_EXTENSIONS streaming_only (0)
> +#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p1, 0)
>  DEF_SVE_FUNCTION (svclamp, clamp, all_integer, none)
>  DEF_SVE_FUNCTION (svpsel_lane, select_pred, all_pred_count, none)
>  DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz)
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index a7b29daeba4..fd4bd42b6d9 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -418,7 +418,7 @@ (define_insn "@aarch64_sve_psel<BHSD_BITS>"
>  	   (match_operand:SI 3 "register_operand" "Ucj")
>  	   (const_int BHSD_BITS)]
>  	  UNSPEC_PSEL))]
> -  "TARGET_STREAMING"
> +  "TARGET_SVE2p1_OR_SME"
>    "psel\t%0, %1, %2.<bits_etype>[%w3, 0]"
>  )
>  
> @@ -432,7 +432,7 @@ (define_insn "*aarch64_sve_psel<BHSD_BITS>_plus"
>  	     (match_operand:SI 4 "const_int_operand"))
>  	   (const_int BHSD_BITS)]
>  	  UNSPEC_PSEL))]
> -  "TARGET_STREAMING
> +  "TARGET_SVE2p1_OR_SME
>     && UINTVAL (operands[4]) < 128 / <BHSD_BITS>"
>    "psel\t%0, %1, %2.<bits_etype>[%w3, %4]"
>  )
> @@ -560,7 +560,7 @@ (define_insn "@aarch64_sve_<su>clamp<mode>"
>  	    (match_operand:SVE_FULL_I 1 "register_operand")
>  	    (match_operand:SVE_FULL_I 2 "register_operand"))
>  	  (match_operand:SVE_FULL_I 3 "register_operand")))]
> -  "TARGET_STREAMING"
> +  "TARGET_SVE2p1_OR_SME"
>    {@ [cons: =0,  1, 2, 3; attrs: movprfx]
>       [       w, %0, w, w; *             ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
>       [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
> @@ -580,7 +580,7 @@ (define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x"
>  	       UNSPEC_PRED_X)
>  	     (match_operand:SVE_FULL_I 3 "register_operand"))]
>  	  UNSPEC_PRED_X))]
> -  "TARGET_STREAMING"
> +  "TARGET_SVE2p1_OR_SME"
>    {@ [cons: =0,  1, 2, 3; attrs: movprfx]
>       [       w, %0, w, w; *             ] #
>       [     ?&w,  w, w, w; yes           ] #
> @@ -3182,7 +3182,7 @@ (define_insn "@aarch64_pred_<optab><mode>"
>  	     [(match_operand:SVE_FULL 2 "register_operand")]
>  	     UNSPEC_REVD_ONLY)]
>  	  UNSPEC_PRED_X))]
> -  "TARGET_STREAMING"
> +  "TARGET_SVE2p1_OR_SME"
>    {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
>       [ w        , Upl , 0 ; *              ] revd\t%0.q, %1/m, %2.q
>       [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q
> @@ -3198,7 +3198,7 @@ (define_insn "@cond_<optab><mode>"
>  	     UNSPEC_REVD_ONLY)
>  	   (match_operand:SVE_FULL 3 "register_operand")]
>  	  UNSPEC_SEL))]
> -  "TARGET_STREAMING"
> +  "TARGET_SVE2p1_OR_SME"
>    {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
>       [ w        , Upl , w , 0  ; *              ] revd\t%0.q, %1/m, %2.q
>       [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index d17f40ce22e..404efa16c28 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -338,6 +338,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
>  /* SVE2 SM4 instructions, enabled through +sve2-sm4.  */
>  #define TARGET_SVE2_SM4 (AARCH64_HAVE_ISA (SVE2_SM4) && TARGET_NON_STREAMING)
>  
> +/* SVE2p1 instructions, enabled through +sve2p1.  */
> +#define TARGET_SVE2p1 AARCH64_HAVE_ISA (SVE2p1)
> +
>  /* SME instructions, enabled through +sme.  Note that this does not
>     imply anything about the state of PSTATE.SM; instructions that require
>     SME and streaming mode should use TARGET_STREAMING instead.  */
> @@ -481,6 +484,12 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
>  /* fp8 instructions are enabled through +fp8.  */
>  #define TARGET_FP8 AARCH64_HAVE_ISA (FP8)
>  
> +/* Combinatorial tests.  */
> +
> +/* There's no need to check TARGET_SME for streaming or streaming-compatible
> +   functions, since streaming mode itself implies SME.  */
> +#define TARGET_SVE2p1_OR_SME (TARGET_SVE2p1 || TARGET_STREAMING)
> +
>  /* Standard register usage.  */
>  
>  /* 31 64-bit general purpose registers R0-R30:
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c
> new file mode 100644
> index 00000000000..92fef098865
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c
> @@ -0,0 +1,40 @@
> +// { dg-options "-O" }
> +
> +#include <arm_sve.h>
> +
> +#pragma GCC target "+sve2p1"
> +
> +#define TEST(TYPE)							\
> +  TYPE									\
> +  tied1_##TYPE(TYPE a, TYPE b, TYPE c)					\
> +  {									\
> +    return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), a, b), c);	\
> +  }									\
> +									\
> +  TYPE									\
> +  tied2_##TYPE(TYPE a, TYPE b, TYPE c)					\
> +  {									\
> +    return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, a), c);	\
> +  }
> +
> +TEST(svint8_t)
> +TEST(svint16_t)
> +TEST(svint32_t)
> +TEST(svint64_t)
> +
> +TEST(svuint8_t)
> +TEST(svuint16_t)
> +TEST(svuint32_t)
> +TEST(svuint64_t)
> +
> +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
> +
> +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c
> new file mode 100644
> index 00000000000..f96c0046465
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c
> @@ -0,0 +1,34 @@
> +// { dg-options "-O" }
> +
> +#include <arm_sve.h>
> +
> +#pragma GCC target "+sve2p1"
> +
> +#define TEST(TYPE)							\
> +  TYPE									\
> +  untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d)				\
> +  {									\
> +    return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, c), d);	\
> +  }
> +
> +TEST(svint8_t)
> +TEST(svint16_t)
> +TEST(svint32_t)
> +TEST(svint64_t)
> +
> +TEST(svuint8_t)
> +TEST(svuint16_t)
> +TEST(svuint32_t)
> +TEST(svuint64_t)
> +
> +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
> +
> +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
> +
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 8 } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 75703ddca60..a8833d585c6 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -12100,6 +12100,16 @@ foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
>      }]
>  }
>  
> +proc check_effective_target_aarch64_asm_sve2p1_ok { } {
> +    if { [istarget aarch64*-*-*] } {
> +	return [check_no_compiler_messages aarch64_sve2p1_assembler object {
> +	    __asm__ (".arch_extension sve2p1; ld1w {z0.q},p7/z,[x0]");
> +	} "-march=armv8-a+sve2p1"]
> +    } else {
> +	return 0
> +    }
> +}
> +
>  proc check_effective_target_aarch64_small { } {
>      if { [istarget aarch64*-*-*] } {
>  	return [check_no_compiler_messages aarch64_small object {
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 8279f5a76ea..c9d419afc8f 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -192,6 +192,8 @@  AARCH64_OPT_EXTENSION("sve2-sm4", SVE2_SM4, (SVE2, SM4), (), (), "svesm4")
 
 AARCH64_FMV_FEATURE("sve2-sm4", SVE_SM4, (SVE2_SM4))
 
+AARCH64_OPT_EXTENSION("sve2p1", SVE2p1, (SVE2), (), (), "")
+
 AARCH64_OPT_FMV_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme")
 
 AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "")
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 12548fe39cb..5cc32aa8871 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -220,7 +220,7 @@  DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
 DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
 #undef REQUIRED_EXTENSIONS
 
-#define REQUIRED_EXTENSIONS streaming_only (0)
+#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p1, 0)
 DEF_SVE_FUNCTION (svclamp, clamp, all_integer, none)
 DEF_SVE_FUNCTION (svpsel_lane, select_pred, all_pred_count, none)
 DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz)
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index a7b29daeba4..fd4bd42b6d9 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -418,7 +418,7 @@  (define_insn "@aarch64_sve_psel<BHSD_BITS>"
 	   (match_operand:SI 3 "register_operand" "Ucj")
 	   (const_int BHSD_BITS)]
 	  UNSPEC_PSEL))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   "psel\t%0, %1, %2.<bits_etype>[%w3, 0]"
 )
 
@@ -432,7 +432,7 @@  (define_insn "*aarch64_sve_psel<BHSD_BITS>_plus"
 	     (match_operand:SI 4 "const_int_operand"))
 	   (const_int BHSD_BITS)]
 	  UNSPEC_PSEL))]
-  "TARGET_STREAMING
+  "TARGET_SVE2p1_OR_SME
    && UINTVAL (operands[4]) < 128 / <BHSD_BITS>"
   "psel\t%0, %1, %2.<bits_etype>[%w3, %4]"
 )
@@ -560,7 +560,7 @@  (define_insn "@aarch64_sve_<su>clamp<mode>"
 	    (match_operand:SVE_FULL_I 1 "register_operand")
 	    (match_operand:SVE_FULL_I 2 "register_operand"))
 	  (match_operand:SVE_FULL_I 3 "register_operand")))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
      [       w, %0, w, w; *             ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
      [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
@@ -580,7 +580,7 @@  (define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x"
 	       UNSPEC_PRED_X)
 	     (match_operand:SVE_FULL_I 3 "register_operand"))]
 	  UNSPEC_PRED_X))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
      [       w, %0, w, w; *             ] #
      [     ?&w,  w, w, w; yes           ] #
@@ -3182,7 +3182,7 @@  (define_insn "@aarch64_pred_<optab><mode>"
 	     [(match_operand:SVE_FULL 2 "register_operand")]
 	     UNSPEC_REVD_ONLY)]
 	  UNSPEC_PRED_X))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
      [ w        , Upl , 0 ; *              ] revd\t%0.q, %1/m, %2.q
      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q
@@ -3198,7 +3198,7 @@  (define_insn "@cond_<optab><mode>"
 	     UNSPEC_REVD_ONLY)
 	   (match_operand:SVE_FULL 3 "register_operand")]
 	  UNSPEC_SEL))]
-  "TARGET_STREAMING"
+  "TARGET_SVE2p1_OR_SME"
   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
      [ w        , Upl , w , 0  ; *              ] revd\t%0.q, %1/m, %2.q
      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index d17f40ce22e..404efa16c28 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -338,6 +338,9 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 /* SVE2 SM4 instructions, enabled through +sve2-sm4.  */
 #define TARGET_SVE2_SM4 (AARCH64_HAVE_ISA (SVE2_SM4) && TARGET_NON_STREAMING)
 
+/* SVE2p1 instructions, enabled through +sve2p1.  */
+#define TARGET_SVE2p1 AARCH64_HAVE_ISA (SVE2p1)
+
 /* SME instructions, enabled through +sme.  Note that this does not
    imply anything about the state of PSTATE.SM; instructions that require
    SME and streaming mode should use TARGET_STREAMING instead.  */
@@ -481,6 +484,12 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 /* fp8 instructions are enabled through +fp8.  */
 #define TARGET_FP8 AARCH64_HAVE_ISA (FP8)
 
+/* Combinatorial tests.  */
+
+/* There's no need to check TARGET_SME for streaming or streaming-compatible
+   functions, since streaming mode itself implies SME.  */
+#define TARGET_SVE2p1_OR_SME (TARGET_SVE2p1 || TARGET_STREAMING)
+
 /* Standard register usage.  */
 
 /* 31 64-bit general purpose registers R0-R30:
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c
new file mode 100644
index 00000000000..92fef098865
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c
@@ -0,0 +1,40 @@ 
+// { dg-options "-O" }
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p1"
+
+#define TEST(TYPE)							\
+  TYPE									\
+  tied1_##TYPE(TYPE a, TYPE b, TYPE c)					\
+  {									\
+    return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), a, b), c);	\
+  }									\
+									\
+  TYPE									\
+  tied2_##TYPE(TYPE a, TYPE b, TYPE c)					\
+  {									\
+    return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, a), c);	\
+  }
+
+TEST(svint8_t)
+TEST(svint16_t)
+TEST(svint32_t)
+TEST(svint64_t)
+
+TEST(svuint8_t)
+TEST(svuint16_t)
+TEST(svuint32_t)
+TEST(svuint64_t)
+
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z1\.b, z2\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c
new file mode 100644
index 00000000000..f96c0046465
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c
@@ -0,0 +1,34 @@ 
+// { dg-options "-O" }
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p1"
+
+#define TEST(TYPE)							\
+  TYPE									\
+  untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d)				\
+  {									\
+    return svmin_x(svptrue_b8(), svmax_x(svptrue_b8(), b, c), d);	\
+  }
+
+TEST(svint8_t)
+TEST(svint16_t)
+TEST(svint32_t)
+TEST(svint64_t)
+
+TEST(svuint8_t)
+TEST(svuint16_t)
+TEST(svuint32_t)
+TEST(svuint64_t)
+
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.b, z2\.b, z3\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 8 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 75703ddca60..a8833d585c6 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12100,6 +12100,16 @@  foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
     }]
 }
 
+proc check_effective_target_aarch64_asm_sve2p1_ok { } {
+    if { [istarget aarch64*-*-*] } {
+	return [check_no_compiler_messages aarch64_sve2p1_assembler object {
+	    __asm__ (".arch_extension sve2p1; ld1w {z0.q},p7/z,[x0]");
+	} "-march=armv8-a+sve2p1"]
+    } else {
+	return 0
+    }
+}
+
 proc check_effective_target_aarch64_small { } {
     if { [istarget aarch64*-*-*] } {
 	return [check_no_compiler_messages aarch64_small object {