Message ID | 20241001120933.1269122-2-saurabh.jha@arm.com |
---|---|
State | New |
Headers | show |
Series | Add support for SVE2 faminmax | expand |
<saurabh.jha@arm.com> writes: > The AArch64 FEAT_FAMINMAX extension introduces instructions for > computing the floating point absolute maximum and minimum of two > vectors element-wise. > > This patch introduces SVE2 faminmax intrinsics. The intrinsics of this > extension are implemented as the following builtin functions: > * sva[max|min]_[m|x|z] > * sva[max|min]_[f16|f32|f64]_[m|x|z] > * sva[max|min]_n_[f16|f32|f64]_[m|x|z] > > gcc/ChangeLog: > > * config/aarch64/aarch64-sve-builtins-base.cc > (svamax): Absolute maximum declaration. > (svamin): Absolute minimum declaration. > * config/aarch64/aarch64-sve-builtins-base.def > (REQUIRED_EXTENSIONS): Add faminmax intrinsics behind a flag. > (svamax): Absolute maximum declaration. > (svamin): Absolute minimum declaration. > * config/aarch64/aarch64-sve-builtins-base.h: Declare function > bases for the new intrinsics. > * config/aarch64/aarch64.h > (TARGET_SVE_FAMINMAX): New flag for SVE2 faminmax. > * config/aarch64/iterators.md: New unspecs, iterators, and attrs > for the new intrinsics. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/sve2/acle/asm/amax_f16.c: New test. > * gcc.target/aarch64/sve2/acle/asm/amax_f32.c: New test. > * gcc.target/aarch64/sve2/acle/asm/amax_f64.c: New test. > * gcc.target/aarch64/sve2/acle/asm/amin_f16.c: New test. > * gcc.target/aarch64/sve2/acle/asm/amin_f32.c: New test. > * gcc.target/aarch64/sve2/acle/asm/amin_f64.c: New test. 
> --- > .../aarch64/aarch64-sve-builtins-base.cc | 4 + > .../aarch64/aarch64-sve-builtins-base.def | 5 + > .../aarch64/aarch64-sve-builtins-base.h | 2 + > gcc/config/aarch64/aarch64.h | 1 + > gcc/config/aarch64/iterators.md | 40 +++-- > .../aarch64/sve2/acle/asm/amax_f16.c | 142 ++++++++++++++++++ > .../aarch64/sve2/acle/asm/amax_f32.c | 142 ++++++++++++++++++ > .../aarch64/sve2/acle/asm/amax_f64.c | 142 ++++++++++++++++++ > .../aarch64/sve2/acle/asm/amin_f16.c | 142 ++++++++++++++++++ > .../aarch64/sve2/acle/asm/amin_f32.c | 142 ++++++++++++++++++ > .../aarch64/sve2/acle/asm/amin_f64.c | 142 ++++++++++++++++++ > 11 files changed, 893 insertions(+), 11 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c > > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > index afce52a7e8d..dd4efdf6ca5 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > @@ -3070,6 +3070,10 @@ FUNCTION (svadrb, svadr_bhwd_impl, (0)) > FUNCTION (svadrd, svadr_bhwd_impl, (3)) > FUNCTION (svadrh, svadr_bhwd_impl, (1)) > FUNCTION (svadrw, svadr_bhwd_impl, (2)) > +FUNCTION (svamax, cond_or_uncond_unspec_function, > + (UNSPEC_COND_FAMAX, UNSPEC_FAMAX)) > +FUNCTION (svamin, cond_or_uncond_unspec_function, > + (UNSPEC_COND_FAMIN, UNSPEC_FAMIN)) > FUNCTION (svand, rtx_code_function, (AND, AND)) > FUNCTION (svandv, reduction, (UNSPEC_ANDV)) > FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) > diff --git 
a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def > index 65fcba91586..95e04e4393d 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.def > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def > @@ -379,3 +379,8 @@ DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) > DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) > DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) > #undef REQUIRED_EXTENSIONS > + > +#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_FAMINMAX > +DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz) > +DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz) > +#undef REQUIRED_EXTENSIONS > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h > index 5bbf3569c4b..978cf7013f9 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.h > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h > @@ -37,6 +37,8 @@ namespace aarch64_sve > extern const function_base *const svadrd; > extern const function_base *const svadrh; > extern const function_base *const svadrw; > + extern const function_base *const svamax; > + extern const function_base *const svamin; > extern const function_base *const svand; > extern const function_base *const svandv; > extern const function_base *const svasr; > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index 43819adb48c..a496235db42 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -470,6 +470,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED > /* Floating Point Absolute Maximum/Minimum extension instructions are > enabled through +faminmax. */ > #define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX) > +#define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX) > > /* Prefer different predicate registers for the output of a predicated > operation over re-using an existing input predicate. 
*/ > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index c2fcd18306e..cf9ee2639a9 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -841,6 +841,8 @@ > UNSPEC_COND_CMPNE_WIDE ; Used in aarch64-sve.md. > UNSPEC_COND_FABS ; Used in aarch64-sve.md. > UNSPEC_COND_FADD ; Used in aarch64-sve.md. > + UNSPEC_COND_FAMAX ; Used in aarch64-sve.md. > + UNSPEC_COND_FAMIN ; Used in aarch64-sve.md. > UNSPEC_COND_FCADD90 ; Used in aarch64-sve.md. > UNSPEC_COND_FCADD270 ; Used in aarch64-sve.md. > UNSPEC_COND_FCMEQ ; Used in aarch64-sve.md. > @@ -3081,15 +3083,18 @@ > (define_int_iterator SVE_COND_FCVTI [UNSPEC_COND_FCVTZS UNSPEC_COND_FCVTZU]) > (define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF]) > > -(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD > - UNSPEC_COND_FDIV > - UNSPEC_COND_FMAX > - UNSPEC_COND_FMAXNM > - UNSPEC_COND_FMIN > - UNSPEC_COND_FMINNM > - UNSPEC_COND_FMUL > - UNSPEC_COND_FMULX > - UNSPEC_COND_FSUB]) > +(define_int_iterator SVE_COND_FP_BINARY > + [UNSPEC_COND_FADD > + (UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") > + (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") > + UNSPEC_COND_FDIV > + UNSPEC_COND_FMAX > + UNSPEC_COND_FMAXNM > + UNSPEC_COND_FMIN > + UNSPEC_COND_FMINNM > + UNSPEC_COND_FMUL > + UNSPEC_COND_FMULX > + UNSPEC_COND_FSUB]) > > ;; Same as SVE_COND_FP_BINARY, but without codes that have a dedicated > ;; <optab><mode>3 expander. 
> @@ -3114,8 +3119,11 @@ > UNSPEC_COND_FMINNM > UNSPEC_COND_FMUL]) > > -(define_int_iterator SVE_COND_FP_BINARY_REG [UNSPEC_COND_FDIV > - UNSPEC_COND_FMULX]) > +(define_int_iterator SVE_COND_FP_BINARY_REG > + [(UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") > + (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") > + UNSPEC_COND_FDIV > + UNSPEC_COND_FMULX]) > > (define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90 > UNSPEC_COND_FCADD270]) > @@ -3694,6 +3702,8 @@ > (UNSPEC_ZIP2Q "zip2q") > (UNSPEC_COND_FABS "abs") > (UNSPEC_COND_FADD "add") > + (UNSPEC_COND_FAMAX "famax") > + (UNSPEC_COND_FAMIN "famin") > (UNSPEC_COND_FCADD90 "cadd90") > (UNSPEC_COND_FCADD270 "cadd270") > (UNSPEC_COND_FCMLA "fcmla") > @@ -4230,6 +4240,8 @@ > (UNSPEC_FTSSEL "ftssel") > (UNSPEC_COND_FABS "fabs") > (UNSPEC_COND_FADD "fadd") > + (UNSPEC_COND_FAMAX "famax") > + (UNSPEC_COND_FAMIN "famin") > (UNSPEC_COND_FCVTLT "fcvtlt") > (UNSPEC_COND_FCVTX "fcvtx") > (UNSPEC_COND_FDIV "fdiv") > @@ -4254,6 +4266,8 @@ > (UNSPEC_COND_FSUB "fsub")]) > > (define_int_attr sve_fp_op_rev [(UNSPEC_COND_FADD "fadd") > + (UNSPEC_COND_FAMAX "famax") > + (UNSPEC_COND_FAMIN "famin") > (UNSPEC_COND_FDIV "fdivr") > (UNSPEC_COND_FMAX "fmax") > (UNSPEC_COND_FMAXNM "fmaxnm") > @@ -4390,6 +4404,8 @@ > ;; <optab><mode>3 pattern. > (define_int_attr sve_pred_fp_rhs1_operand > [(UNSPEC_COND_FADD "register_operand") > + (UNSPEC_COND_FAMAX "register_operand") > + (UNSPEC_COND_FAMIN "register_operand") > (UNSPEC_COND_FDIV "register_operand") > (UNSPEC_COND_FMAX "register_operand") > (UNSPEC_COND_FMAXNM "register_operand") > @@ -4403,6 +4419,8 @@ > ;; <optab><mode>3 pattern. 
> (define_int_attr sve_pred_fp_rhs2_operand > [(UNSPEC_COND_FADD "aarch64_sve_float_arith_with_sub_operand") > + (UNSPEC_COND_FAMAX "aarch64_sve_float_maxmin_operand") > + (UNSPEC_COND_FAMIN "aarch64_sve_float_maxmin_operand") > (UNSPEC_COND_FDIV "register_operand") > (UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_operand") > (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_operand") > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c > new file mode 100644 > index 00000000000..e5681a0733e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c > @@ -0,0 +1,142 @@ > +/* { dg-do compile } */ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +#include "test_sve_acle.h" > + > +#pragma GCC target "+sve+faminmax" > + > +/* > +** amax_f16_m_tied1: > +** famax z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f16_m_tied1, svfloat16_t, > + z0 = svamax_f16_m (p0, z0, z1), > + z0 = svamax_m (p0, z0, z1)) > +/* > +** amax_f16_m_tied2: > +** mov z31\.d, z0\.d > +** movprfx z0, z1 > +** famax z0\.h, p0/m, z0\.h, z31\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f16_m_tied2, svfloat16_t, > + z0 = svamax_f16_m (p0, z1, z0), > + z0 = svamax_m (p0, z1, z0)) Sorry for not noticing last time, but: we shouldn't hard-code temporary registers like z31 here. The RA can in principle pick any free register. The same thing applies to the temporary z7 registers. I know it's not very sophisticated, but I think s/max/amax/ on things like gcc.target/aarch64/sve/acle/asm/max_f16.c would give the right result, except for the need for the pragma. Looks good otherwise. 
Thanks, Richard > +/* > +** amax_f16_m_untied: > +** movprfx z0, z1 > +** famax z0\.h, p0/m, z0\.h, z2\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f16_m_untied, svfloat16_t, > + z0 = svamax_f16_m (p0, z1, z2), > + z0 = svamax_m (p0, z1, z2)) > +/* > +** amax_f16_x_tied1: > +** famax z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f16_x_tied1, svfloat16_t, > + z0 = svamax_f16_x (p0, z0, z1), > + z0 = svamax_x (p0, z0, z1)) > +/* > +** amax_f16_x_tied2: > +** famax z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f16_x_tied2, svfloat16_t, > + z0 = svamax_f16_x (p0, z1, z0), > + z0 = svamax_x (p0, z1, z0)) > +/* > +** amax_f16_x_untied: > +** movprfx z0, z1 > +** famax z0\.h, p0/m, z0\.h, z2\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f16_x_untied, svfloat16_t, > + z0 = svamax_f16_x (p0, z1, z2), > + z0 = svamax_x (p0, z1, z2)) > +/* > +** amax_f16_z_tied1: > +** movprfx z0\.h, p0/z, z0\.h > +** famax z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f16_z_tied1, svfloat16_t, > + z0 = svamax_f16_z (p0, z0, z1), > + z0 = svamax_z (p0, z0, z1)) > +/* > +** amax_f16_z_tied2: > +** movprfx z0\.h, p0/z, z0\.h > +** famax z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f16_z_tied2, svfloat16_t, > + z0 = svamax_f16_z (p0, z1, z0), > + z0 = svamax_z (p0, z1, z0)) > +/* > +** amax_f16_z_untied: > +** movprfx z0\.h, p0/z, z1\.h > +** famax z0\.h, p0/m, z0\.h, z2\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f16_z_untied, svfloat16_t, > + z0 = svamax_f16_z (p0, z1, z2), > + z0 = svamax_z (p0, z1, z2)) > +/* > +** amax_n_f16_m_tied1: > +** mov z7\.h, h7 > +** famax z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f16_m_tied1, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamax_n_f16_m (p0, z0, d7), > + z0 = svamax_m (p0, z0, d7)) > +/* > +** amax_n_f16_m_untied: > +** mov z7\.h, h7 > +** movprfx z0, z4 > +** famax z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f16_m_untied, 
svfloat16_t, svfloat16_t, float16_t, > + z0 = svamax_n_f16_m (p0, z4, d7), > + z0 = svamax_m (p0, z4, d7)) > +/* > +** amax_n_f16_x_tied1: > +** mov z7\.h, h7 > +** famax z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f16_x_tied1, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamax_n_f16_x (p0, z0, d7), > + z0 = svamax_x (p0, z0, d7)) > +/* > +** amax_n_f16_x_untied: > +** mov z0\.h, h7 > +** famax z0\.h, p0/m, z0\.h, z4\.h > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f16_x_untied, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamax_n_f16_x (p0, z4, d7), > + z0 = svamax_x (p0, z4, d7)) > +/* > +** amax_n_f16_z_tied1: > +** mov z7\.h, h7 > +** movprfx z0\.h, p0/z, z0\.h > +** famax z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f16_z_tied1, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamax_n_f16_z (p0, z0, d7), > + z0 = svamax_z (p0, z0, d7)) > +/* > +** amax_n_f16_z_untied: > +** mov z7\.h, h7 > +** movprfx z0\.h, p0/z, z4\.h > +** famax z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f16_z_untied, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamax_n_f16_z (p0, z4, d7), > + z0 = svamax_z (p0, z4, d7)) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c > new file mode 100644 > index 00000000000..ac6fd227b52 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c > @@ -0,0 +1,142 @@ > +/* { dg-do compile } */ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +#include "test_sve_acle.h" > + > +#pragma GCC target "+sve+faminmax" > + > +/* > +** amax_f32_m_tied1: > +** famax z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f32_m_tied1, svfloat32_t, > + z0 = svamax_f32_m (p0, z0, z1), > + z0 = svamax_m (p0, z0, z1)) > +/* > +** amax_f32_m_tied2: > +** mov z31\.d, z0\.d > +** movprfx z0, z1 > +** famax z0\.s, p0/m, z0\.s, z31\.s > +** ret > +*/ > +TEST_UNIFORM_Z 
(amax_f32_m_tied2, svfloat32_t, > + z0 = svamax_f32_m (p0, z1, z0), > + z0 = svamax_m (p0, z1, z0)) > +/* > +** amax_f32_m_untied: > +** movprfx z0, z1 > +** famax z0\.s, p0/m, z0\.s, z2\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f32_m_untied, svfloat32_t, > + z0 = svamax_f32_m (p0, z1, z2), > + z0 = svamax_m (p0, z1, z2)) > +/* > +** amax_f32_x_tied1: > +** famax z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f32_x_tied1, svfloat32_t, > + z0 = svamax_f32_x (p0, z0, z1), > + z0 = svamax_x (p0, z0, z1)) > +/* > +** amax_f32_x_tied2: > +** famax z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f32_x_tied2, svfloat32_t, > + z0 = svamax_f32_x (p0, z1, z0), > + z0 = svamax_x (p0, z1, z0)) > +/* > +** amax_f32_x_untied: > +** movprfx z0, z1 > +** famax z0\.s, p0/m, z0\.s, z2\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f32_x_untied, svfloat32_t, > + z0 = svamax_f32_x (p0, z1, z2), > + z0 = svamax_x (p0, z1, z2)) > +/* > +** amax_f32_z_tied1: > +** movprfx z0\.s, p0/z, z0\.s > +** famax z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f32_z_tied1, svfloat32_t, > + z0 = svamax_f32_z (p0, z0, z1), > + z0 = svamax_z (p0, z0, z1)) > +/* > +** amax_f32_z_tied2: > +** movprfx z0\.s, p0/z, z0\.s > +** famax z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f32_z_tied2, svfloat32_t, > + z0 = svamax_f32_z (p0, z1, z0), > + z0 = svamax_z (p0, z1, z0)) > +/* > +** amax_f32_z_untied: > +** movprfx z0\.s, p0/z, z1\.s > +** famax z0\.s, p0/m, z0\.s, z2\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f32_z_untied, svfloat32_t, > + z0 = svamax_f32_z (p0, z1, z2), > + z0 = svamax_z (p0, z1, z2)) > +/* > +** amax_n_f32_m_tied1: > +** mov z7\.s, s7 > +** famax z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f32_m_tied1, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamax_n_f32_m (p0, z0, d7), > + z0 = svamax_m (p0, z0, d7)) > +/* > +** amax_n_f32_m_untied: > +** mov z7\.s, s7 > +** movprfx z0, z4 > +** famax 
z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f32_m_untied, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamax_n_f32_m (p0, z4, d7), > + z0 = svamax_m (p0, z4, d7)) > +/* > +** amax_n_f32_x_tied1: > +** mov z7\.s, s7 > +** famax z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f32_x_tied1, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamax_n_f32_x (p0, z0, d7), > + z0 = svamax_x (p0, z0, d7)) > +/* > +** amax_n_f32_x_untied: > +** mov z0\.s, s7 > +** famax z0\.s, p0/m, z0\.s, z4\.s > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f32_x_untied, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamax_n_f32_x (p0, z4, d7), > + z0 = svamax_x (p0, z4, d7)) > +/* > +** amax_n_f32_z_tied1: > +** mov z7\.s, s7 > +** movprfx z0\.s, p0/z, z0\.s > +** famax z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f32_z_tied1, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamax_n_f32_z (p0, z0, d7), > + z0 = svamax_z (p0, z0, d7)) > +/* > +** amax_n_f32_z_untied: > +** mov z7\.s, s7 > +** movprfx z0\.s, p0/z, z4\.s > +** famax z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f32_z_untied, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamax_n_f32_z (p0, z4, d7), > + z0 = svamax_z (p0, z4, d7)) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c > new file mode 100644 > index 00000000000..9e711674ea5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c > @@ -0,0 +1,142 @@ > +/* { dg-do compile } */ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +#include "test_sve_acle.h" > + > +#pragma GCC target "+sve+faminmax" > + > +/* > +** amax_f64_m_tied1: > +** famax z0\.d, p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f64_m_tied1, svfloat64_t, > + z0 = svamax_f64_m (p0, z0, z1), > + z0 = svamax_m (p0, z0, z1)) > +/* > +** amax_f64_m_tied2: > +** mov z31\.d, z0\.d > +** movprfx z0, z1 > 
+** famax z0\.d, p0/m, z0\.d, z31\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f64_m_tied2, svfloat64_t, > + z0 = svamax_f64_m (p0, z1, z0), > + z0 = svamax_m (p0, z1, z0)) > +/* > +** amax_f64_m_untied: > +** movprfx z0, z1 > +** famax z0\.d, p0/m, z0\.d, z2\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f64_m_untied, svfloat64_t, > + z0 = svamax_f64_m (p0, z1, z2), > + z0 = svamax_m (p0, z1, z2)) > +/* > +** amax_f64_x_tied1: > +** famax z0\.d, p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f64_x_tied1, svfloat64_t, > + z0 = svamax_f64_x (p0, z0, z1), > + z0 = svamax_x (p0, z0, z1)) > +/* > +** amax_f64_x_tied2: > +** famax z0\.d, p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f64_x_tied2, svfloat64_t, > + z0 = svamax_f64_x (p0, z1, z0), > + z0 = svamax_x (p0, z1, z0)) > +/* > +** amax_f64_x_untied: > +** movprfx z0, z1 > +** famax z0\.d, p0/m, z0\.d, z2\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f64_x_untied, svfloat64_t, > + z0 = svamax_f64_x (p0, z1, z2), > + z0 = svamax_x (p0, z1, z2)) > +/* > +** amax_f64_z_tied1: > +** movprfx z0\.d, p0/z, z0\.d > +** famax z0\.d, p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f64_z_tied1, svfloat64_t, > + z0 = svamax_f64_z (p0, z0, z1), > + z0 = svamax_z (p0, z0, z1)) > +/* > +** amax_f64_z_tied2: > +** movprfx z0\.d, p0/z, z0\.d > +** famax z0\.d, p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f64_z_tied2, svfloat64_t, > + z0 = svamax_f64_z (p0, z1, z0), > + z0 = svamax_z (p0, z1, z0)) > +/* > +** amax_f64_z_untied: > +** movprfx z0\.d, p0/z, z1\.d > +** famax z0\.d, p0/m, z0\.d, z2\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amax_f64_z_untied, svfloat64_t, > + z0 = svamax_f64_z (p0, z1, z2), > + z0 = svamax_z (p0, z1, z2)) > +/* > +** amax_n_f64_m_tied1: > +** mov z7\.d, d7 > +** famax z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f64_m_tied1, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamax_n_f64_m (p0, z0, d7), > + z0 = svamax_m (p0, z0, d7)) > +/* > +** 
amax_n_f64_m_untied: > +** mov z7\.d, d7 > +** movprfx z0, z4 > +** famax z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f64_m_untied, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamax_n_f64_m (p0, z4, d7), > + z0 = svamax_m (p0, z4, d7)) > +/* > +** amax_n_f64_x_tied1: > +** mov z7\.d, d7 > +** famax z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f64_x_tied1, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamax_n_f64_x (p0, z0, d7), > + z0 = svamax_x (p0, z0, d7)) > +/* > +** amax_n_f64_x_untied: > +** mov z0\.d, d7 > +** famax z0\.d, p0/m, z0\.d, z4\.d > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f64_x_untied, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamax_n_f64_x (p0, z4, d7), > + z0 = svamax_x (p0, z4, d7)) > +/* > +** amax_n_f64_z_tied1: > +** mov z7\.d, d7 > +** movprfx z0\.d, p0/z, z0\.d > +** famax z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f64_z_tied1, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamax_n_f64_z (p0, z0, d7), > + z0 = svamax_z (p0, z0, d7)) > +/* > +** amax_n_f64_z_untied: > +** mov z7\.d, d7 > +** movprfx z0\.d, p0/z, z4\.d > +** famax z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amax_n_f64_z_untied, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamax_n_f64_z (p0, z4, d7), > + z0 = svamax_z (p0, z4, d7)) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c > new file mode 100644 > index 00000000000..3c949df023c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c > @@ -0,0 +1,142 @@ > +/* { dg-do compile } */ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +#include "test_sve_acle.h" > + > +#pragma GCC target "+sve+faminmax" > + > +/* > +** amin_f16_m_tied1: > +** famin z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f16_m_tied1, svfloat16_t, > + z0 = svamin_f16_m (p0, z0, z1), > + z0 = svamin_m (p0, z0, z1)) > 
+/* > +** amin_f16_m_tied2: > +** mov z31\.d, z0\.d > +** movprfx z0, z1 > +** famin z0\.h, p0/m, z0\.h, z31\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f16_m_tied2, svfloat16_t, > + z0 = svamin_f16_m (p0, z1, z0), > + z0 = svamin_m (p0, z1, z0)) > +/* > +** amin_f16_m_untied: > +** movprfx z0, z1 > +** famin z0\.h, p0/m, z0\.h, z2\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f16_m_untied, svfloat16_t, > + z0 = svamin_f16_m (p0, z1, z2), > + z0 = svamin_m (p0, z1, z2)) > +/* > +** amin_f16_x_tied1: > +** famin z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f16_x_tied1, svfloat16_t, > + z0 = svamin_f16_x (p0, z0, z1), > + z0 = svamin_x (p0, z0, z1)) > +/* > +** amin_f16_x_tied2: > +** famin z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f16_x_tied2, svfloat16_t, > + z0 = svamin_f16_x (p0, z1, z0), > + z0 = svamin_x (p0, z1, z0)) > +/* > +** amin_f16_x_untied: > +** movprfx z0, z1 > +** famin z0\.h, p0/m, z0\.h, z2\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f16_x_untied, svfloat16_t, > + z0 = svamin_f16_x (p0, z1, z2), > + z0 = svamin_x (p0, z1, z2)) > +/* > +** amin_f16_z_tied1: > +** movprfx z0\.h, p0/z, z0\.h > +** famin z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f16_z_tied1, svfloat16_t, > + z0 = svamin_f16_z (p0, z0, z1), > + z0 = svamin_z (p0, z0, z1)) > +/* > +** amin_f16_z_tied2: > +** movprfx z0\.h, p0/z, z0\.h > +** famin z0\.h, p0/m, z0\.h, z1\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f16_z_tied2, svfloat16_t, > + z0 = svamin_f16_z (p0, z1, z0), > + z0 = svamin_z (p0, z1, z0)) > +/* > +** amin_f16_z_untied: > +** movprfx z0\.h, p0/z, z1\.h > +** famin z0\.h, p0/m, z0\.h, z2\.h > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f16_z_untied, svfloat16_t, > + z0 = svamin_f16_z (p0, z1, z2), > + z0 = svamin_z (p0, z1, z2)) > +/* > +** amin_n_f16_m_tied1: > +** mov z7\.h, h7 > +** famin z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f16_m_tied1, svfloat16_t, svfloat16_t, float16_t, > + z0 = 
svamin_n_f16_m (p0, z0, d7), > + z0 = svamin_m (p0, z0, d7)) > +/* > +** amin_n_f16_m_untied: > +** mov z7\.h, h7 > +** movprfx z0, z4 > +** famin z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f16_m_untied, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamin_n_f16_m (p0, z4, d7), > + z0 = svamin_m (p0, z4, d7)) > +/* > +** amin_n_f16_x_tied1: > +** mov z7\.h, h7 > +** famin z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f16_x_tied1, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamin_n_f16_x (p0, z0, d7), > + z0 = svamin_x (p0, z0, d7)) > +/* > +** amin_n_f16_x_untied: > +** mov z0\.h, h7 > +** famin z0\.h, p0/m, z0\.h, z4\.h > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f16_x_untied, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamin_n_f16_x (p0, z4, d7), > + z0 = svamin_x (p0, z4, d7)) > +/* > +** amin_n_f16_z_tied1: > +** mov z7\.h, h7 > +** movprfx z0\.h, p0/z, z0\.h > +** famin z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f16_z_tied1, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamin_n_f16_z (p0, z0, d7), > + z0 = svamin_z (p0, z0, d7)) > +/* > +** amin_n_f16_z_untied: > +** mov z7\.h, h7 > +** movprfx z0\.h, p0/z, z4\.h > +** famin z0\.h, p0/m, z0\.h, z7\.h > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f16_z_untied, svfloat16_t, svfloat16_t, float16_t, > + z0 = svamin_n_f16_z (p0, z4, d7), > + z0 = svamin_z (p0, z4, d7)) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c > new file mode 100644 > index 00000000000..b606c448ea6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c > @@ -0,0 +1,142 @@ > +/* { dg-do compile } */ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +#include "test_sve_acle.h" > + > +#pragma GCC target "+sve+faminmax" > + > +/* > +** amin_f32_m_tied1: > +** famin z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f32_m_tied1, 
svfloat32_t, > + z0 = svamin_f32_m (p0, z0, z1), > + z0 = svamin_m (p0, z0, z1)) > +/* > +** amin_f32_m_tied2: > +** mov z31\.d, z0\.d > +** movprfx z0, z1 > +** famin z0\.s, p0/m, z0\.s, z31\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f32_m_tied2, svfloat32_t, > + z0 = svamin_f32_m (p0, z1, z0), > + z0 = svamin_m (p0, z1, z0)) > +/* > +** amin_f32_m_untied: > +** movprfx z0, z1 > +** famin z0\.s, p0/m, z0\.s, z2\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f32_m_untied, svfloat32_t, > + z0 = svamin_f32_m (p0, z1, z2), > + z0 = svamin_m (p0, z1, z2)) > +/* > +** amin_f32_x_tied1: > +** famin z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f32_x_tied1, svfloat32_t, > + z0 = svamin_f32_x (p0, z0, z1), > + z0 = svamin_x (p0, z0, z1)) > +/* > +** amin_f32_x_tied2: > +** famin z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f32_x_tied2, svfloat32_t, > + z0 = svamin_f32_x (p0, z1, z0), > + z0 = svamin_x (p0, z1, z0)) > +/* > +** amin_f32_x_untied: > +** movprfx z0, z1 > +** famin z0\.s, p0/m, z0\.s, z2\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f32_x_untied, svfloat32_t, > + z0 = svamin_f32_x (p0, z1, z2), > + z0 = svamin_x (p0, z1, z2)) > +/* > +** amin_f32_z_tied1: > +** movprfx z0\.s, p0/z, z0\.s > +** famin z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f32_z_tied1, svfloat32_t, > + z0 = svamin_f32_z (p0, z0, z1), > + z0 = svamin_z (p0, z0, z1)) > +/* > +** amin_f32_z_tied2: > +** movprfx z0\.s, p0/z, z0\.s > +** famin z0\.s, p0/m, z0\.s, z1\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f32_z_tied2, svfloat32_t, > + z0 = svamin_f32_z (p0, z1, z0), > + z0 = svamin_z (p0, z1, z0)) > +/* > +** amin_f32_z_untied: > +** movprfx z0\.s, p0/z, z1\.s > +** famin z0\.s, p0/m, z0\.s, z2\.s > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f32_z_untied, svfloat32_t, > + z0 = svamin_f32_z (p0, z1, z2), > + z0 = svamin_z (p0, z1, z2)) > +/* > +** amin_n_f32_m_tied1: > +** mov z7\.s, s7 > +** famin z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > 
+TEST_DUAL_ZD (amin_n_f32_m_tied1, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamin_n_f32_m (p0, z0, d7), > + z0 = svamin_m (p0, z0, d7)) > +/* > +** amin_n_f32_m_untied: > +** mov z7\.s, s7 > +** movprfx z0, z4 > +** famin z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f32_m_untied, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamin_n_f32_m (p0, z4, d7), > + z0 = svamin_m (p0, z4, d7)) > +/* > +** amin_n_f32_x_tied1: > +** mov z7\.s, s7 > +** famin z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f32_x_tied1, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamin_n_f32_x (p0, z0, d7), > + z0 = svamin_x (p0, z0, d7)) > +/* > +** amin_n_f32_x_untied: > +** mov z0\.s, s7 > +** famin z0\.s, p0/m, z0\.s, z4\.s > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f32_x_untied, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamin_n_f32_x (p0, z4, d7), > + z0 = svamin_x (p0, z4, d7)) > +/* > +** amin_n_f32_z_tied1: > +** mov z7\.s, s7 > +** movprfx z0\.s, p0/z, z0\.s > +** famin z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f32_z_tied1, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamin_n_f32_z (p0, z0, d7), > + z0 = svamin_z (p0, z0, d7)) > +/* > +** amin_n_f32_z_untied: > +** mov z7\.s, s7 > +** movprfx z0\.s, p0/z, z4\.s > +** famin z0\.s, p0/m, z0\.s, z7\.s > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f32_z_untied, svfloat32_t, svfloat32_t, float32_t, > + z0 = svamin_n_f32_z (p0, z4, d7), > + z0 = svamin_z (p0, z4, d7)) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c > new file mode 100644 > index 00000000000..d91b7200c18 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c > @@ -0,0 +1,142 @@ > +/* { dg-do compile } */ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +#include "test_sve_acle.h" > + > +#pragma GCC target "+sve+faminmax" > + > +/* > +** amin_f64_m_tied1: > +** famin z0\.d, 
p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f64_m_tied1, svfloat64_t, > + z0 = svamin_f64_m (p0, z0, z1), > + z0 = svamin_m (p0, z0, z1)) > +/* > +** amin_f64_m_tied2: > +** mov z31\.d, z0\.d > +** movprfx z0, z1 > +** famin z0\.d, p0/m, z0\.d, z31\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f64_m_tied2, svfloat64_t, > + z0 = svamin_f64_m (p0, z1, z0), > + z0 = svamin_m (p0, z1, z0)) > +/* > +** amin_f64_m_untied: > +** movprfx z0, z1 > +** famin z0\.d, p0/m, z0\.d, z2\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f64_m_untied, svfloat64_t, > + z0 = svamin_f64_m (p0, z1, z2), > + z0 = svamin_m (p0, z1, z2)) > +/* > +** amin_f64_x_tied1: > +** famin z0\.d, p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f64_x_tied1, svfloat64_t, > + z0 = svamin_f64_x (p0, z0, z1), > + z0 = svamin_x (p0, z0, z1)) > +/* > +** amin_f64_x_tied2: > +** famin z0\.d, p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f64_x_tied2, svfloat64_t, > + z0 = svamin_f64_x (p0, z1, z0), > + z0 = svamin_x (p0, z1, z0)) > +/* > +** amin_f64_x_untied: > +** movprfx z0, z1 > +** famin z0\.d, p0/m, z0\.d, z2\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f64_x_untied, svfloat64_t, > + z0 = svamin_f64_x (p0, z1, z2), > + z0 = svamin_x (p0, z1, z2)) > +/* > +** amin_f64_z_tied1: > +** movprfx z0\.d, p0/z, z0\.d > +** famin z0\.d, p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f64_z_tied1, svfloat64_t, > + z0 = svamin_f64_z (p0, z0, z1), > + z0 = svamin_z (p0, z0, z1)) > +/* > +** amin_f64_z_tied2: > +** movprfx z0\.d, p0/z, z0\.d > +** famin z0\.d, p0/m, z0\.d, z1\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f64_z_tied2, svfloat64_t, > + z0 = svamin_f64_z (p0, z1, z0), > + z0 = svamin_z (p0, z1, z0)) > +/* > +** amin_f64_z_untied: > +** movprfx z0\.d, p0/z, z1\.d > +** famin z0\.d, p0/m, z0\.d, z2\.d > +** ret > +*/ > +TEST_UNIFORM_Z (amin_f64_z_untied, svfloat64_t, > + z0 = svamin_f64_z (p0, z1, z2), > + z0 = svamin_z (p0, z1, z2)) > +/* > +** amin_n_f64_m_tied1: > +** 
mov z7\.d, d7 > +** famin z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f64_m_tied1, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamin_n_f64_m (p0, z0, d7), > + z0 = svamin_m (p0, z0, d7)) > +/* > +** amin_n_f64_m_untied: > +** mov z7\.d, d7 > +** movprfx z0, z4 > +** famin z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f64_m_untied, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamin_n_f64_m (p0, z4, d7), > + z0 = svamin_m (p0, z4, d7)) > +/* > +** amin_n_f64_x_tied1: > +** mov z7\.d, d7 > +** famin z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f64_x_tied1, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamin_n_f64_x (p0, z0, d7), > + z0 = svamin_x (p0, z0, d7)) > +/* > +** amin_n_f64_x_untied: > +** mov z0\.d, d7 > +** famin z0\.d, p0/m, z0\.d, z4\.d > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f64_x_untied, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamin_n_f64_x (p0, z4, d7), > + z0 = svamin_x (p0, z4, d7)) > +/* > +** amin_n_f64_z_tied1: > +** mov z7\.d, d7 > +** movprfx z0\.d, p0/z, z0\.d > +** famin z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f64_z_tied1, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamin_n_f64_z (p0, z0, d7), > + z0 = svamin_z (p0, z0, d7)) > +/* > +** amin_n_f64_z_untied: > +** mov z7\.d, d7 > +** movprfx z0\.d, p0/z, z4\.d > +** famin z0\.d, p0/m, z0\.d, z7\.d > +** ret > +*/ > +TEST_DUAL_ZD (amin_n_f64_z_untied, svfloat64_t, svfloat64_t, float64_t, > + z0 = svamin_n_f64_z (p0, z4, d7), > + z0 = svamin_z (p0, z4, d7))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index afce52a7e8d..dd4efdf6ca5 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -3070,6 +3070,10 @@ FUNCTION (svadrb, svadr_bhwd_impl, (0)) FUNCTION (svadrd, svadr_bhwd_impl, (3)) FUNCTION (svadrh, svadr_bhwd_impl, (1)) FUNCTION (svadrw, svadr_bhwd_impl, (2)) +FUNCTION (svamax, cond_or_uncond_unspec_function, + (UNSPEC_COND_FAMAX, UNSPEC_FAMAX)) +FUNCTION (svamin, cond_or_uncond_unspec_function, + (UNSPEC_COND_FAMIN, UNSPEC_FAMIN)) FUNCTION (svand, rtx_code_function, (AND, AND)) FUNCTION (svandv, reduction, (UNSPEC_ANDV)) FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def index 65fcba91586..95e04e4393d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def @@ -379,3 +379,8 @@ DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) #undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_FAMINMAX +DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz) +DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h index 5bbf3569c4b..978cf7013f9 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h @@ -37,6 +37,8 @@ namespace aarch64_sve extern const function_base *const svadrd; extern const function_base *const svadrh; extern const function_base *const svadrw; + extern const function_base *const svamax; + extern const function_base *const svamin; extern const function_base *const 
svand; extern const function_base *const svandv; extern const function_base *const svasr; diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 43819adb48c..a496235db42 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -470,6 +470,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* Floating Point Absolute Maximum/Minimum extension instructions are enabled through +faminmax. */ #define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX) +#define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX) /* Prefer different predicate registers for the output of a predicated operation over re-using an existing input predicate. */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index c2fcd18306e..cf9ee2639a9 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -841,6 +841,8 @@ UNSPEC_COND_CMPNE_WIDE ; Used in aarch64-sve.md. UNSPEC_COND_FABS ; Used in aarch64-sve.md. UNSPEC_COND_FADD ; Used in aarch64-sve.md. + UNSPEC_COND_FAMAX ; Used in aarch64-sve.md. + UNSPEC_COND_FAMIN ; Used in aarch64-sve.md. UNSPEC_COND_FCADD90 ; Used in aarch64-sve.md. UNSPEC_COND_FCADD270 ; Used in aarch64-sve.md. UNSPEC_COND_FCMEQ ; Used in aarch64-sve.md. 
@@ -3081,15 +3083,18 @@ (define_int_iterator SVE_COND_FCVTI [UNSPEC_COND_FCVTZS UNSPEC_COND_FCVTZU]) (define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF]) -(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD - UNSPEC_COND_FDIV - UNSPEC_COND_FMAX - UNSPEC_COND_FMAXNM - UNSPEC_COND_FMIN - UNSPEC_COND_FMINNM - UNSPEC_COND_FMUL - UNSPEC_COND_FMULX - UNSPEC_COND_FSUB]) +(define_int_iterator SVE_COND_FP_BINARY + [UNSPEC_COND_FADD + (UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") + (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") + UNSPEC_COND_FDIV + UNSPEC_COND_FMAX + UNSPEC_COND_FMAXNM + UNSPEC_COND_FMIN + UNSPEC_COND_FMINNM + UNSPEC_COND_FMUL + UNSPEC_COND_FMULX + UNSPEC_COND_FSUB]) ;; Same as SVE_COND_FP_BINARY, but without codes that have a dedicated ;; <optab><mode>3 expander. @@ -3114,8 +3119,11 @@ UNSPEC_COND_FMINNM UNSPEC_COND_FMUL]) -(define_int_iterator SVE_COND_FP_BINARY_REG [UNSPEC_COND_FDIV - UNSPEC_COND_FMULX]) +(define_int_iterator SVE_COND_FP_BINARY_REG + [(UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX") + (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX") + UNSPEC_COND_FDIV + UNSPEC_COND_FMULX]) (define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90 UNSPEC_COND_FCADD270]) @@ -3694,6 +3702,8 @@ (UNSPEC_ZIP2Q "zip2q") (UNSPEC_COND_FABS "abs") (UNSPEC_COND_FADD "add") + (UNSPEC_COND_FAMAX "famax") + (UNSPEC_COND_FAMIN "famin") (UNSPEC_COND_FCADD90 "cadd90") (UNSPEC_COND_FCADD270 "cadd270") (UNSPEC_COND_FCMLA "fcmla") @@ -4230,6 +4240,8 @@ (UNSPEC_FTSSEL "ftssel") (UNSPEC_COND_FABS "fabs") (UNSPEC_COND_FADD "fadd") + (UNSPEC_COND_FAMAX "famax") + (UNSPEC_COND_FAMIN "famin") (UNSPEC_COND_FCVTLT "fcvtlt") (UNSPEC_COND_FCVTX "fcvtx") (UNSPEC_COND_FDIV "fdiv") @@ -4254,6 +4266,8 @@ (UNSPEC_COND_FSUB "fsub")]) (define_int_attr sve_fp_op_rev [(UNSPEC_COND_FADD "fadd") + (UNSPEC_COND_FAMAX "famax") + (UNSPEC_COND_FAMIN "famin") (UNSPEC_COND_FDIV "fdivr") (UNSPEC_COND_FMAX "fmax") (UNSPEC_COND_FMAXNM "fmaxnm") @@ -4390,6 +4404,8 @@ ;; <optab><mode>3 pattern. 
(define_int_attr sve_pred_fp_rhs1_operand [(UNSPEC_COND_FADD "register_operand") + (UNSPEC_COND_FAMAX "register_operand") + (UNSPEC_COND_FAMIN "register_operand") (UNSPEC_COND_FDIV "register_operand") (UNSPEC_COND_FMAX "register_operand") (UNSPEC_COND_FMAXNM "register_operand") @@ -4403,6 +4419,8 @@ ;; <optab><mode>3 pattern. (define_int_attr sve_pred_fp_rhs2_operand [(UNSPEC_COND_FADD "aarch64_sve_float_arith_with_sub_operand") + (UNSPEC_COND_FAMAX "aarch64_sve_float_maxmin_operand") + (UNSPEC_COND_FAMIN "aarch64_sve_float_maxmin_operand") (UNSPEC_COND_FDIV "register_operand") (UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_operand") (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_operand") diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c new file mode 100644 index 00000000000..e5681a0733e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amax_f16_m_tied1: +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_m_tied1, svfloat16_t, + z0 = svamax_f16_m (p0, z0, z1), + z0 = svamax_m (p0, z0, z1)) +/* +** amax_f16_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famax z0\.h, p0/m, z0\.h, z31\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_m_tied2, svfloat16_t, + z0 = svamax_f16_m (p0, z1, z0), + z0 = svamax_m (p0, z1, z0)) +/* +** amax_f16_m_untied: +** movprfx z0, z1 +** famax z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_m_untied, svfloat16_t, + z0 = svamax_f16_m (p0, z1, z2), + z0 = svamax_m (p0, z1, z2)) +/* +** amax_f16_x_tied1: +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_x_tied1, svfloat16_t, + z0 = svamax_f16_x (p0, z0, z1), + z0 = svamax_x (p0, z0, z1)) +/* +** amax_f16_x_tied2: +** famax z0\.h, p0/m, z0\.h, 
z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_x_tied2, svfloat16_t, + z0 = svamax_f16_x (p0, z1, z0), + z0 = svamax_x (p0, z1, z0)) +/* +** amax_f16_x_untied: +** movprfx z0, z1 +** famax z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_x_untied, svfloat16_t, + z0 = svamax_f16_x (p0, z1, z2), + z0 = svamax_x (p0, z1, z2)) +/* +** amax_f16_z_tied1: +** movprfx z0\.h, p0/z, z0\.h +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_z_tied1, svfloat16_t, + z0 = svamax_f16_z (p0, z0, z1), + z0 = svamax_z (p0, z0, z1)) +/* +** amax_f16_z_tied2: +** movprfx z0\.h, p0/z, z0\.h +** famax z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_z_tied2, svfloat16_t, + z0 = svamax_f16_z (p0, z1, z0), + z0 = svamax_z (p0, z1, z0)) +/* +** amax_f16_z_untied: +** movprfx z0\.h, p0/z, z1\.h +** famax z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amax_f16_z_untied, svfloat16_t, + z0 = svamax_f16_z (p0, z1, z2), + z0 = svamax_z (p0, z1, z2)) +/* +** amax_n_f16_m_tied1: +** mov z7\.h, h7 +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_m_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_m (p0, z0, d7), + z0 = svamax_m (p0, z0, d7)) +/* +** amax_n_f16_m_untied: +** mov z7\.h, h7 +** movprfx z0, z4 +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_m_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_m (p0, z4, d7), + z0 = svamax_m (p0, z4, d7)) +/* +** amax_n_f16_x_tied1: +** mov z7\.h, h7 +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_x_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_x (p0, z0, d7), + z0 = svamax_x (p0, z0, d7)) +/* +** amax_n_f16_x_untied: +** mov z0\.h, h7 +** famax z0\.h, p0/m, z0\.h, z4\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_x_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_x (p0, z4, d7), + z0 = svamax_x (p0, z4, d7)) +/* +** amax_n_f16_z_tied1: +** mov z7\.h, h7 +** movprfx 
z0\.h, p0/z, z0\.h +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_z_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_z (p0, z0, d7), + z0 = svamax_z (p0, z0, d7)) +/* +** amax_n_f16_z_untied: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z4\.h +** famax z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amax_n_f16_z_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamax_n_f16_z (p0, z4, d7), + z0 = svamax_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c new file mode 100644 index 00000000000..ac6fd227b52 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amax_f32_m_tied1: +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_m_tied1, svfloat32_t, + z0 = svamax_f32_m (p0, z0, z1), + z0 = svamax_m (p0, z0, z1)) +/* +** amax_f32_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famax z0\.s, p0/m, z0\.s, z31\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_m_tied2, svfloat32_t, + z0 = svamax_f32_m (p0, z1, z0), + z0 = svamax_m (p0, z1, z0)) +/* +** amax_f32_m_untied: +** movprfx z0, z1 +** famax z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_m_untied, svfloat32_t, + z0 = svamax_f32_m (p0, z1, z2), + z0 = svamax_m (p0, z1, z2)) +/* +** amax_f32_x_tied1: +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_x_tied1, svfloat32_t, + z0 = svamax_f32_x (p0, z0, z1), + z0 = svamax_x (p0, z0, z1)) +/* +** amax_f32_x_tied2: +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_x_tied2, svfloat32_t, + z0 = svamax_f32_x (p0, z1, z0), + z0 = svamax_x (p0, z1, z0)) +/* +** amax_f32_x_untied: +** movprfx z0, z1 +** famax z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ 
+TEST_UNIFORM_Z (amax_f32_x_untied, svfloat32_t, + z0 = svamax_f32_x (p0, z1, z2), + z0 = svamax_x (p0, z1, z2)) +/* +** amax_f32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_z_tied1, svfloat32_t, + z0 = svamax_f32_z (p0, z0, z1), + z0 = svamax_z (p0, z0, z1)) +/* +** amax_f32_z_tied2: +** movprfx z0\.s, p0/z, z0\.s +** famax z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_z_tied2, svfloat32_t, + z0 = svamax_f32_z (p0, z1, z0), + z0 = svamax_z (p0, z1, z0)) +/* +** amax_f32_z_untied: +** movprfx z0\.s, p0/z, z1\.s +** famax z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amax_f32_z_untied, svfloat32_t, + z0 = svamax_f32_z (p0, z1, z2), + z0 = svamax_z (p0, z1, z2)) +/* +** amax_n_f32_m_tied1: +** mov z7\.s, s7 +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_m_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_m (p0, z0, d7), + z0 = svamax_m (p0, z0, d7)) +/* +** amax_n_f32_m_untied: +** mov z7\.s, s7 +** movprfx z0, z4 +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_m_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_m (p0, z4, d7), + z0 = svamax_m (p0, z4, d7)) +/* +** amax_n_f32_x_tied1: +** mov z7\.s, s7 +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_x_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_x (p0, z0, d7), + z0 = svamax_x (p0, z0, d7)) +/* +** amax_n_f32_x_untied: +** mov z0\.s, s7 +** famax z0\.s, p0/m, z0\.s, z4\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_x_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_x (p0, z4, d7), + z0 = svamax_x (p0, z4, d7)) +/* +** amax_n_f32_z_tied1: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z0\.s +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_z_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_z (p0, z0, d7), + z0 = svamax_z (p0, z0, d7)) +/* +** 
amax_n_f32_z_untied: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z4\.s +** famax z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amax_n_f32_z_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamax_n_f32_z (p0, z4, d7), + z0 = svamax_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c new file mode 100644 index 00000000000..9e711674ea5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amax_f64_m_tied1: +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_m_tied1, svfloat64_t, + z0 = svamax_f64_m (p0, z0, z1), + z0 = svamax_m (p0, z0, z1)) +/* +** amax_f64_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famax z0\.d, p0/m, z0\.d, z31\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_m_tied2, svfloat64_t, + z0 = svamax_f64_m (p0, z1, z0), + z0 = svamax_m (p0, z1, z0)) +/* +** amax_f64_m_untied: +** movprfx z0, z1 +** famax z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_m_untied, svfloat64_t, + z0 = svamax_f64_m (p0, z1, z2), + z0 = svamax_m (p0, z1, z2)) +/* +** amax_f64_x_tied1: +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_x_tied1, svfloat64_t, + z0 = svamax_f64_x (p0, z0, z1), + z0 = svamax_x (p0, z0, z1)) +/* +** amax_f64_x_tied2: +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_x_tied2, svfloat64_t, + z0 = svamax_f64_x (p0, z1, z0), + z0 = svamax_x (p0, z1, z0)) +/* +** amax_f64_x_untied: +** movprfx z0, z1 +** famax z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_x_untied, svfloat64_t, + z0 = svamax_f64_x (p0, z1, z2), + z0 = svamax_x (p0, z1, z2)) +/* +** amax_f64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret 
+*/ +TEST_UNIFORM_Z (amax_f64_z_tied1, svfloat64_t, + z0 = svamax_f64_z (p0, z0, z1), + z0 = svamax_z (p0, z0, z1)) +/* +** amax_f64_z_tied2: +** movprfx z0\.d, p0/z, z0\.d +** famax z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_z_tied2, svfloat64_t, + z0 = svamax_f64_z (p0, z1, z0), + z0 = svamax_z (p0, z1, z0)) +/* +** amax_f64_z_untied: +** movprfx z0\.d, p0/z, z1\.d +** famax z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amax_f64_z_untied, svfloat64_t, + z0 = svamax_f64_z (p0, z1, z2), + z0 = svamax_z (p0, z1, z2)) +/* +** amax_n_f64_m_tied1: +** mov z7\.d, d7 +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_m_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_m (p0, z0, d7), + z0 = svamax_m (p0, z0, d7)) +/* +** amax_n_f64_m_untied: +** mov z7\.d, d7 +** movprfx z0, z4 +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_m_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_m (p0, z4, d7), + z0 = svamax_m (p0, z4, d7)) +/* +** amax_n_f64_x_tied1: +** mov z7\.d, d7 +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_x_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_x (p0, z0, d7), + z0 = svamax_x (p0, z0, d7)) +/* +** amax_n_f64_x_untied: +** mov z0\.d, d7 +** famax z0\.d, p0/m, z0\.d, z4\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_x_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_x (p0, z4, d7), + z0 = svamax_x (p0, z4, d7)) +/* +** amax_n_f64_z_tied1: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z0\.d +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_z_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_z (p0, z0, d7), + z0 = svamax_z (p0, z0, d7)) +/* +** amax_n_f64_z_untied: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z4\.d +** famax z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amax_n_f64_z_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamax_n_f64_z (p0, z4, 
d7), + z0 = svamax_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c new file mode 100644 index 00000000000..3c949df023c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amin_f16_m_tied1: +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_m_tied1, svfloat16_t, + z0 = svamin_f16_m (p0, z0, z1), + z0 = svamin_m (p0, z0, z1)) +/* +** amin_f16_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famin z0\.h, p0/m, z0\.h, z31\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_m_tied2, svfloat16_t, + z0 = svamin_f16_m (p0, z1, z0), + z0 = svamin_m (p0, z1, z0)) +/* +** amin_f16_m_untied: +** movprfx z0, z1 +** famin z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_m_untied, svfloat16_t, + z0 = svamin_f16_m (p0, z1, z2), + z0 = svamin_m (p0, z1, z2)) +/* +** amin_f16_x_tied1: +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_x_tied1, svfloat16_t, + z0 = svamin_f16_x (p0, z0, z1), + z0 = svamin_x (p0, z0, z1)) +/* +** amin_f16_x_tied2: +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_x_tied2, svfloat16_t, + z0 = svamin_f16_x (p0, z1, z0), + z0 = svamin_x (p0, z1, z0)) +/* +** amin_f16_x_untied: +** movprfx z0, z1 +** famin z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_x_untied, svfloat16_t, + z0 = svamin_f16_x (p0, z1, z2), + z0 = svamin_x (p0, z1, z2)) +/* +** amin_f16_z_tied1: +** movprfx z0\.h, p0/z, z0\.h +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_z_tied1, svfloat16_t, + z0 = svamin_f16_z (p0, z0, z1), + z0 = svamin_z (p0, z0, z1)) +/* +** amin_f16_z_tied2: +** movprfx z0\.h, p0/z, z0\.h +** famin z0\.h, p0/m, z0\.h, z1\.h +** ret +*/ 
+TEST_UNIFORM_Z (amin_f16_z_tied2, svfloat16_t, + z0 = svamin_f16_z (p0, z1, z0), + z0 = svamin_z (p0, z1, z0)) +/* +** amin_f16_z_untied: +** movprfx z0\.h, p0/z, z1\.h +** famin z0\.h, p0/m, z0\.h, z2\.h +** ret +*/ +TEST_UNIFORM_Z (amin_f16_z_untied, svfloat16_t, + z0 = svamin_f16_z (p0, z1, z2), + z0 = svamin_z (p0, z1, z2)) +/* +** amin_n_f16_m_tied1: +** mov z7\.h, h7 +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_m_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_m (p0, z0, d7), + z0 = svamin_m (p0, z0, d7)) +/* +** amin_n_f16_m_untied: +** mov z7\.h, h7 +** movprfx z0, z4 +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_m_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_m (p0, z4, d7), + z0 = svamin_m (p0, z4, d7)) +/* +** amin_n_f16_x_tied1: +** mov z7\.h, h7 +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_x_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_x (p0, z0, d7), + z0 = svamin_x (p0, z0, d7)) +/* +** amin_n_f16_x_untied: +** mov z0\.h, h7 +** famin z0\.h, p0/m, z0\.h, z4\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_x_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_x (p0, z4, d7), + z0 = svamin_x (p0, z4, d7)) +/* +** amin_n_f16_z_tied1: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z0\.h +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_z_tied1, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_z (p0, z0, d7), + z0 = svamin_z (p0, z0, d7)) +/* +** amin_n_f16_z_untied: +** mov z7\.h, h7 +** movprfx z0\.h, p0/z, z4\.h +** famin z0\.h, p0/m, z0\.h, z7\.h +** ret +*/ +TEST_DUAL_ZD (amin_n_f16_z_untied, svfloat16_t, svfloat16_t, float16_t, + z0 = svamin_n_f16_z (p0, z4, d7), + z0 = svamin_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c new file mode 100644 index 00000000000..b606c448ea6 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + +#pragma GCC target "+sve+faminmax" + +/* +** amin_f32_m_tied1: +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_m_tied1, svfloat32_t, + z0 = svamin_f32_m (p0, z0, z1), + z0 = svamin_m (p0, z0, z1)) +/* +** amin_f32_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famin z0\.s, p0/m, z0\.s, z31\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_m_tied2, svfloat32_t, + z0 = svamin_f32_m (p0, z1, z0), + z0 = svamin_m (p0, z1, z0)) +/* +** amin_f32_m_untied: +** movprfx z0, z1 +** famin z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_m_untied, svfloat32_t, + z0 = svamin_f32_m (p0, z1, z2), + z0 = svamin_m (p0, z1, z2)) +/* +** amin_f32_x_tied1: +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_x_tied1, svfloat32_t, + z0 = svamin_f32_x (p0, z0, z1), + z0 = svamin_x (p0, z0, z1)) +/* +** amin_f32_x_tied2: +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_x_tied2, svfloat32_t, + z0 = svamin_f32_x (p0, z1, z0), + z0 = svamin_x (p0, z1, z0)) +/* +** amin_f32_x_untied: +** movprfx z0, z1 +** famin z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_x_untied, svfloat32_t, + z0 = svamin_f32_x (p0, z1, z2), + z0 = svamin_x (p0, z1, z2)) +/* +** amin_f32_z_tied1: +** movprfx z0\.s, p0/z, z0\.s +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_z_tied1, svfloat32_t, + z0 = svamin_f32_z (p0, z0, z1), + z0 = svamin_z (p0, z0, z1)) +/* +** amin_f32_z_tied2: +** movprfx z0\.s, p0/z, z0\.s +** famin z0\.s, p0/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (amin_f32_z_tied2, svfloat32_t, + z0 = svamin_f32_z (p0, z1, z0), + z0 = svamin_z (p0, z1, z0)) +/* +** amin_f32_z_untied: +** movprfx z0\.s, p0/z, z1\.s +** famin z0\.s, p0/m, z0\.s, z2\.s +** ret +*/ 
+TEST_UNIFORM_Z (amin_f32_z_untied, svfloat32_t, + z0 = svamin_f32_z (p0, z1, z2), + z0 = svamin_z (p0, z1, z2)) +/* +** amin_n_f32_m_tied1: +** mov z7\.s, s7 +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_m_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_m (p0, z0, d7), + z0 = svamin_m (p0, z0, d7)) +/* +** amin_n_f32_m_untied: +** mov z7\.s, s7 +** movprfx z0, z4 +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_m_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_m (p0, z4, d7), + z0 = svamin_m (p0, z4, d7)) +/* +** amin_n_f32_x_tied1: +** mov z7\.s, s7 +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_x_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_x (p0, z0, d7), + z0 = svamin_x (p0, z0, d7)) +/* +** amin_n_f32_x_untied: +** mov z0\.s, s7 +** famin z0\.s, p0/m, z0\.s, z4\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_x_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_x (p0, z4, d7), + z0 = svamin_x (p0, z4, d7)) +/* +** amin_n_f32_z_tied1: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z0\.s +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_z_tied1, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_z (p0, z0, d7), + z0 = svamin_z (p0, z0, d7)) +/* +** amin_n_f32_z_untied: +** mov z7\.s, s7 +** movprfx z0\.s, p0/z, z4\.s +** famin z0\.s, p0/m, z0\.s, z7\.s +** ret +*/ +TEST_DUAL_ZD (amin_n_f32_z_untied, svfloat32_t, svfloat32_t, float32_t, + z0 = svamin_n_f32_z (p0, z4, d7), + z0 = svamin_z (p0, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c new file mode 100644 index 00000000000..d91b7200c18 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ +#include "test_sve_acle.h" + 
+#pragma GCC target "+sve+faminmax" + +/* +** amin_f64_m_tied1: +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_m_tied1, svfloat64_t, + z0 = svamin_f64_m (p0, z0, z1), + z0 = svamin_m (p0, z0, z1)) +/* +** amin_f64_m_tied2: +** mov z31\.d, z0\.d +** movprfx z0, z1 +** famin z0\.d, p0/m, z0\.d, z31\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_m_tied2, svfloat64_t, + z0 = svamin_f64_m (p0, z1, z0), + z0 = svamin_m (p0, z1, z0)) +/* +** amin_f64_m_untied: +** movprfx z0, z1 +** famin z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_m_untied, svfloat64_t, + z0 = svamin_f64_m (p0, z1, z2), + z0 = svamin_m (p0, z1, z2)) +/* +** amin_f64_x_tied1: +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_x_tied1, svfloat64_t, + z0 = svamin_f64_x (p0, z0, z1), + z0 = svamin_x (p0, z0, z1)) +/* +** amin_f64_x_tied2: +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_x_tied2, svfloat64_t, + z0 = svamin_f64_x (p0, z1, z0), + z0 = svamin_x (p0, z1, z0)) +/* +** amin_f64_x_untied: +** movprfx z0, z1 +** famin z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_x_untied, svfloat64_t, + z0 = svamin_f64_x (p0, z1, z2), + z0 = svamin_x (p0, z1, z2)) +/* +** amin_f64_z_tied1: +** movprfx z0\.d, p0/z, z0\.d +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_z_tied1, svfloat64_t, + z0 = svamin_f64_z (p0, z0, z1), + z0 = svamin_z (p0, z0, z1)) +/* +** amin_f64_z_tied2: +** movprfx z0\.d, p0/z, z0\.d +** famin z0\.d, p0/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_z_tied2, svfloat64_t, + z0 = svamin_f64_z (p0, z1, z0), + z0 = svamin_z (p0, z1, z0)) +/* +** amin_f64_z_untied: +** movprfx z0\.d, p0/z, z1\.d +** famin z0\.d, p0/m, z0\.d, z2\.d +** ret +*/ +TEST_UNIFORM_Z (amin_f64_z_untied, svfloat64_t, + z0 = svamin_f64_z (p0, z1, z2), + z0 = svamin_z (p0, z1, z2)) +/* +** amin_n_f64_m_tied1: +** mov z7\.d, d7 +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD 
(amin_n_f64_m_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_m (p0, z0, d7), + z0 = svamin_m (p0, z0, d7)) +/* +** amin_n_f64_m_untied: +** mov z7\.d, d7 +** movprfx z0, z4 +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_m_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_m (p0, z4, d7), + z0 = svamin_m (p0, z4, d7)) +/* +** amin_n_f64_x_tied1: +** mov z7\.d, d7 +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_x_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_x (p0, z0, d7), + z0 = svamin_x (p0, z0, d7)) +/* +** amin_n_f64_x_untied: +** mov z0\.d, d7 +** famin z0\.d, p0/m, z0\.d, z4\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_x_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_x (p0, z4, d7), + z0 = svamin_x (p0, z4, d7)) +/* +** amin_n_f64_z_tied1: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z0\.d +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_z_tied1, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_z (p0, z0, d7), + z0 = svamin_z (p0, z0, d7)) +/* +** amin_n_f64_z_untied: +** mov z7\.d, d7 +** movprfx z0\.d, p0/z, z4\.d +** famin z0\.d, p0/m, z0\.d, z7\.d +** ret +*/ +TEST_DUAL_ZD (amin_n_f64_z_untied, svfloat64_t, svfloat64_t, float64_t, + z0 = svamin_n_f64_z (p0, z4, d7), + z0 = svamin_z (p0, z4, d7))