Message ID | 20240830111626.70300-2-saurabh.jha@arm.com |
---|---|
State | New |
Headers | show |
Series | aarch64: Add support for AdvSIMD faminmax | expand |
<saurabh.jha@arm.com> writes: > The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and > mandatory from Armv9.5-a. It introduces instructions for computing the > floating point absolute maximum and minimum of the two vectors element-wise. > > This patch introduces AdvSIMD faminmax intrinsics. The intrinsics of > this extension are implemented as the following builtin functions: > * vamax_f16 > * vamaxq_f16 > * vamax_f32 > * vamaxq_f32 > * vamaxq_f64 > * vamin_f16 > * vaminq_f16 > * vamin_f32 > * vaminq_f32 > * vaminq_f64 > > We are defining a new way to add AArch64 AdvSIMD intrinsics by listing > all the intrinsics in a .def file and then using that .def file to > initialise various data structures. This would lead to more concise code > and easier addition of the new AdvSIMD intrinsics in future. > > The faminmax intrinsics are defined using the new approach. Thanks, this is looking almost ready from my POV, but some comments below. > > gcc/ChangeLog: > > * config/aarch64/aarch64-builtins.cc > (ENTRY): Macro to parse the contents of > aarch64-simd-pragma-builtins.def. > (enum aarch64_builtins): New enum values for faminmax builtins > via aarch64-simd-pragma-builtins.def. > (struct aarch64_pragma_builtins_data): Struct to hold data from > aarch64-simd-pragma-builtins.def. > (aarch64_init_pragma_builtins): New function to define pragma builtins. > (aarch64_get_pragma_builtin): New function to get a row of > aarch64_pragma_builtins, given code. > (handle_arm_neon_h): Modify to call > aarch64_init_pragma_builtins. > (aarch64_general_check_builtin_call): Modify to check whether > required flag is being used for pragma builtins. > (aarch64_expand_pragma_builtin): New function to emit > instructions of pragma builtins. > (aarch64_general_expand_builtin): Modify to call > aarch64_expand_pragma_builtin. > * config/aarch64/aarch64-option-extensions.def > (AARCH64_OPT_EXTENSION): Introduce new flag for this > extension. > * config/aarch64/aarch64-simd.md > (@aarch64_<faminmax_uns_op><mode>): Instruction pattern for > faminmax intrinsics. > * config/aarch64/aarch64.h > (TARGET_FAMINMAX): Introduce new flag for this extension. > * config/aarch64/iterators.md: New iterators and unspecs. > * config/arm/types.md: Introduce neon_fp_aminmax<q> attributes. > * doc/invoke.texi: Document extension in AArch64 Options. > * config/aarch64/aarch64-simd-pragma-builtins.def: New file to > list pragma builtins. (It looks like contrib/gcc-changelog/git_check_commit.py is going to complain about the formatting at commit time, due to the mixture of spaces & tabs.) > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/simd/faminmax-builtins-no-flag.c: New test. > * gcc.target/aarch64/simd/faminmax-builtins.c: New test. > --- > gcc/config/aarch64/aarch64-builtins.cc | 84 +++++++++++++ > .../aarch64/aarch64-option-extensions.def | 2 + > .../aarch64/aarch64-simd-pragma-builtins.def | 31 +++++ > gcc/config/aarch64/aarch64-simd.md | 11 ++ > gcc/config/aarch64/aarch64.h | 4 + > gcc/config/aarch64/iterators.md | 9 ++ > gcc/config/arm/types.md | 6 + > gcc/doc/invoke.texi | 2 + > .../aarch64/simd/faminmax-builtins-no-flag.c | 10 ++ > .../aarch64/simd/faminmax-builtins.c | 115 ++++++++++++++++++ > 10 files changed, 274 insertions(+) > create mode 100644 gcc/config/aarch64/aarch64-simd-pragma-builtins.def > create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc > index eb878b933fe..a4905dd0aae 100644 > --- a/gcc/config/aarch64/aarch64-builtins.cc > +++ b/gcc/config/aarch64/aarch64-builtins.cc > @@ -757,6 +757,10 @@ typedef struct > #define VAR1(T, N, MAP, FLAG, A) \ > AARCH64_SIMD_BUILTIN_##T##_##N##A, > > +#undef ENTRY > +#define ENTRY(N, M, U, F) \ > + AARCH64_##N, > + > enum aarch64_builtins > { > AARCH64_BUILTIN_MIN, > @@ -829,6 +833,10 @@ enum aarch64_builtins > AARCH64_RBIT, > AARCH64_RBITL, > AARCH64_RBITLL, > + /* Pragma builtins. */ > + AARCH64_PRAGMA_BUILTIN_START, > +#include "aarch64-simd-pragma-builtins.def" > + AARCH64_PRAGMA_BUILTIN_END, > /* System register builtins. */ > AARCH64_RSR, > AARCH64_RSRP, > @@ -947,6 +955,7 @@ const char *aarch64_scalar_builtin_types[] = { > > extern GTY(()) aarch64_simd_type_info aarch64_simd_types[]; > > +#undef ENTRY > #define ENTRY(E, M, Q, G) \ > {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q}, > struct aarch64_simd_type_info aarch64_simd_types [] = { > @@ -1547,6 +1556,50 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma) > } > } > > +/* Initialize pragma builtins. */ It might be better to put this directly above aarch64_init_pragma_builtins. The stuff in between is shared between several routines. > + > +struct aarch64_pragma_builtins_data > +{ > + const char *name; > + machine_mode mode; > + int unspec; > + aarch64_feature_flags required_extensions; > +}; > + > +#undef ENTRY > +#define ENTRY(N, M, U, F) \ > + {#N, E_##M##mode, U, F}, > + > +static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = { > +#include "aarch64-simd-pragma-builtins.def" > +}; > + > +static void > +aarch64_init_pragma_builtins () > +{ > + for (size_t i = 0; i < ARRAY_SIZE (aarch64_pragma_builtins); ++i) > + { > + auto data = aarch64_pragma_builtins[i]; > + auto type = aarch64_simd_builtin_type (data.mode, qualifier_none); > + auto fntype = build_function_type_list (type, type, type, NULL_TREE); > + auto code = AARCH64_PRAGMA_BUILTIN_START + i + 1; > + const char *name = data.name; Guess this is personal preference, sorry, but: I think it would be easier to read without this temporary "name" variable. > + aarch64_builtin_decls[code] > + = aarch64_general_simulate_builtin (name, fntype, code); > + } > +} > + > +static const aarch64_pragma_builtins_data * > +aarch64_get_pragma_builtin (int code) The function needs a comment, maybe: /* If the builtin function with code CODE has an entry in aarch64_pragma_builtins, return its entry, otherwise return null. */ > +{ > + if (!(code > AARCH64_PRAGMA_BUILTIN_START > + && code < AARCH64_PRAGMA_BUILTIN_END)) > + return NULL; > + > + auto idx = code - (AARCH64_PRAGMA_BUILTIN_START + 1); > + return &aarch64_pragma_builtins[idx]; > +} > + > /* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type > indexed by TYPE_INDEX. */ > static void > @@ -1640,6 +1693,7 @@ handle_arm_neon_h (void) > > aarch64_init_simd_builtin_functions (true); > aarch64_init_simd_intrinsics (); > + aarch64_init_pragma_builtins (); > } > > static void > @@ -2326,6 +2380,12 @@ aarch64_general_check_builtin_call (location_t location, vec<location_t>, > return aarch64_check_required_extensions (location, decl, > AARCH64_FL_MEMTAG); > > + if (auto builtin_data = aarch64_get_pragma_builtin (code)) > + { > + auto flags = builtin_data->required_extensions; > + return aarch64_check_required_extensions (location, decl, flags); > + } > + > return true; > } > > @@ -3189,6 +3249,27 @@ aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) > return ops[0].value; > } > > +static rtx > +aarch64_expand_pragma_builtin (unsigned int fcode, tree exp, rtx target) I think we should pass the builtin data as an argument here: static rtx aarch64_expand_pragma_builtin (unsigned int fcode, tree exp, rtx target, const aarch64_pragma_builtins_data &builtin_data) > +{ > + auto builtins_data > + = aarch64_pragma_builtins[fcode - (AARCH64_PRAGMA_BUILTIN_START + 1)]; > + > + expand_operand ops[3]; > + auto mode = builtins_data.mode; > + auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0)); > + auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1)); > + create_output_operand (&ops[0], target, mode); > + create_input_operand (&ops[1], op1, mode); > + create_input_operand (&ops[2], op2, mode); > + > + auto unspec = builtins_data.unspec; > + auto icode = code_for_aarch64 (unspec, mode); > + expand_insn (icode, 3, ops); > + > + return target; > +} > + > /* Expand an expression EXP as fpsr or fpcr setter (depending on > UNSPEC) using MODE. */ > static void > @@ -3368,6 +3449,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, > if (fcode >= AARCH64_REV16 > && fcode <= AARCH64_RBITLL) > return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); > + if (fcode > AARCH64_PRAGMA_BUILTIN_START > + && fcode < AARCH64_PRAGMA_BUILTIN_END) > + return aarch64_expand_pragma_builtin (fcode, exp, target); ...and then make this: if (auto builtin_data = aarch64_get_pragma_builtin (code)) return aarch64_expand_pragma_builtin (fcode, exp, target, builtin_data); > > gcc_unreachable (); > } > diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def > index 6998627f377..8279f5a76ea 100644 > --- a/gcc/config/aarch64/aarch64-option-extensions.def > +++ b/gcc/config/aarch64/aarch64-option-extensions.def > @@ -234,6 +234,8 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") > > AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8") > > +AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax") > + > #undef AARCH64_OPT_FMV_EXTENSION > #undef AARCH64_OPT_EXTENSION > #undef AARCH64_FMV_FEATURE > diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def > new file mode 100644 > index 00000000000..be7029c4424 > --- /dev/null > +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def > @@ -0,0 +1,31 @@ > +/* AArch64 SIMD pragma builtins > + Copyright (C) 2024 Free Software Foundation, Inc. > + Contributed by ARM Ltd. > + > + This file is part of GCC. > + > + GCC is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3, or (at your option) > + any later version. > + > + GCC is distributed in the hope that it will be useful, but > + WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with GCC; see the file COPYING3. If not see > + <http://www.gnu.org/licenses/>. */ > + > + // faminmax > + ENTRY (vamax_f16, V4HF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) > + ENTRY (vamaxq_f16, V8HF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) > + ENTRY (vamax_f32, V2SF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) > + ENTRY (vamaxq_f32, V4SF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) > + ENTRY (vamaxq_f64, V2DF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) > + ENTRY (vamin_f16, V4HF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) > + ENTRY (vaminq_f16, V8HF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) > + ENTRY (vamin_f32, V2SF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) > + ENTRY (vaminq_f32, V4SF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) > + ENTRY (vaminq_f64, V2DF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) Sorry for not noticing last time, but: no need to indent the ENTRY lines. I was originally going to treat this as a follow-on, but since there'll be another round anyway, could you add an extra argument to ENTRY for the type signature. Maybe immediately after the name. Thus: ENTRY (vamax_f16, binary, V4HF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) Then define something like: static tree aarch64_fntype_binary (const aarch64_pragma_builtins_data &builtin_data) that returns the fntype calculated above. aarch64_pragma_builtins_data would then store this function pointer, and aarch64_init_pragma_builtins would call it to get the function type. Also, I think the ENTRY pattern above is likely to be relatively common, so how about adding a macro to generate all forms programmatically? #define ENTRY_VHSDF(NAME, TYPE, UNSPEC, EXTENSIONS) \ ENTRY (NAME##_f16, binary, V4HF, UNSPEC, EXTENSIONS) \ ENTRY (NAME##q_f16, binary, V8HF, UNSPEC, EXTENSIONS) \ ENTRY (NAME##_f32, binary, V2SF, UNSPEC, EXTENSIONS) \ ENTRY (NAME##q_f32, binary, V4SF, UNSPEC, EXTENSIONS) \ ENTRY (NAME##q_f64, binary, V2DF, UNSPEC, EXTENSIONS) with: #undef ENTRY_VHSDF at the end of the file. At some point we might want to generate things like ENTRY_VHSDF automatically, as for aarch64-builtin-iterators.h, but that's definitely follow-on work. Thanks, Richard > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md > index 23c03a96371..7542c81ed91 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -9910,3 +9910,14 @@ > "shl\\t%d0, %d1, #16" > [(set_attr "type" "neon_shift_imm")] > ) > + > +;; faminmax > +(define_insn "@aarch64_<faminmax_uns_op><mode>" > + [(set (match_operand:VHSDF 0 "register_operand" "=w") > + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") > + (match_operand:VHSDF 2 "register_operand" "w")] > + FAMINMAX_UNS))] > + "TARGET_FAMINMAX" > + "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" > + [(set_attr "type" "neon_fp_aminmax<q>")] > +) > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index 2dfb999bea5..de14f57071a 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -457,6 +457,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED > enabled through +gcs. */ > #define TARGET_GCS AARCH64_HAVE_ISA (GCS) > > +/* Floating Point Absolute Maximum/Minimum extension instructions are > + enabled through +faminmax. */ > +#define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX) > + > /* Prefer different predicate registers for the output of a predicated > operation over re-using an existing input predicate. */ > #define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index 20a318e023b..17ac5e073aa 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -1057,6 +1057,8 @@ > UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. > UNSPEC_BFCVT ; Used in aarch64-simd.md. > UNSPEC_FCVTXN ; Used in aarch64-simd.md. > + UNSPEC_FAMAX ; Used in aarch64-simd.md. > + UNSPEC_FAMIN ; Used in aarch64-simd.md. > > ;; All used in aarch64-sve2.md > UNSPEC_FCVTN > @@ -4463,3 +4465,10 @@ > (UNSPECV_SET_FPCR "fpcr")]) > > (define_int_attr bits_etype [(8 "b") (16 "h") (32 "s") (64 "d")]) > + > +;; Iterators and attributes for faminmax > + > +(define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN]) > + > +(define_int_attr faminmax_uns_op > + [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")]) > diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md > index 9527bdb9e87..d8de9dbc9d1 100644 > --- a/gcc/config/arm/types.md > +++ b/gcc/config/arm/types.md > @@ -492,6 +492,8 @@ > ; neon_fp_reduc_minmax_s_q > ; neon_fp_reduc_minmax_d > ; neon_fp_reduc_minmax_d_q > +; neon_fp_aminmax > +; neon_fp_aminmax_q > ; neon_fp_cvt_narrow_s_q > ; neon_fp_cvt_narrow_d_q > ; neon_fp_cvt_widen_h > @@ -1044,6 +1046,8 @@ > neon_fp_reduc_minmax_d,\ > neon_fp_reduc_minmax_d_q,\ > \ > + neon_fp_aminmax,\ > + neon_fp_aminmax_q,\ > neon_fp_cvt_narrow_s_q,\ > neon_fp_cvt_narrow_d_q,\ > neon_fp_cvt_widen_h,\ > @@ -1264,6 +1268,8 @@ > neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s, > neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\ > neon_fp_reduc_minmax_d_q,\ > + neon_fp_aminmax, neon_fp_aminmax_q,\ > + neon_fp_aminmax, neon_fp_aminmax_q,\ > neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\ > neon_fp_cvt_widen_h, neon_fp_cvt_widen_s, neon_fp_to_int_s,\ > neon_fp_to_int_s_q, neon_int_to_fp_s, neon_int_to_fp_s_q,\ > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 32b772d2a8a..2c509f62d98 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -21865,6 +21865,8 @@ Enable support for Armv8.9-a/9.4-a translation hardening extension. > Enable the RCpc3 (Release Consistency) extension. > @item fp8 > Enable the fp8 (8-bit floating point) extension. > +@item faminmax > +Enable the Floating Point Absolute Maximum/Minimum extension. > > @end table > > diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c > new file mode 100644 > index 00000000000..63ed1508c23 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c > @@ -0,0 +1,10 @@ > +/* { dg-do assemble} */ > +/* { dg-additional-options "-march=armv9-a" } */ > + > +#include "arm_neon.h" > + > +void > +test (float32x4_t a, float32x4_t b) > +{ > + vamaxq_f32 (a, b); /* { dg-error {ACLE function 'vamaxq_f32' requires ISA extension 'faminmax'} } */ > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c > new file mode 100644 > index 00000000000..7e4f3eba81a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c > @@ -0,0 +1,115 @@ > +/* { dg-do assemble} */ > +/* { dg-additional-options "-O3 -march=armv9-a+faminmax" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include "arm_neon.h" > + > +/* > +** test_vamax_f16: > +** famax v0.4h, v0.4h, v1.4h > +** ret > +*/ > +float16x4_t > +test_vamax_f16 (float16x4_t a, float16x4_t b) > +{ > + return vamax_f16 (a, b); > +} > + > +/* > +** test_vamaxq_f16: > +** famax v0.8h, v0.8h, v1.8h > +** ret > +*/ > +float16x8_t > +test_vamaxq_f16 (float16x8_t a, float16x8_t b) > +{ > + return vamaxq_f16 (a, b); > +} > + > +/* > +** test_vamax_f32: > +** famax v0.2s, v0.2s, v1.2s > +** ret > +*/ > +float32x2_t > +test_vamax_f32 (float32x2_t a, float32x2_t b) > +{ > + return vamax_f32 (a, b); > +} > + > +/* > +** test_vamaxq_f32: > +** famax v0.4s, v0.4s, v1.4s > +** ret > +*/ > +float32x4_t > +test_vamaxq_f32 (float32x4_t a, float32x4_t b) > +{ > + return vamaxq_f32 (a, b); > +} > + > +/* > +** test_vamaxq_f64: > +** famax v0.2d, v0.2d, v1.2d > +** ret > +*/ > +float64x2_t > +test_vamaxq_f64 (float64x2_t a, float64x2_t b) > +{ > + return vamaxq_f64 (a, b); > +} > + > +/* > +** test_vamin_f16: > +** famin v0.4h, v0.4h, v1.4h > +** ret > +*/ > +float16x4_t > +test_vamin_f16 (float16x4_t a, float16x4_t b) > +{ > + return vamin_f16 (a, b); > +} > + > +/* > +** test_vaminq_f16: > +** famin v0.8h, v0.8h, v1.8h > +** ret > +*/ > +float16x8_t > +test_vaminq_f16 (float16x8_t a, float16x8_t b) > +{ > + return vaminq_f16 (a, b); > +} > + > +/* > +** test_vamin_f32: > +** famin v0.2s, v0.2s, v1.2s > +** ret > +*/ > +float32x2_t > +test_vamin_f32 (float32x2_t a, float32x2_t b) > +{ > + return vamin_f32 (a, b); > +} > + > +/* > +** test_vaminq_f32: > +** famin v0.4s, v0.4s, v1.4s > +** ret > +*/ > +float32x4_t > +test_vaminq_f32 (float32x4_t a, float32x4_t b) > +{ > + return vaminq_f32 (a, b); > +} > + > +/* > +** test_vaminq_f64: > +** famin v0.2d, v0.2d, v1.2d > +** ret > +*/ > +float64x2_t > +test_vaminq_f64 (float64x2_t a, float64x2_t b) > +{ > + return vaminq_f64 (a, b); > +}
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index eb878b933fe..a4905dd0aae 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -757,6 +757,10 @@ typedef struct #define VAR1(T, N, MAP, FLAG, A) \ AARCH64_SIMD_BUILTIN_##T##_##N##A, +#undef ENTRY +#define ENTRY(N, M, U, F) \ + AARCH64_##N, + enum aarch64_builtins { AARCH64_BUILTIN_MIN, @@ -829,6 +833,10 @@ enum aarch64_builtins AARCH64_RBIT, AARCH64_RBITL, AARCH64_RBITLL, + /* Pragma builtins. */ + AARCH64_PRAGMA_BUILTIN_START, +#include "aarch64-simd-pragma-builtins.def" + AARCH64_PRAGMA_BUILTIN_END, /* System register builtins. */ AARCH64_RSR, AARCH64_RSRP, @@ -947,6 +955,7 @@ const char *aarch64_scalar_builtin_types[] = { extern GTY(()) aarch64_simd_type_info aarch64_simd_types[]; +#undef ENTRY #define ENTRY(E, M, Q, G) \ {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q}, struct aarch64_simd_type_info aarch64_simd_types [] = { @@ -1547,6 +1556,50 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma) } } +/* Initialize pragma builtins. */ + +struct aarch64_pragma_builtins_data +{ + const char *name; + machine_mode mode; + int unspec; + aarch64_feature_flags required_extensions; +}; + +#undef ENTRY +#define ENTRY(N, M, U, F) \ + {#N, E_##M##mode, U, F}, + +static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = { +#include "aarch64-simd-pragma-builtins.def" +}; + +static void +aarch64_init_pragma_builtins () +{ + for (size_t i = 0; i < ARRAY_SIZE (aarch64_pragma_builtins); ++i) + { + auto data = aarch64_pragma_builtins[i]; + auto type = aarch64_simd_builtin_type (data.mode, qualifier_none); + auto fntype = build_function_type_list (type, type, type, NULL_TREE); + auto code = AARCH64_PRAGMA_BUILTIN_START + i + 1; + const char *name = data.name; + aarch64_builtin_decls[code] + = aarch64_general_simulate_builtin (name, fntype, code); + } +} + +static const aarch64_pragma_builtins_data * +aarch64_get_pragma_builtin (int code) +{ + if (!(code > AARCH64_PRAGMA_BUILTIN_START + && code < AARCH64_PRAGMA_BUILTIN_END)) + return NULL; + + auto idx = code - (AARCH64_PRAGMA_BUILTIN_START + 1); + return &aarch64_pragma_builtins[idx]; +} + /* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type indexed by TYPE_INDEX. */ static void @@ -1640,6 +1693,7 @@ handle_arm_neon_h (void) aarch64_init_simd_builtin_functions (true); aarch64_init_simd_intrinsics (); + aarch64_init_pragma_builtins (); } static void @@ -2326,6 +2380,12 @@ aarch64_general_check_builtin_call (location_t location, vec<location_t>, return aarch64_check_required_extensions (location, decl, AARCH64_FL_MEMTAG); + if (auto builtin_data = aarch64_get_pragma_builtin (code)) + { + auto flags = builtin_data->required_extensions; + return aarch64_check_required_extensions (location, decl, flags); + } + return true; } @@ -3189,6 +3249,27 @@ aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) return ops[0].value; } +static rtx +aarch64_expand_pragma_builtin (unsigned int fcode, tree exp, rtx target) +{ + auto builtins_data + = aarch64_pragma_builtins[fcode - (AARCH64_PRAGMA_BUILTIN_START + 1)]; + + expand_operand ops[3]; + auto mode = builtins_data.mode; + auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0)); + auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1)); + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], op1, mode); + create_input_operand (&ops[2], op2, mode); + + auto unspec = builtins_data.unspec; + auto icode = code_for_aarch64 (unspec, mode); + expand_insn (icode, 3, ops); + + return target; +} + /* Expand an expression EXP as fpsr or fpcr setter (depending on UNSPEC) using MODE. */ static void @@ -3368,6 +3449,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, if (fcode >= AARCH64_REV16 && fcode <= AARCH64_RBITLL) return aarch64_expand_builtin_data_intrinsic (fcode, exp, target); + if (fcode > AARCH64_PRAGMA_BUILTIN_START + && fcode < AARCH64_PRAGMA_BUILTIN_END) + return aarch64_expand_pragma_builtin (fcode, exp, target); gcc_unreachable (); } diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 6998627f377..8279f5a76ea 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -234,6 +234,8 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8") +AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax") + #undef AARCH64_OPT_FMV_EXTENSION #undef AARCH64_OPT_EXTENSION #undef AARCH64_FMV_FEATURE diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def new file mode 100644 index 00000000000..be7029c4424 --- /dev/null +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def @@ -0,0 +1,31 @@ +/* AArch64 SIMD pragma builtins + Copyright (C) 2024 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + + // faminmax + ENTRY (vamax_f16, V4HF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) + ENTRY (vamaxq_f16, V8HF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) + ENTRY (vamax_f32, V2SF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) + ENTRY (vamaxq_f32, V4SF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) + ENTRY (vamaxq_f64, V2DF, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX) + ENTRY (vamin_f16, V4HF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) + ENTRY (vaminq_f16, V8HF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) + ENTRY (vamin_f32, V2SF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) + ENTRY (vaminq_f32, V4SF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) + ENTRY (vaminq_f64, V2DF, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 23c03a96371..7542c81ed91 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -9910,3 +9910,14 @@ "shl\\t%d0, %d1, #16" [(set_attr "type" "neon_shift_imm")] ) + +;; faminmax +(define_insn "@aarch64_<faminmax_uns_op><mode>" + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") + (match_operand:VHSDF 2 "register_operand" "w")] + FAMINMAX_UNS))] + "TARGET_FAMINMAX" + "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_fp_aminmax<q>")] +) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 2dfb999bea5..de14f57071a 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -457,6 +457,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED enabled through +gcs. */ #define TARGET_GCS AARCH64_HAVE_ISA (GCS) +/* Floating Point Absolute Maximum/Minimum extension instructions are + enabled through +faminmax. */ +#define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX) + /* Prefer different predicate registers for the output of a predicated operation over re-using an existing input predicate. */ #define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 20a318e023b..17ac5e073aa 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1057,6 +1057,8 @@ UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. UNSPEC_BFCVT ; Used in aarch64-simd.md. UNSPEC_FCVTXN ; Used in aarch64-simd.md. + UNSPEC_FAMAX ; Used in aarch64-simd.md. + UNSPEC_FAMIN ; Used in aarch64-simd.md. ;; All used in aarch64-sve2.md UNSPEC_FCVTN @@ -4463,3 +4465,10 @@ (UNSPECV_SET_FPCR "fpcr")]) (define_int_attr bits_etype [(8 "b") (16 "h") (32 "s") (64 "d")]) + +;; Iterators and attributes for faminmax + +(define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN]) + +(define_int_attr faminmax_uns_op + [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")]) diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index 9527bdb9e87..d8de9dbc9d1 100644 --- a/gcc/config/arm/types.md +++ b/gcc/config/arm/types.md @@ -492,6 +492,8 @@ ; neon_fp_reduc_minmax_s_q ; neon_fp_reduc_minmax_d ; neon_fp_reduc_minmax_d_q +; neon_fp_aminmax +; neon_fp_aminmax_q ; neon_fp_cvt_narrow_s_q ; neon_fp_cvt_narrow_d_q ; neon_fp_cvt_widen_h @@ -1044,6 +1046,8 @@ neon_fp_reduc_minmax_d,\ neon_fp_reduc_minmax_d_q,\ \ + neon_fp_aminmax,\ + neon_fp_aminmax_q,\ neon_fp_cvt_narrow_s_q,\ neon_fp_cvt_narrow_d_q,\ neon_fp_cvt_widen_h,\ @@ -1264,6 +1268,8 @@ neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\ neon_fp_reduc_minmax_d_q,\ + neon_fp_aminmax, neon_fp_aminmax_q,\ + neon_fp_aminmax, neon_fp_aminmax_q,\ neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\ neon_fp_cvt_widen_h, neon_fp_cvt_widen_s, neon_fp_to_int_s,\ neon_fp_to_int_s_q, neon_int_to_fp_s, neon_int_to_fp_s_q,\ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 32b772d2a8a..2c509f62d98 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21865,6 +21865,8 @@ Enable support for Armv8.9-a/9.4-a translation hardening extension. Enable the RCpc3 (Release Consistency) extension. @item fp8 Enable the fp8 (8-bit floating point) extension. +@item faminmax +Enable the Floating Point Absolute Maximum/Minimum extension. @end table diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c new file mode 100644 index 00000000000..63ed1508c23 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c @@ -0,0 +1,10 @@ +/* { dg-do assemble} */ +/* { dg-additional-options "-march=armv9-a" } */ + +#include "arm_neon.h" + +void +test (float32x4_t a, float32x4_t b) +{ + vamaxq_f32 (a, b); /* { dg-error {ACLE function 'vamaxq_f32' requires ISA extension 'faminmax'} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c new file mode 100644 index 00000000000..7e4f3eba81a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c @@ -0,0 +1,115 @@ +/* { dg-do assemble} */ +/* { dg-additional-options "-O3 -march=armv9-a+faminmax" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +/* +** test_vamax_f16: +** famax v0.4h, v0.4h, v1.4h +** ret +*/ +float16x4_t +test_vamax_f16 (float16x4_t a, float16x4_t b) +{ + return vamax_f16 (a, b); +} + +/* +** test_vamaxq_f16: +** famax v0.8h, v0.8h, v1.8h +** ret +*/ +float16x8_t +test_vamaxq_f16 (float16x8_t a, float16x8_t b) +{ + return vamaxq_f16 (a, b); +} + +/* +** test_vamax_f32: +** famax v0.2s, v0.2s, v1.2s +** ret +*/ +float32x2_t +test_vamax_f32 (float32x2_t a, float32x2_t b) +{ + return vamax_f32 (a, b); +} + +/* +** test_vamaxq_f32: +** famax v0.4s, v0.4s, v1.4s +** ret +*/ +float32x4_t +test_vamaxq_f32 (float32x4_t a, float32x4_t b) +{ + return vamaxq_f32 (a, b); +} + +/* +** test_vamaxq_f64: +** famax v0.2d, v0.2d, v1.2d +** ret +*/ +float64x2_t +test_vamaxq_f64 (float64x2_t a, float64x2_t b) +{ + return vamaxq_f64 (a, b); +} + +/* +** test_vamin_f16: +** famin v0.4h, v0.4h, v1.4h +** ret +*/ +float16x4_t +test_vamin_f16 (float16x4_t a, float16x4_t b) +{ + return vamin_f16 (a, b); +} + +/* +** test_vaminq_f16: +** famin v0.8h, v0.8h, v1.8h +** ret +*/ +float16x8_t +test_vaminq_f16 (float16x8_t a, float16x8_t b) +{ + return vaminq_f16 (a, b); +} + +/* +** test_vamin_f32: +** famin v0.2s, v0.2s, v1.2s +** ret +*/ +float32x2_t +test_vamin_f32 (float32x2_t a, float32x2_t b) +{ + return vamin_f32 (a, b); +} + +/* +** test_vaminq_f32: +** famin v0.4s, v0.4s, v1.4s +** ret +*/ +float32x4_t +test_vaminq_f32 (float32x4_t a, float32x4_t b) +{ + return vaminq_f32 (a, b); +} + +/* +** test_vaminq_f64: +** famin v0.2d, v0.2d, v1.2d +** ret +*/ +float64x2_t +test_vaminq_f64 (float64x2_t a, float64x2_t b) +{ + return vaminq_f64 (a, b); +}