Message ID | 20241106075018.3989144-2-vladimir.miloserdov@arm.com |
---|---|
State | New |
Headers | show |
Series | aarch64: Refactor infra for advsimd intrinsics, add AdvSIMD LUTI2 | expand |
<vladimir.miloserdov@arm.com> writes: > This patch refactors the infrastructure for defining advsimd pragma > intrinsics, adding support for more flexible type and signature > handling in future SIMD extensions. > > A new simd_type structure is introduced, which allows for consistent > mode and qualifier management across various advsimd operations. > > gcc/ChangeLog: > > * config/aarch64/aarch64-builtins.cc (ENTRY): Modify to > include modes and qualifiers for simd_type structure. > (ENTRY_VHSDF): Move to aarch64-builtins.cc to decouple. > (struct simd_type): New structure for managing mode and > qualifier combinations for SIMD types. > (struct aarch64_pragma_builtins_data): Replace mode with > simd_type to support multiple argument types for intrinsics. > (aarch64_fntype): Modify to handle different shapes type. > (aarch64_expand_pragma_builtin): Modify to handle different > shapes type. > > * config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_BINARY): > Move from aarch64-builtins.cc. > (ENTRY_VHSDF): Move from aarch64-builtins.cc. > (REQUIRED_EXTENSIONS): New macro. Thanks for doing this. > --- > gcc/config/aarch64/aarch64-builtins.cc | 81 +++++++++++++++---- > .../aarch64/aarch64-simd-pragma-builtins.def | 15 ++-- > 2 files changed, 77 insertions(+), 19 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc > index 97bde7c15d3..ad82c680c6a 100644 > --- a/gcc/config/aarch64/aarch64-builtins.cc > +++ b/gcc/config/aarch64/aarch64-builtins.cc > @@ -780,7 +780,7 @@ typedef struct > AARCH64_SIMD_BUILTIN_##T##_##N##A, > > #undef ENTRY > -#define ENTRY(N, S, M, U) \ > +#define ENTRY(N, S, M0, M1, M2, M3, U) \ > AARCH64_##N, > > enum aarch64_builtins > @@ -1593,10 +1593,49 @@ enum class aarch64_builtin_signatures > binary, > }; > > +namespace { > + > +struct simd_type { > + machine_mode mode; > + aarch64_type_qualifiers qualifiers; > +}; > + > +namespace simd_types { > + > + constexpr simd_type s8 { V8QImode, qualifier_none }; > + constexpr simd_type u8 { V8QImode, qualifier_unsigned }; > + constexpr simd_type s8q { V16QImode, qualifier_none }; > + constexpr simd_type u8q { V16QImode, qualifier_unsigned }; > + > + constexpr simd_type s16 { V4HImode, qualifier_none }; > + constexpr simd_type u16 { V4HImode, qualifier_unsigned }; > + constexpr simd_type s16q { V8HImode, qualifier_none }; > + constexpr simd_type u16q { V8HImode, qualifier_unsigned }; > + > + constexpr simd_type p8 { V8QImode, qualifier_poly }; > + constexpr simd_type p8q { V16QImode, qualifier_poly }; > + constexpr simd_type p16 { V4HImode, qualifier_poly }; > + constexpr simd_type p16q { V8HImode, qualifier_poly }; > + > + constexpr simd_type f16 { V4HFmode, qualifier_none }; > + constexpr simd_type f16q { V8HFmode, qualifier_none }; > + constexpr simd_type f32 { V2SFmode, qualifier_none }; > + constexpr simd_type f32q { V4SFmode, qualifier_none }; > + constexpr simd_type f64q { V2DFmode, qualifier_none }; > + > + constexpr simd_type bf16 { V4BFmode, qualifier_none }; > + constexpr simd_type bf16q { V8BFmode, qualifier_none }; > + > + constexpr simd_type none { VOIDmode, qualifier_none }; > +} > + > +} > + > #undef ENTRY > -#define ENTRY(N, S, M, U) \ > - {#N, aarch64_builtin_signatures::S, E_##M##mode, U, \ > - aarch64_required_extensions::REQUIRED_EXTENSIONS}, > +#define ENTRY(N, S, T0, T1, T2, T3, U) \ > + {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \ > + simd_types::T2, simd_types::T3, U, \ > + aarch64_required_extensions::REQUIRED_EXTENSIONS}, > > /* Initialize pragma builtins. */ > > @@ -1604,7 +1643,7 @@ struct aarch64_pragma_builtins_data > { > const char *name; > aarch64_builtin_signatures signature; > - machine_mode mode; > + simd_type types[4]; > int unspec; > aarch64_required_extensions required_extensions; > }; > @@ -1616,11 +1655,19 @@ static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = { > static tree > aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data) > { > - auto type = aarch64_simd_builtin_type (builtin_data.mode, qualifier_none); > + tree type0, type1, type2; > + > switch (builtin_data.signature) > { > case aarch64_builtin_signatures::binary: > - return build_function_type_list (type, type, type, NULL_TREE); > + type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode, > + builtin_data.types[0].qualifiers); Rather than repeat this for each type, how about adding a helper to simd_type. Something like: tree type () const { return aarch64_simd_builtin_type (mode, qualifiers); } Then we can just use builtin_data.types[0].type () Also, it looks like this array is going to be used often enough that it would be worth having a shorthand: auto &types = builtin_data.types; That should make the code compact enough that we don't need the type0, type1, etc. assignments. > + type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode, > + builtin_data.types[1].qualifiers); > + type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode, > + builtin_data.types[2].qualifiers); > + return build_function_type_list (type0, type1, type2, NULL_TREE); > + > default: > gcc_unreachable (); > } > @@ -3337,17 +3384,24 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, > const aarch64_pragma_builtins_data *builtin_data) > { > expand_operand ops[3]; > - auto mode = builtin_data->mode; > auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0)); > auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1)); > - create_output_operand (&ops[0], target, mode); > - create_input_operand (&ops[1], op1, mode); > - create_input_operand (&ops[2], op2, mode); > + create_output_operand (&ops[0], target, builtin_data->types[0].mode); > + create_input_operand (&ops[1], op1, builtin_data->types[1].mode); > + create_input_operand (&ops[2], op2, builtin_data->types[2].mode); > > auto unspec = builtin_data->unspec; > - auto icode = code_for_aarch64 (unspec, mode); > - expand_insn (icode, 3, ops); > + insn_code icode; > > + switch (builtin_data->signature) > + { > + case aarch64_builtin_signatures::binary: > + icode = code_for_aarch64 (unspec, builtin_data->types[0].mode); > + expand_insn (icode, 3, ops); > + break; > + default: > + gcc_unreachable(); > + } > return target; > } > > @@ -4186,7 +4240,6 @@ aarch64_resolve_overloaded_builtin_general (location_t loc, tree function, > #undef CF3 > #undef CF4 > #undef CF10 > -#undef ENTRY_VHSDF Ooops, my bad! > #undef VAR1 > #undef VAR2 > #undef VAR3 > diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def > index d66642eaa0a..c669919fa04 100644 > --- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def > +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def > @@ -18,13 +18,18 @@ > along with GCC; see the file COPYING3. If not see > <http://www.gnu.org/licenses/>. */ > > + Nit: unnecessary extra line. Otherwise it looks good to me, thanks. Richard > +#undef ENTRY_BINARY > +#define ENTRY_BINARY(N, S, T0, T1, T2, U) \ > + ENTRY (N, S, T0, T1, T2, none, U) > + > #undef ENTRY_VHSDF > #define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC) \ > - ENTRY (NAME##_f16, SIGNATURE, V4HF, UNSPEC) \ > - ENTRY (NAME##q_f16, SIGNATURE, V8HF, UNSPEC) \ > - ENTRY (NAME##_f32, SIGNATURE, V2SF, UNSPEC) \ > - ENTRY (NAME##q_f32, SIGNATURE, V4SF, UNSPEC) \ > - ENTRY (NAME##q_f64, SIGNATURE, V2DF, UNSPEC) > + ENTRY_BINARY (NAME##_f16, SIGNATURE, f16, f16, f16, UNSPEC) \ > + ENTRY_BINARY (NAME##q_f16, SIGNATURE, f16q, f16q, f16q, UNSPEC) \ > + ENTRY_BINARY (NAME##_f32, SIGNATURE, f32, f32, f32, UNSPEC) \ > + ENTRY_BINARY (NAME##q_f32, SIGNATURE, f32q, f32q, f32q, UNSPEC) \ > + ENTRY_BINARY (NAME##q_f64, SIGNATURE, f64q, f64q, f64q, UNSPEC) > > // faminmax > #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 97bde7c15d3..ad82c680c6a 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -780,7 +780,7 @@ typedef struct AARCH64_SIMD_BUILTIN_##T##_##N##A, #undef ENTRY -#define ENTRY(N, S, M, U) \ +#define ENTRY(N, S, M0, M1, M2, M3, U) \ AARCH64_##N, enum aarch64_builtins @@ -1593,10 +1593,49 @@ enum class aarch64_builtin_signatures binary, }; +namespace { + +struct simd_type { + machine_mode mode; + aarch64_type_qualifiers qualifiers; +}; + +namespace simd_types { + + constexpr simd_type s8 { V8QImode, qualifier_none }; + constexpr simd_type u8 { V8QImode, qualifier_unsigned }; + constexpr simd_type s8q { V16QImode, qualifier_none }; + constexpr simd_type u8q { V16QImode, qualifier_unsigned }; + + constexpr simd_type s16 { V4HImode, qualifier_none }; + constexpr simd_type u16 { V4HImode, qualifier_unsigned }; + constexpr simd_type s16q { V8HImode, qualifier_none }; + constexpr simd_type u16q { V8HImode, qualifier_unsigned }; + + constexpr simd_type p8 { V8QImode, qualifier_poly }; + constexpr simd_type p8q { V16QImode, qualifier_poly }; + constexpr simd_type p16 { V4HImode, qualifier_poly }; + constexpr simd_type p16q { V8HImode, qualifier_poly }; + + constexpr simd_type f16 { V4HFmode, qualifier_none }; + constexpr simd_type f16q { V8HFmode, qualifier_none }; + constexpr simd_type f32 { V2SFmode, qualifier_none }; + constexpr simd_type f32q { V4SFmode, qualifier_none }; + constexpr simd_type f64q { V2DFmode, qualifier_none }; + + constexpr simd_type bf16 { V4BFmode, qualifier_none }; + constexpr simd_type bf16q { V8BFmode, qualifier_none }; + + constexpr simd_type none { VOIDmode, qualifier_none }; +} + +} + #undef ENTRY -#define ENTRY(N, S, M, U) \ - {#N, aarch64_builtin_signatures::S, E_##M##mode, U, \ - aarch64_required_extensions::REQUIRED_EXTENSIONS}, +#define ENTRY(N, S, T0, T1, T2, T3, U) \ + {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \ + simd_types::T2, simd_types::T3, U, \ + aarch64_required_extensions::REQUIRED_EXTENSIONS}, /* Initialize pragma builtins. */ @@ -1604,7 +1643,7 @@ struct aarch64_pragma_builtins_data { const char *name; aarch64_builtin_signatures signature; - machine_mode mode; + simd_type types[4]; int unspec; aarch64_required_extensions required_extensions; }; @@ -1616,11 +1655,19 @@ static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = { static tree aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data) { - auto type = aarch64_simd_builtin_type (builtin_data.mode, qualifier_none); + tree type0, type1, type2; + switch (builtin_data.signature) { case aarch64_builtin_signatures::binary: - return build_function_type_list (type, type, type, NULL_TREE); + type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode, + builtin_data.types[0].qualifiers); + type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode, + builtin_data.types[1].qualifiers); + type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode, + builtin_data.types[2].qualifiers); + return build_function_type_list (type0, type1, type2, NULL_TREE); + default: gcc_unreachable (); } @@ -3337,17 +3384,24 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, const aarch64_pragma_builtins_data *builtin_data) { expand_operand ops[3]; - auto mode = builtin_data->mode; auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0)); auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1)); - create_output_operand (&ops[0], target, mode); - create_input_operand (&ops[1], op1, mode); - create_input_operand (&ops[2], op2, mode); + create_output_operand (&ops[0], target, builtin_data->types[0].mode); + create_input_operand (&ops[1], op1, builtin_data->types[1].mode); + create_input_operand (&ops[2], op2, builtin_data->types[2].mode); auto unspec = builtin_data->unspec; - auto icode = code_for_aarch64 (unspec, mode); - expand_insn (icode, 3, ops); + insn_code icode; + switch (builtin_data->signature) + { + case aarch64_builtin_signatures::binary: + icode = code_for_aarch64 (unspec, builtin_data->types[0].mode); + expand_insn (icode, 3, ops); + break; + default: + gcc_unreachable(); + } return target; } @@ -4186,7 +4240,6 @@ aarch64_resolve_overloaded_builtin_general (location_t loc, tree function, #undef CF3 #undef CF4 #undef CF10 -#undef ENTRY_VHSDF #undef VAR1 #undef VAR2 #undef VAR3 diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def index d66642eaa0a..c669919fa04 100644 --- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def @@ -18,13 +18,18 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ + +#undef ENTRY_BINARY +#define ENTRY_BINARY(N, S, T0, T1, T2, U) \ + ENTRY (N, S, T0, T1, T2, none, U) + #undef ENTRY_VHSDF #define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC) \ - ENTRY (NAME##_f16, SIGNATURE, V4HF, UNSPEC) \ - ENTRY (NAME##q_f16, SIGNATURE, V8HF, UNSPEC) \ - ENTRY (NAME##_f32, SIGNATURE, V2SF, UNSPEC) \ - ENTRY (NAME##q_f32, SIGNATURE, V4SF, UNSPEC) \ - ENTRY (NAME##q_f64, SIGNATURE, V2DF, UNSPEC) + ENTRY_BINARY (NAME##_f16, SIGNATURE, f16, f16, f16, UNSPEC) \ + ENTRY_BINARY (NAME##q_f16, SIGNATURE, f16q, f16q, f16q, UNSPEC) \ + ENTRY_BINARY (NAME##_f32, SIGNATURE, f32, f32, f32, UNSPEC) \ + ENTRY_BINARY (NAME##q_f32, SIGNATURE, f32q, f32q, f32q, UNSPEC) \ + ENTRY_BINARY (NAME##q_f64, SIGNATURE, f64q, f64q, f64q, UNSPEC) // faminmax #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)