diff mbox series

[1/2] aarch64: Refactor infrastructure for advsimd intrinsics

Message ID 20241106075018.3989144-2-vladimir.miloserdov@arm.com
State New
Headers show
Series aarch64: Refactor infra for advsimd intrinsics, add AdvSIMD LUTI2 | expand

Commit Message

Vladimir Miloserdov Nov. 6, 2024, 7:50 a.m. UTC
This patch refactors the infrastructure for defining advsimd pragma
intrinsics, adding support for more flexible type and signature
handling in future SIMD extensions.

A new simd_type structure is introduced, which allows for consistent
mode and qualifier management across various advsimd operations.

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.cc (ENTRY): Modify to
	include modes and qualifiers for simd_type structure.
	(ENTRY_VHSDF): Move to aarch64-builtins.cc to decouple.
	(struct simd_type): New structure for managing mode and
	qualifier combinations for SIMD types.
	(struct aarch64_pragma_builtins_data): Replace mode with
	simd_type to support multiple argument types for intrinsics.
	(aarch64_fntype): Modify to handle different shapes type.
	(aarch64_expand_pragma_builtin): Modify to handle different
	shapes type.

	* config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_BINARY):
	Move from aarch64-builtins.cc.
	(ENTRY_VHSDF): Move from aarch64-builtins.cc.
	(REQUIRED_EXTENSIONS): New macro.
---
 gcc/config/aarch64/aarch64-builtins.cc        | 81 +++++++++++++++----
 .../aarch64/aarch64-simd-pragma-builtins.def  | 15 ++--
 2 files changed, 77 insertions(+), 19 deletions(-)

Comments

Richard Sandiford Nov. 6, 2024, 7:23 p.m. UTC | #1
<vladimir.miloserdov@arm.com> writes:
> This patch refactors the infrastructure for defining advsimd pragma
> intrinsics, adding support for more flexible type and signature
> handling in future SIMD extensions.
>
> A new simd_type structure is introduced, which allows for consistent
> mode and qualifier management across various advsimd operations.
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-builtins.cc (ENTRY): Modify to
> 	include modes and qualifiers for simd_type structure.
> 	(ENTRY_VHSDF): Move to aarch64-builtins.cc to decouple.
> 	(struct simd_type): New structure for managing mode and
> 	qualifier combinations for SIMD types.
> 	(struct aarch64_pragma_builtins_data): Replace mode with
> 	simd_type to support multiple argument types for intrinsics.
> 	(aarch64_fntype): Modify to handle different shapes type.
> 	(aarch64_expand_pragma_builtin): Modify to handle different
> 	shapes type.
>
> 	* config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_BINARY):
> 	Move from aarch64-builtins.cc.
> 	(ENTRY_VHSDF): Move from aarch64-builtins.cc.
> 	(REQUIRED_EXTENSIONS): New macro.

Thanks for doing this.

> ---
>  gcc/config/aarch64/aarch64-builtins.cc        | 81 +++++++++++++++----
>  .../aarch64/aarch64-simd-pragma-builtins.def  | 15 ++--
>  2 files changed, 77 insertions(+), 19 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> index 97bde7c15d3..ad82c680c6a 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -780,7 +780,7 @@ typedef struct
>    AARCH64_SIMD_BUILTIN_##T##_##N##A,
>  
>  #undef ENTRY
> -#define ENTRY(N, S, M, U) \
> +#define ENTRY(N, S, M0, M1, M2, M3, U) \
>    AARCH64_##N,
>  
>  enum aarch64_builtins
> @@ -1593,10 +1593,49 @@ enum class aarch64_builtin_signatures
>    binary,
>  };
>  
> +namespace {
> +
> +struct simd_type {
> +  machine_mode mode;
> +  aarch64_type_qualifiers qualifiers;
> +};
> +
> +namespace simd_types {
> +
> +  constexpr simd_type s8 { V8QImode, qualifier_none };
> +  constexpr simd_type u8 { V8QImode, qualifier_unsigned };
> +  constexpr simd_type s8q { V16QImode, qualifier_none };
> +  constexpr simd_type u8q { V16QImode, qualifier_unsigned };
> +
> +  constexpr simd_type s16 { V4HImode, qualifier_none };
> +  constexpr simd_type u16 { V4HImode, qualifier_unsigned };
> +  constexpr simd_type s16q { V8HImode, qualifier_none };
> +  constexpr simd_type u16q { V8HImode, qualifier_unsigned };
> +
> +  constexpr simd_type p8 { V8QImode, qualifier_poly };
> +  constexpr simd_type p8q { V16QImode, qualifier_poly };
> +  constexpr simd_type p16 { V4HImode, qualifier_poly };
> +  constexpr simd_type p16q { V8HImode, qualifier_poly };
> +
> +  constexpr simd_type f16 { V4HFmode, qualifier_none };
> +  constexpr simd_type f16q { V8HFmode, qualifier_none };
> +  constexpr simd_type f32 { V2SFmode, qualifier_none };
> +  constexpr simd_type f32q { V4SFmode, qualifier_none };
> +  constexpr simd_type f64q { V2DFmode, qualifier_none };
> +
> +  constexpr simd_type bf16 { V4BFmode, qualifier_none };
> +  constexpr simd_type bf16q { V8BFmode, qualifier_none };
> +
> +  constexpr simd_type none { VOIDmode, qualifier_none };
> +}
> +
> +}
> +
>  #undef ENTRY
> -#define ENTRY(N, S, M, U) \
> -  {#N, aarch64_builtin_signatures::S, E_##M##mode, U, \
> -   aarch64_required_extensions::REQUIRED_EXTENSIONS},
> +#define ENTRY(N, S, T0, T1, T2, T3, U) \
> +  {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \
> +    simd_types::T2, simd_types::T3, U, \
> +    aarch64_required_extensions::REQUIRED_EXTENSIONS},
>  
>  /* Initialize pragma builtins.  */
>  
> @@ -1604,7 +1643,7 @@ struct aarch64_pragma_builtins_data
>  {
>    const char *name;
>    aarch64_builtin_signatures signature;
> -  machine_mode mode;
> +  simd_type types[4];
>    int unspec;
>    aarch64_required_extensions required_extensions;
>  };
> @@ -1616,11 +1655,19 @@ static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = {
>  static tree
>  aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
>  {
> -  auto type = aarch64_simd_builtin_type (builtin_data.mode, qualifier_none);
> +  tree type0, type1, type2;
> +
>    switch (builtin_data.signature)
>      {
>      case aarch64_builtin_signatures::binary:
> -      return build_function_type_list (type, type, type, NULL_TREE);
> +      type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode,
> +	builtin_data.types[0].qualifiers);

Rather than repeat this for each type, how about adding a helper to
simd_type.  Something like:

  tree type () const { return aarch64_simd_builtin_type (mode, qualifiers); }

Then we can just use builtin_data.types[0].type ()

Also, it looks like this array is going to be used often enough that
it would be worth having a shorthand:

  auto &types = builtin_data.types;

That should make the code compact enough that we don't need the
type0, type1, etc. assignments.

> +      type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode,
> +	builtin_data.types[1].qualifiers);
> +      type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode,
> +	builtin_data.types[2].qualifiers);
> +      return build_function_type_list (type0, type1, type2, NULL_TREE);
> +
>      default:
>        gcc_unreachable ();
>      }
> @@ -3337,17 +3384,24 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
>  			       const aarch64_pragma_builtins_data *builtin_data)
>  {
>    expand_operand ops[3];
> -  auto mode = builtin_data->mode;
>    auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
>    auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1));
> -  create_output_operand (&ops[0], target, mode);
> -  create_input_operand (&ops[1], op1, mode);
> -  create_input_operand (&ops[2], op2, mode);
> +  create_output_operand (&ops[0], target, builtin_data->types[0].mode);
> +  create_input_operand (&ops[1], op1, builtin_data->types[1].mode);
> +  create_input_operand (&ops[2], op2, builtin_data->types[2].mode);
>  
>    auto unspec = builtin_data->unspec;
> -  auto icode = code_for_aarch64 (unspec, mode);
> -  expand_insn (icode, 3, ops);
> +  insn_code icode;
>  
> +  switch (builtin_data->signature)
> +    {
> +    case aarch64_builtin_signatures::binary:
> +      icode = code_for_aarch64 (unspec, builtin_data->types[0].mode);
> +      expand_insn (icode, 3, ops);
> +      break;
> +    default:
> +      gcc_unreachable();
> +    }
>    return target;
>  }
>  
> @@ -4186,7 +4240,6 @@ aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
>  #undef CF3
>  #undef CF4
>  #undef CF10
> -#undef ENTRY_VHSDF

Ooops, my bad!

>  #undef VAR1
>  #undef VAR2
>  #undef VAR3
> diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
> index d66642eaa0a..c669919fa04 100644
> --- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
> @@ -18,13 +18,18 @@
>     along with GCC; see the file COPYING3.  If not see
>     <http://www.gnu.org/licenses/>.  */
>  
> +

Nit: unnecessary extra line.

Otherwise it looks good to me, thanks.

Richard

> +#undef ENTRY_BINARY
> +#define ENTRY_BINARY(N, S, T0, T1, T2, U) \
> +  ENTRY (N, S, T0, T1, T2, none, U)
> +
>  #undef ENTRY_VHSDF
>  #define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC) \
> -  ENTRY (NAME##_f16, SIGNATURE, V4HF, UNSPEC) \
> -  ENTRY (NAME##q_f16, SIGNATURE, V8HF, UNSPEC) \
> -  ENTRY (NAME##_f32, SIGNATURE, V2SF, UNSPEC) \
> -  ENTRY (NAME##q_f32, SIGNATURE, V4SF, UNSPEC) \
> -  ENTRY (NAME##q_f64, SIGNATURE, V2DF, UNSPEC)
> +  ENTRY_BINARY (NAME##_f16, SIGNATURE, f16, f16, f16, UNSPEC) \
> +  ENTRY_BINARY (NAME##q_f16, SIGNATURE, f16q, f16q, f16q, UNSPEC) \
> +  ENTRY_BINARY (NAME##_f32, SIGNATURE, f32, f32, f32, UNSPEC) \
> +  ENTRY_BINARY (NAME##q_f32, SIGNATURE, f32q, f32q, f32q, UNSPEC) \
> +  ENTRY_BINARY (NAME##q_f64, SIGNATURE, f64q, f64q, f64q, UNSPEC)
>  
>  // faminmax
>  #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 97bde7c15d3..ad82c680c6a 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -780,7 +780,7 @@  typedef struct
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
 
 #undef ENTRY
-#define ENTRY(N, S, M, U) \
+#define ENTRY(N, S, M0, M1, M2, M3, U) \
   AARCH64_##N,
 
 enum aarch64_builtins
@@ -1593,10 +1593,49 @@  enum class aarch64_builtin_signatures
   binary,
 };
 
+namespace {
+
+struct simd_type {
+  machine_mode mode;
+  aarch64_type_qualifiers qualifiers;
+};
+
+namespace simd_types {
+
+  constexpr simd_type s8 { V8QImode, qualifier_none };
+  constexpr simd_type u8 { V8QImode, qualifier_unsigned };
+  constexpr simd_type s8q { V16QImode, qualifier_none };
+  constexpr simd_type u8q { V16QImode, qualifier_unsigned };
+
+  constexpr simd_type s16 { V4HImode, qualifier_none };
+  constexpr simd_type u16 { V4HImode, qualifier_unsigned };
+  constexpr simd_type s16q { V8HImode, qualifier_none };
+  constexpr simd_type u16q { V8HImode, qualifier_unsigned };
+
+  constexpr simd_type p8 { V8QImode, qualifier_poly };
+  constexpr simd_type p8q { V16QImode, qualifier_poly };
+  constexpr simd_type p16 { V4HImode, qualifier_poly };
+  constexpr simd_type p16q { V8HImode, qualifier_poly };
+
+  constexpr simd_type f16 { V4HFmode, qualifier_none };
+  constexpr simd_type f16q { V8HFmode, qualifier_none };
+  constexpr simd_type f32 { V2SFmode, qualifier_none };
+  constexpr simd_type f32q { V4SFmode, qualifier_none };
+  constexpr simd_type f64q { V2DFmode, qualifier_none };
+
+  constexpr simd_type bf16 { V4BFmode, qualifier_none };
+  constexpr simd_type bf16q { V8BFmode, qualifier_none };
+
+  constexpr simd_type none { VOIDmode, qualifier_none };
+}
+
+}
+
 #undef ENTRY
-#define ENTRY(N, S, M, U) \
-  {#N, aarch64_builtin_signatures::S, E_##M##mode, U, \
-   aarch64_required_extensions::REQUIRED_EXTENSIONS},
+#define ENTRY(N, S, T0, T1, T2, T3, U) \
+  {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \
+    simd_types::T2, simd_types::T3, U, \
+    aarch64_required_extensions::REQUIRED_EXTENSIONS},
 
 /* Initialize pragma builtins.  */
 
@@ -1604,7 +1643,7 @@  struct aarch64_pragma_builtins_data
 {
   const char *name;
   aarch64_builtin_signatures signature;
-  machine_mode mode;
+  simd_type types[4];
   int unspec;
   aarch64_required_extensions required_extensions;
 };
@@ -1616,11 +1655,19 @@  static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = {
 static tree
 aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
 {
-  auto type = aarch64_simd_builtin_type (builtin_data.mode, qualifier_none);
+  tree type0, type1, type2;
+
   switch (builtin_data.signature)
     {
     case aarch64_builtin_signatures::binary:
-      return build_function_type_list (type, type, type, NULL_TREE);
+      type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode,
+	builtin_data.types[0].qualifiers);
+      type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode,
+	builtin_data.types[1].qualifiers);
+      type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode,
+	builtin_data.types[2].qualifiers);
+      return build_function_type_list (type0, type1, type2, NULL_TREE);
+
     default:
       gcc_unreachable ();
     }
@@ -3337,17 +3384,24 @@  aarch64_expand_pragma_builtin (tree exp, rtx target,
 			       const aarch64_pragma_builtins_data *builtin_data)
 {
   expand_operand ops[3];
-  auto mode = builtin_data->mode;
   auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
   auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1));
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], op1, mode);
-  create_input_operand (&ops[2], op2, mode);
+  create_output_operand (&ops[0], target, builtin_data->types[0].mode);
+  create_input_operand (&ops[1], op1, builtin_data->types[1].mode);
+  create_input_operand (&ops[2], op2, builtin_data->types[2].mode);
 
   auto unspec = builtin_data->unspec;
-  auto icode = code_for_aarch64 (unspec, mode);
-  expand_insn (icode, 3, ops);
+  insn_code icode;
 
+  switch (builtin_data->signature)
+    {
+    case aarch64_builtin_signatures::binary:
+      icode = code_for_aarch64 (unspec, builtin_data->types[0].mode);
+      expand_insn (icode, 3, ops);
+      break;
+    default:
+      gcc_unreachable();
+    }
   return target;
 }
 
@@ -4186,7 +4240,6 @@  aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
 #undef CF3
 #undef CF4
 #undef CF10
-#undef ENTRY_VHSDF
 #undef VAR1
 #undef VAR2
 #undef VAR3
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index d66642eaa0a..c669919fa04 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -18,13 +18,18 @@ 
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */
 
+
+#undef ENTRY_BINARY
+#define ENTRY_BINARY(N, S, T0, T1, T2, U) \
+  ENTRY (N, S, T0, T1, T2, none, U)
+
 #undef ENTRY_VHSDF
 #define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC) \
-  ENTRY (NAME##_f16, SIGNATURE, V4HF, UNSPEC) \
-  ENTRY (NAME##q_f16, SIGNATURE, V8HF, UNSPEC) \
-  ENTRY (NAME##_f32, SIGNATURE, V2SF, UNSPEC) \
-  ENTRY (NAME##q_f32, SIGNATURE, V4SF, UNSPEC) \
-  ENTRY (NAME##q_f64, SIGNATURE, V2DF, UNSPEC)
+  ENTRY_BINARY (NAME##_f16, SIGNATURE, f16, f16, f16, UNSPEC) \
+  ENTRY_BINARY (NAME##q_f16, SIGNATURE, f16q, f16q, f16q, UNSPEC) \
+  ENTRY_BINARY (NAME##_f32, SIGNATURE, f32, f32, f32, UNSPEC) \
+  ENTRY_BINARY (NAME##q_f32, SIGNATURE, f32q, f32q, f32q, UNSPEC) \
+  ENTRY_BINARY (NAME##q_f64, SIGNATURE, f64q, f64q, f64q, UNSPEC)
 
 // faminmax
 #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)