diff mbox series

aarch64: Add support for mfloat8x{8|16}_t types

Message ID c7098d49-20f5-e25a-f62b-4b609459c7e5@e124511.cambridge.arm.com
State New
Headers show
Series aarch64: Add support for mfloat8x{8|16}_t types | expand

Commit Message

Andrew Carlotti Oct. 1, 2024, 1:58 p.m. UTC
I've tested this with a hacked in FP8 intrinsic.  Is this patch ok for master,
or should it wait until we've implemented the intrinsics?

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.cc (MODE_d_mf8): New.
	(MODE_q_mf8): New.
	(QUAL_mf8): New.
	(aarch64_lookup_simd_type_in_table): Match modal_float bit.
	(aarch64_init_simd_builtin_types): Initialise FP8 simd types.
	* config/aarch64/aarch64-builtins.h
	(enum aarch64_type_qualifiers): Add qualifier_modal_float bit.
	* config/aarch64/aarch64-simd-builtin-types.def:
	Add Mfloat8x{8|16}_t types.
	* config/aarch64/arm_neon.h: Add mfloat8x{8|16}_t typedefs.

Comments

Richard Sandiford Oct. 1, 2024, 2:25 p.m. UTC | #1
Andrew Carlotti <andrew.carlotti@arm.com> writes:
> I've tested this with a hacked in FP8 intrinsic.

Looks good.

> Is this patch ok for master,
> or should it wait until we've implemented the intrinsics?

IMO it would be OK to apply...

> @@ -1190,6 +1194,10 @@ aarch64_init_simd_builtin_types (void)
>    aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
>    aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
>  
> +  /* Init FP8 element types.  */
> +  aarch64_simd_types[Mfloat8x8_t].eltype = aarch64_mfp8_type_node;
> +  aarch64_simd_types[Mfloat8x16_t].eltype = aarch64_mfp8_type_node;
> +
>    for (i = 0; i < nelts; i++)
>      {
>        tree eltype = aarch64_simd_types[i].eltype;
> diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
> index 6111cd0d4fe1136feabb36a4077cf86d13b835e2..83b2da2e7dc0962c1e5957e25c8f6232c2148fe5 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
> @@ -52,3 +52,5 @@
>    ENTRY (Float64x2_t, V2DF, none, 13)
>    ENTRY (Bfloat16x4_t, V4BF, none, 14)
>    ENTRY (Bfloat16x8_t, V8BF, none, 14)
> +  ENTRY (Mfloat8x8_t, V8QI, modal_float, 13)
> +  ENTRY (Mfloat8x16_t, V16QI, modal_float, 14)
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 0092314cf7586bf77c8272cff8156608f56eaedc..63f860886c3152e3de7f29833119f9f65b6774a2 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -73,6 +73,8 @@ typedef __Poly64_t poly64_t;
>  typedef __Poly128_t poly128_t;
>  
>  typedef __mfp8 mfloat8_t;
> +typedef __Mfloat8x8_t mfloat8x8_t;
> +typedef __Mfloat8x16_t mfloat8x16_t;
>  
>  typedef __fp16 float16_t;
>  typedef float float32_t;

...this part.  But then we should be able to test that independently,
e.g. by adding it to gcc.target/aarch64/movv2x16qi.c & co.

I think...

> diff --git a/gcc/config/aarch64/aarch64-builtins.h b/gcc/config/aarch64/aarch64-builtins.h
> index e326fe666769cedd6c06d0752ed30b9359745ac9..00db7a74885db4d97ed365e8e3e2d7cf7d8410a4 100644
> --- a/gcc/config/aarch64/aarch64-builtins.h
> +++ b/gcc/config/aarch64/aarch64-builtins.h
> @@ -54,6 +54,8 @@ enum aarch64_type_qualifiers
>    /* Lane indices selected in quadtuplets. - must be in range, and flipped for
>       bigendian.  */
>    qualifier_lane_quadtup_index = 0x1000,
> +  /* Modal FP types.  */
> +  qualifier_modal_float = 0x2000,
>  };
>  
>  #define ENTRY(E, M, Q, G) E,
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> index 7d17df05a0f7e8c42596af7f1de4652b59563fe0..f6921ac1d5619e85d4239cd93d1bc84c1d55d4df 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -133,6 +133,7 @@
>  #define MODE_d_f16 E_V4HFmode
>  #define MODE_d_f32 E_V2SFmode
>  #define MODE_d_f64 E_V1DFmode
> +#define MODE_d_mf8 E_V8QImode
>  #define MODE_d_s8 E_V8QImode
>  #define MODE_d_s16 E_V4HImode
>  #define MODE_d_s32 E_V2SImode
> @@ -148,6 +149,7 @@
>  #define MODE_q_f16 E_V8HFmode
>  #define MODE_q_f32 E_V4SFmode
>  #define MODE_q_f64 E_V2DFmode
> +#define MODE_q_mf8 E_V16QImode
>  #define MODE_q_s8 E_V16QImode
>  #define MODE_q_s16 E_V8HImode
>  #define MODE_q_s32 E_V4SImode
> @@ -177,6 +179,7 @@
>  #define QUAL_p16 qualifier_poly
>  #define QUAL_p64 qualifier_poly
>  #define QUAL_p128 qualifier_poly
> +#define QUAL_mf8 qualifier_modal_float
>  
>  #define LENGTH_d ""
>  #define LENGTH_q "q"
> @@ -1087,7 +1090,8 @@ aarch64_lookup_simd_type_in_table (machine_mode mode,
>  {
>    int i;
>    int nelts = ARRAY_SIZE (aarch64_simd_types);
> -  int q = qualifiers & (qualifier_poly | qualifier_unsigned);
> +  int q = qualifiers
> +    & (qualifier_poly | qualifier_unsigned | qualifier_modal_float);
>  
>    for (i = 0; i < nelts; i++)
>      {

...this is probably better left to the patch that needs it.

Let's see what others think though.

Thanks,
Richard
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-builtins.h b/gcc/config/aarch64/aarch64-builtins.h
index e326fe666769cedd6c06d0752ed30b9359745ac9..00db7a74885db4d97ed365e8e3e2d7cf7d8410a4 100644
--- a/gcc/config/aarch64/aarch64-builtins.h
+++ b/gcc/config/aarch64/aarch64-builtins.h
@@ -54,6 +54,8 @@  enum aarch64_type_qualifiers
   /* Lane indices selected in quadtuplets. - must be in range, and flipped for
      bigendian.  */
   qualifier_lane_quadtup_index = 0x1000,
+  /* Modal FP types.  */
+  qualifier_modal_float = 0x2000,
 };
 
 #define ENTRY(E, M, Q, G) E,
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 7d17df05a0f7e8c42596af7f1de4652b59563fe0..f6921ac1d5619e85d4239cd93d1bc84c1d55d4df 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -133,6 +133,7 @@ 
 #define MODE_d_f16 E_V4HFmode
 #define MODE_d_f32 E_V2SFmode
 #define MODE_d_f64 E_V1DFmode
+#define MODE_d_mf8 E_V8QImode
 #define MODE_d_s8 E_V8QImode
 #define MODE_d_s16 E_V4HImode
 #define MODE_d_s32 E_V2SImode
@@ -148,6 +149,7 @@ 
 #define MODE_q_f16 E_V8HFmode
 #define MODE_q_f32 E_V4SFmode
 #define MODE_q_f64 E_V2DFmode
+#define MODE_q_mf8 E_V16QImode
 #define MODE_q_s8 E_V16QImode
 #define MODE_q_s16 E_V8HImode
 #define MODE_q_s32 E_V4SImode
@@ -177,6 +179,7 @@ 
 #define QUAL_p16 qualifier_poly
 #define QUAL_p64 qualifier_poly
 #define QUAL_p128 qualifier_poly
+#define QUAL_mf8 qualifier_modal_float
 
 #define LENGTH_d ""
 #define LENGTH_q "q"
@@ -1087,7 +1090,8 @@  aarch64_lookup_simd_type_in_table (machine_mode mode,
 {
   int i;
   int nelts = ARRAY_SIZE (aarch64_simd_types);
-  int q = qualifiers & (qualifier_poly | qualifier_unsigned);
+  int q = qualifiers
+    & (qualifier_poly | qualifier_unsigned | qualifier_modal_float);
 
   for (i = 0; i < nelts; i++)
     {
@@ -1190,6 +1194,10 @@  aarch64_init_simd_builtin_types (void)
   aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
   aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
 
+  /* Init FP8 element types.  */
+  aarch64_simd_types[Mfloat8x8_t].eltype = aarch64_mfp8_type_node;
+  aarch64_simd_types[Mfloat8x16_t].eltype = aarch64_mfp8_type_node;
+
   for (i = 0; i < nelts; i++)
     {
       tree eltype = aarch64_simd_types[i].eltype;
diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
index 6111cd0d4fe1136feabb36a4077cf86d13b835e2..83b2da2e7dc0962c1e5957e25c8f6232c2148fe5 100644
--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
@@ -52,3 +52,5 @@ 
   ENTRY (Float64x2_t, V2DF, none, 13)
   ENTRY (Bfloat16x4_t, V4BF, none, 14)
   ENTRY (Bfloat16x8_t, V8BF, none, 14)
+  ENTRY (Mfloat8x8_t, V8QI, modal_float, 13)
+  ENTRY (Mfloat8x16_t, V16QI, modal_float, 14)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0092314cf7586bf77c8272cff8156608f56eaedc..63f860886c3152e3de7f29833119f9f65b6774a2 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -73,6 +73,8 @@  typedef __Poly64_t poly64_t;
 typedef __Poly128_t poly128_t;
 
 typedef __mfp8 mfloat8_t;
+typedef __Mfloat8x8_t mfloat8x8_t;
+typedef __Mfloat8x16_t mfloat8x16_t;
 
 typedef __fp16 float16_t;
 typedef float float32_t;