diff mbox series

[v2,3/3] aarch64: Add fpm register helper functions.

Message ID 20240725142548.699792-4-claudio.bantaloukas@arm.com
State New
Headers show
Series aarch64: Add initial support for +fp8 arch extensions | expand

Commit Message

Claudio Bantaloukas July 25, 2024, 2:25 p.m. UTC
The ACLE declares several helper types and functions to
facilitate construction of `fpm` arguments.

gcc/ChangeLog:

	* config/aarch64/arm_acle.h (fpm_t): New type representing fpmr values.
	(enum __ARM_FPM_FORMAT): New enum representing valid fp8 formats.
	(enum __ARM_FPM_OVERFLOW): New enum representing how some fp8
	calculations work.
	(arm_fpm_init): New.
	(arm_set_fpm_src1_format): Likewise.
	(arm_set_fpm_src2_format): Likewise.
	(arm_set_fpm_dst_format): Likewise.
	(arm_set_fpm_overflow_cvt): Likewise.
	(arm_set_fpm_overflow_mul): Likewise.
	(arm_set_fpm_lscale): Likewise.
	(arm_set_fpm_lscale2): Likewise.
	(arm_set_fpm_nscale): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/acle/fp8-helpers.c: New test of fpmr helper functions.
---
 gcc/config/aarch64/arm_acle.h                 | 33 ++++++++++++
 .../gcc.target/aarch64/acle/fp8-helpers.c     | 52 +++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c

Comments

Kyrylo Tkachov July 26, 2024, 8:13 a.m. UTC | #1
Hi Claudio,

> On 25 Jul 2024, at 16:25, Claudio Bantaloukas <claudio.bantaloukas@arm.com> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> The ACLE declares several helper types and functions to
> facilitate construction of `fpm` arguments.
> 
> gcc/ChangeLog:
> 
>        * config/aarch64/arm_acle.h (fpm_t): New type representing fpmr values.
>        (enum __ARM_FPM_FORMAT): New enum representing valid fp8 formats.
>        (enum __ARM_FPM_OVERFLOW): New enum representing how some fp8
>        calculations work.
>        (arm_fpm_init): New.
>        (arm_set_fpm_src1_format): Likewise.
>        (arm_set_fpm_src2_format): Likewise.
>        (arm_set_fpm_dst_format): Likewise.
>        (arm_set_fpm_overflow_cvt): Likewise.
>        (arm_set_fpm_overflow_mul): Likewise.
>        (arm_set_fpm_lscale): Likewise.
>        (arm_set_fpm_lscale2): Likewise.
>        (arm_set_fpm_nscale): Likewise.
> 

Thanks for the updates, these ChangeLog entries should now be updated to reflect the names.
One question I just asked on the ACLE PR for FP8, are these helpers supposed to be defined in arm_acle.h? I think it’s reasonable, but I didn’t see explicit wording for it in ACLE so I’d like to clarify.
Thanks,
Kyrill

> gcc/testsuite/ChangeLog:
> 
>        * gcc.target/aarch64/acle/fp8-helpers.c: New test of fpmr helper functions.
> ---
> gcc/config/aarch64/arm_acle.h                 | 33 ++++++++++++
> .../gcc.target/aarch64/acle/fp8-helpers.c     | 52 +++++++++++++++++++
> 2 files changed, 85 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c
> 
> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> index 2aa681090fa..fd4fa855b90 100644
> --- a/gcc/config/aarch64/arm_acle.h
> +++ b/gcc/config/aarch64/arm_acle.h
> @@ -385,6 +385,39 @@ __rndrrs (uint64_t *__res)
> 
> #pragma GCC pop_options
> 
> +typedef uint64_t fpm_t;
> +
> +enum __ARM_FPM_FORMAT
> +{
> +  __ARM_FPM_E5M2,
> +  __ARM_FPM_E4M3,
> +};
> +
> +enum __ARM_FPM_OVERFLOW
> +{
> +  __ARM_FPM_INFNAN,
> +  __ARM_FPM_SATURATE,
> +};
> +
> +#define __arm_fpm_init() (0)
> +
> +#define __arm_set_fpm_src1_format(__fpm, __format) \
> +  ((__fpm & ~(uint64_t)0x7) | (__format & (uint64_t)0x7))
> +#define __arm_set_fpm_src2_format(__fpm, __format) \
> +  ((__fpm & ~((uint64_t)0x7 << 3)) | ((__format & (uint64_t)0x7) << 3))
> +#define __arm_set_fpm_dst_format(__fpm, __format) \
> +  ((__fpm & ~((uint64_t)0x7 << 6)) | ((__format & (uint64_t)0x7) << 6))
> +#define __arm_set_fpm_overflow_cvt(__fpm, __behaviour) \
> +  ((__fpm & ~((uint64_t)0x1 << 15)) | ((__behaviour & (uint64_t)0x1) << 15))
> +#define __arm_set_fpm_overflow_mul(__fpm, __behaviour) \
> +  ((__fpm & ~((uint64_t)0x1 << 14)) | ((__behaviour & (uint64_t)0x1) << 14))
> +#define __arm_set_fpm_lscale(__fpm, __scale) \
> +  ((__fpm & ~((uint64_t)0x7f << 16)) | ((__scale & (uint64_t)0x7f) << 16))
> +#define __arm_set_fpm_lscale2(__fpm, __scale) \
> +  ((__fpm & ~((uint64_t)0x3f << 32)) | ((__scale & (uint64_t)0x3f) << 32))
> +#define __arm_set_fpm_nscale(__fpm, __scale) \
> +  ((__fpm & ~((uint64_t)0xff << 24)) | ((__scale & (uint64_t)0xff) << 24))
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c b/gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c
> new file mode 100644
> index 00000000000..e235c3621d1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c
> @@ -0,0 +1,52 @@
> +/* Test the fp8 ACLE helper functions.  */
> +/* { dg-do compile } */
> +/* { dg-options "-std=c90 -pedantic-errors -O1 -march=armv9.4-a+fp8" } */
> +
> +#include <arm_acle.h>
> +
> +void
> +test_prepare_fpmr_sysreg ()
> +{
> +
> +#define _S_EQ(expr, expected)                                                  \
> +  _Static_assert (expr == expected, #expr " == " #expected)
> +
> +  _S_EQ (__arm_fpm_init (), 0);
> +
> +  /* Bits [2:0] */
> +  _S_EQ (__arm_set_fpm_src1_format (__arm_fpm_init (), __ARM_FPM_E5M2), 0);
> +  _S_EQ (__arm_set_fpm_src1_format (__arm_fpm_init (), __ARM_FPM_E4M3), 0x1);
> +
> +  /* Bits [5:3] */
> +  _S_EQ (__arm_set_fpm_src2_format (__arm_fpm_init (), __ARM_FPM_E5M2), 0);
> +  _S_EQ (__arm_set_fpm_src2_format (__arm_fpm_init (), __ARM_FPM_E4M3), 0x8);
> +
> +  /* Bits [8:6] */
> +  _S_EQ (__arm_set_fpm_dst_format (__arm_fpm_init (), __ARM_FPM_E5M2), 0);
> +  _S_EQ (__arm_set_fpm_dst_format (__arm_fpm_init (), __ARM_FPM_E4M3), 0x40);
> +
> +  /* Bit 14 */
> +  _S_EQ (__arm_set_fpm_overflow_mul (__arm_fpm_init (), __ARM_FPM_INFNAN), 0);
> +  _S_EQ (__arm_set_fpm_overflow_mul (__arm_fpm_init (), __ARM_FPM_SATURATE),
> + 0x4000);
> +
> +  /* Bit 15 */
> +  _S_EQ (__arm_set_fpm_overflow_cvt (__arm_fpm_init (), __ARM_FPM_INFNAN), 0);
> +  _S_EQ (__arm_set_fpm_overflow_cvt (__arm_fpm_init (), __ARM_FPM_SATURATE),
> + 0x8000);
> +
> +  /* Bits [22:16] */
> +  _S_EQ (__arm_set_fpm_lscale (__arm_fpm_init (), 0), 0);
> +  _S_EQ (__arm_set_fpm_lscale (__arm_fpm_init (), 127), 0x7F0000);
> +
> +  /* Bits [37:32] */
> +  _S_EQ (__arm_set_fpm_lscale2 (__arm_fpm_init (), 0), 0);
> +  _S_EQ (__arm_set_fpm_lscale2 (__arm_fpm_init (), 63), 0x3F00000000);
> +
> +  /* Bits [31:24] */
> +  _S_EQ (__arm_set_fpm_nscale (__arm_fpm_init (), 0), 0);
> +  _S_EQ (__arm_set_fpm_nscale (__arm_fpm_init (), 127), 0x7F000000);
> +  _S_EQ (__arm_set_fpm_nscale (__arm_fpm_init (), -128), 0x80000000);
> +
> +#undef _S_EQ
> +}
Claudio Bantaloukas July 26, 2024, 10:38 a.m. UTC | #2
On 26/07/2024 09:13, Kyrylo Tkachov wrote:
> Hi Claudio,
> 
>> On 25 Jul 2024, at 16:25, Claudio Bantaloukas <claudio.bantaloukas@arm.com> wrote:
>>
>> External email: Use caution opening links or attachments
>>
>>
>> The ACLE declares several helper types and functions to
>> facilitate construction of `fpm` arguments.
>>
>> gcc/ChangeLog:
>>
>>         * config/aarch64/arm_acle.h (fpm_t): New type representing fpmr values.
>>         (enum __ARM_FPM_FORMAT): New enum representing valid fp8 formats.
>>         (enum __ARM_FPM_OVERFLOW): New enum representing how some fp8
>>         calculations work.
>>         (arm_fpm_init): New.
>>         (arm_set_fpm_src1_format): Likewise.
>>         (arm_set_fpm_src2_format): Likewise.
>>         (arm_set_fpm_dst_format): Likewise.
>>         (arm_set_fpm_overflow_cvt): Likewise.
>>         (arm_set_fpm_overflow_mul): Likewise.
>>         (arm_set_fpm_lscale): Likewise.
>>         (arm_set_fpm_lscale2): Likewise.
>>         (arm_set_fpm_nscale): Likewise.
>>
> 
> Thanks for the updates, these ChangeLog entries should now be updated to reflect the names.
> One question I just asked on the ACLE PR for FP8, are these helpers supposed to be defined in arm_acle.h? I think it’s reasonable, but I didn’t see explicit wording for it in ACLE so I’d like to clarify.

Hi Kyrill,
we had an internal discussion and will update the acle to mention that 
these helpers will be available when including any one of the 
arm_neon.h, arm_sve.h, or arm_sme.h headers, regardless of arch version 
in the command line.
I'm working on an update to this patch that takes this into account and 
will update the Changelog accordingly.

Cheers,
Claudio

> Thanks,
> Kyrill
> 
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.target/aarch64/acle/fp8-helpers.c: New test of fpmr helper functions.
>> ---
>> gcc/config/aarch64/arm_acle.h                 | 33 ++++++++++++
>> .../gcc.target/aarch64/acle/fp8-helpers.c     | 52 +++++++++++++++++++
>> 2 files changed, 85 insertions(+)
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c
>>
>> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
>> index 2aa681090fa..fd4fa855b90 100644
>> --- a/gcc/config/aarch64/arm_acle.h
>> +++ b/gcc/config/aarch64/arm_acle.h
>> @@ -385,6 +385,39 @@ __rndrrs (uint64_t *__res)
>>
>> #pragma GCC pop_options
>>
>> +typedef uint64_t fpm_t;
>> +
>> +enum __ARM_FPM_FORMAT
>> +{
>> +  __ARM_FPM_E5M2,
>> +  __ARM_FPM_E4M3,
>> +};
>> +
>> +enum __ARM_FPM_OVERFLOW
>> +{
>> +  __ARM_FPM_INFNAN,
>> +  __ARM_FPM_SATURATE,
>> +};
>> +
>> +#define __arm_fpm_init() (0)
>> +
>> +#define __arm_set_fpm_src1_format(__fpm, __format) \
>> +  ((__fpm & ~(uint64_t)0x7) | (__format & (uint64_t)0x7))
>> +#define __arm_set_fpm_src2_format(__fpm, __format) \
>> +  ((__fpm & ~((uint64_t)0x7 << 3)) | ((__format & (uint64_t)0x7) << 3))
>> +#define __arm_set_fpm_dst_format(__fpm, __format) \
>> +  ((__fpm & ~((uint64_t)0x7 << 6)) | ((__format & (uint64_t)0x7) << 6))
>> +#define __arm_set_fpm_overflow_cvt(__fpm, __behaviour) \
>> +  ((__fpm & ~((uint64_t)0x1 << 15)) | ((__behaviour & (uint64_t)0x1) << 15))
>> +#define __arm_set_fpm_overflow_mul(__fpm, __behaviour) \
>> +  ((__fpm & ~((uint64_t)0x1 << 14)) | ((__behaviour & (uint64_t)0x1) << 14))
>> +#define __arm_set_fpm_lscale(__fpm, __scale) \
>> +  ((__fpm & ~((uint64_t)0x7f << 16)) | ((__scale & (uint64_t)0x7f) << 16))
>> +#define __arm_set_fpm_lscale2(__fpm, __scale) \
>> +  ((__fpm & ~((uint64_t)0x3f << 32)) | ((__scale & (uint64_t)0x3f) << 32))
>> +#define __arm_set_fpm_nscale(__fpm, __scale) \
>> +  ((__fpm & ~((uint64_t)0xff << 24)) | ((__scale & (uint64_t)0xff) << 24))
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c b/gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c
>> new file mode 100644
>> index 00000000000..e235c3621d1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c
>> @@ -0,0 +1,52 @@
>> +/* Test the fp8 ACLE helper functions.  */
>> +/* { dg-do compile } */
>> +/* { dg-options "-std=c90 -pedantic-errors -O1 -march=armv9.4-a+fp8" } */
>> +
>> +#include <arm_acle.h>
>> +
>> +void
>> +test_prepare_fpmr_sysreg ()
>> +{
>> +
>> +#define _S_EQ(expr, expected)                                                  \
>> +  _Static_assert (expr == expected, #expr " == " #expected)
>> +
>> +  _S_EQ (__arm_fpm_init (), 0);
>> +
>> +  /* Bits [2:0] */
>> +  _S_EQ (__arm_set_fpm_src1_format (__arm_fpm_init (), __ARM_FPM_E5M2), 0);
>> +  _S_EQ (__arm_set_fpm_src1_format (__arm_fpm_init (), __ARM_FPM_E4M3), 0x1);
>> +
>> +  /* Bits [5:3] */
>> +  _S_EQ (__arm_set_fpm_src2_format (__arm_fpm_init (), __ARM_FPM_E5M2), 0);
>> +  _S_EQ (__arm_set_fpm_src2_format (__arm_fpm_init (), __ARM_FPM_E4M3), 0x8);
>> +
>> +  /* Bits [8:6] */
>> +  _S_EQ (__arm_set_fpm_dst_format (__arm_fpm_init (), __ARM_FPM_E5M2), 0);
>> +  _S_EQ (__arm_set_fpm_dst_format (__arm_fpm_init (), __ARM_FPM_E4M3), 0x40);
>> +
>> +  /* Bit 14 */
>> +  _S_EQ (__arm_set_fpm_overflow_mul (__arm_fpm_init (), __ARM_FPM_INFNAN), 0);
>> +  _S_EQ (__arm_set_fpm_overflow_mul (__arm_fpm_init (), __ARM_FPM_SATURATE),
>> + 0x4000);
>> +
>> +  /* Bit 15 */
>> +  _S_EQ (__arm_set_fpm_overflow_cvt (__arm_fpm_init (), __ARM_FPM_INFNAN), 0);
>> +  _S_EQ (__arm_set_fpm_overflow_cvt (__arm_fpm_init (), __ARM_FPM_SATURATE),
>> + 0x8000);
>> +
>> +  /* Bits [22:16] */
>> +  _S_EQ (__arm_set_fpm_lscale (__arm_fpm_init (), 0), 0);
>> +  _S_EQ (__arm_set_fpm_lscale (__arm_fpm_init (), 127), 0x7F0000);
>> +
>> +  /* Bits [37:32] */
>> +  _S_EQ (__arm_set_fpm_lscale2 (__arm_fpm_init (), 0), 0);
>> +  _S_EQ (__arm_set_fpm_lscale2 (__arm_fpm_init (), 63), 0x3F00000000);
>> +
>> +  /* Bits [31:24] */
>> +  _S_EQ (__arm_set_fpm_nscale (__arm_fpm_init (), 0), 0);
>> +  _S_EQ (__arm_set_fpm_nscale (__arm_fpm_init (), 127), 0x7F000000);
>> +  _S_EQ (__arm_set_fpm_nscale (__arm_fpm_init (), -128), 0x80000000);
>> +
>> +#undef _S_EQ
>> +}
>
diff mbox series

Patch

diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 2aa681090fa..fd4fa855b90 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -385,6 +385,39 @@  __rndrrs (uint64_t *__res)
 
 #pragma GCC pop_options
 
+typedef uint64_t fpm_t;
+
+enum __ARM_FPM_FORMAT
+{
+  __ARM_FPM_E5M2,
+  __ARM_FPM_E4M3,
+};
+
+enum __ARM_FPM_OVERFLOW
+{
+  __ARM_FPM_INFNAN,
+  __ARM_FPM_SATURATE,
+};
+
+#define __arm_fpm_init() (0)
+
+#define __arm_set_fpm_src1_format(__fpm, __format) \
+  ((__fpm & ~(uint64_t)0x7) | (__format & (uint64_t)0x7))
+#define __arm_set_fpm_src2_format(__fpm, __format) \
+  ((__fpm & ~((uint64_t)0x7 << 3)) | ((__format & (uint64_t)0x7) << 3))
+#define __arm_set_fpm_dst_format(__fpm, __format) \
+  ((__fpm & ~((uint64_t)0x7 << 6)) | ((__format & (uint64_t)0x7) << 6))
+#define __arm_set_fpm_overflow_cvt(__fpm, __behaviour) \
+  ((__fpm & ~((uint64_t)0x1 << 15)) | ((__behaviour & (uint64_t)0x1) << 15))
+#define __arm_set_fpm_overflow_mul(__fpm, __behaviour) \
+  ((__fpm & ~((uint64_t)0x1 << 14)) | ((__behaviour & (uint64_t)0x1) << 14))
+#define __arm_set_fpm_lscale(__fpm, __scale) \
+  ((__fpm & ~((uint64_t)0x7f << 16)) | ((__scale & (uint64_t)0x7f) << 16))
+#define __arm_set_fpm_lscale2(__fpm, __scale) \
+  ((__fpm & ~((uint64_t)0x3f << 32)) | ((__scale & (uint64_t)0x3f) << 32))
+#define __arm_set_fpm_nscale(__fpm, __scale) \
+  ((__fpm & ~((uint64_t)0xff << 24)) | ((__scale & (uint64_t)0xff) << 24))
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c b/gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c
new file mode 100644
index 00000000000..e235c3621d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8-helpers.c
@@ -0,0 +1,52 @@ 
+/* Test the fp8 ACLE helper functions.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c90 -pedantic-errors -O1 -march=armv9.4-a+fp8" } */
+
+#include <arm_acle.h>
+
+void
+test_prepare_fpmr_sysreg ()
+{
+
+#define _S_EQ(expr, expected)                                                  \
+  _Static_assert (expr == expected, #expr " == " #expected)
+
+  _S_EQ (__arm_fpm_init (), 0);
+
+  /* Bits [2:0] */
+  _S_EQ (__arm_set_fpm_src1_format (__arm_fpm_init (), __ARM_FPM_E5M2), 0);
+  _S_EQ (__arm_set_fpm_src1_format (__arm_fpm_init (), __ARM_FPM_E4M3), 0x1);
+
+  /* Bits [5:3] */
+  _S_EQ (__arm_set_fpm_src2_format (__arm_fpm_init (), __ARM_FPM_E5M2), 0);
+  _S_EQ (__arm_set_fpm_src2_format (__arm_fpm_init (), __ARM_FPM_E4M3), 0x8);
+
+  /* Bits [8:6] */
+  _S_EQ (__arm_set_fpm_dst_format (__arm_fpm_init (), __ARM_FPM_E5M2), 0);
+  _S_EQ (__arm_set_fpm_dst_format (__arm_fpm_init (), __ARM_FPM_E4M3), 0x40);
+
+  /* Bit 14 */
+  _S_EQ (__arm_set_fpm_overflow_mul (__arm_fpm_init (), __ARM_FPM_INFNAN), 0);
+  _S_EQ (__arm_set_fpm_overflow_mul (__arm_fpm_init (), __ARM_FPM_SATURATE),
+	 0x4000);
+
+  /* Bit 15 */
+  _S_EQ (__arm_set_fpm_overflow_cvt (__arm_fpm_init (), __ARM_FPM_INFNAN), 0);
+  _S_EQ (__arm_set_fpm_overflow_cvt (__arm_fpm_init (), __ARM_FPM_SATURATE),
+	 0x8000);
+
+  /* Bits [22:16] */
+  _S_EQ (__arm_set_fpm_lscale (__arm_fpm_init (), 0), 0);
+  _S_EQ (__arm_set_fpm_lscale (__arm_fpm_init (), 127), 0x7F0000);
+
+  /* Bits [37:32] */
+  _S_EQ (__arm_set_fpm_lscale2 (__arm_fpm_init (), 0), 0);
+  _S_EQ (__arm_set_fpm_lscale2 (__arm_fpm_init (), 63), 0x3F00000000);
+
+  /* Bits [31:24] */
+  _S_EQ (__arm_set_fpm_nscale (__arm_fpm_init (), 0), 0);
+  _S_EQ (__arm_set_fpm_nscale (__arm_fpm_init (), 127), 0x7F000000);
+  _S_EQ (__arm_set_fpm_nscale (__arm_fpm_init (), -128), 0x80000000);
+
+#undef _S_EQ
+}