diff mbox series

[v2,2/3] aarch64: Add support for moving fpm system register

Message ID 20240725142548.699792-3-claudio.bantaloukas@arm.com
State New
Headers show
Series aarch64: Add initial support for +fp8 arch extensions | expand

Commit Message

Claudio Bantaloukas July 25, 2024, 2:25 p.m. UTC
Unlike most system registers, fpmr can be heavily written to in code that
exercises the fp8 functionality. That is because every fp8 intrinsic call
can potentially change the value of fpmr.
Rather than just use an unspec, we treat the fpmr system register like
all other registers and use a move operation to read and write to it.

We introduce a new class of moveable system registers that currently
only contains fpmr, together with a new constraint, Umv, that allows us
to selectively use mrs and msr instructions when expanding rtl for them.
Given that there is code that depends on "real" registers coming before
"fake" ones, we introduce a new constant FPM_REGNUM that uses an
existing value and renumber registers below that.
This requires us to update the bitmaps that describe which registers
belong to each register class.

gcc/ChangeLog:

	* config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add
	support for MOVEABLE_SYSREGS class.
	(aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr.
	(aarch64_regno_regclass): Support MOVEABLE_SYSREGS class.
	(aarch64_class_max_nregs): Likewise.
	* config/aarch64/aarch64.h (FIXED_REGISTERS): Add fpmr.
	(CALL_REALLY_USED_REGISTERS): Likewise.
	(REGISTER_NAMES): Likewise.
	(enum reg_class): Add MOVEABLE_SYSREGS class.
	(REG_CLASS_NAMES): Likewise.
	(REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr,
	the new MOVEABLE_SYSREGS class and renumbering of registers.
	* config/aarch64/aarch64.md (FPM_REGNUM): Add new register
	number, reusing old value.
	(FFR_REGNUM): Renumber.
	(FFRT_REGNUM): Likewise.
	(LOWERING_REGNUM): Likewise.
	(TPIDR2_BLOCK_REGNUM): Likewise.
	(SME_STATE_REGNUM): Likewise.
	(TPIDR2_SETUP_REGNUM): Likewise.
	(ZA_FREE_REGNUM): Likewise.
	(ZA_SAVED_REGNUM): Likewise.
	(ZA_REGNUM): Likewise.
	(ZT0_REGNUM): Likewise.
	(*mov<mode>_aarch64): Add support for moveable sysregs.
	(*movsi_aarch64): Likewise.
	(*movdi_aarch64): Likewise.
	* config/aarch64/constraints.md (Umv): New constraint.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/acle/fp8.c: New tests.
---
 gcc/config/aarch64/aarch64.cc               |   8 ++
 gcc/config/aarch64/aarch64.h                |  14 ++-
 gcc/config/aarch64/aarch64.md               |  30 ++++--
 gcc/config/aarch64/constraints.md           |   3 +
 gcc/testsuite/gcc.target/aarch64/acle/fp8.c | 103 ++++++++++++++++++++
 5 files changed, 144 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e0cf382998c..9810f2c0390 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2018,6 +2018,7 @@  aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
     case PR_HI_REGS:
       return mode == VNx32BImode ? 2 : 1;
 
+    case MOVEABLE_SYSREGS:
     case FFR_REGS:
     case PR_AND_FFR_REGS:
     case FAKE_REGS:
@@ -2045,6 +2046,9 @@  aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
     /* This must have the same size as _Unwind_Word.  */
     return mode == DImode;
 
+  if (regno == FPM_REGNUM)
+    return mode == QImode || mode == HImode || mode == SImode || mode == DImode;
+
   unsigned int vec_flags = aarch64_classify_vector_mode (mode);
   if (vec_flags == VEC_SVE_PRED)
     return pr_or_ffr_regnum_p (regno);
@@ -12680,6 +12684,9 @@  aarch64_regno_regclass (unsigned regno)
   if (PR_REGNUM_P (regno))
     return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
 
+  if (regno == FPM_REGNUM)
+    return MOVEABLE_SYSREGS;
+
   if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
     return FFR_REGS;
 
@@ -13068,6 +13075,7 @@  aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
     case PR_HI_REGS:
       return mode == VNx32BImode ? 2 : 1;
 
+    case MOVEABLE_SYSREGS:
     case STACK_REG:
     case FFR_REGS:
     case PR_AND_FFR_REGS:
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2e75c6b81e2..2dfb999bea5 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -523,6 +523,7 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
     1, 1, 1, 1,			/* SFP, AP, CC, VG */	\
     0, 0, 0, 0,   0, 0, 0, 0,   /* P0 - P7 */           \
     0, 0, 0, 0,   0, 0, 0, 0,   /* P8 - P15 */          \
+    1,				/* FPMR */		\
     1, 1,			/* FFR and FFRT */	\
     1, 1, 1, 1, 1, 1, 1, 1	/* Fake registers */	\
   }
@@ -547,6 +548,7 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
     1, 1, 1, 0,			/* SFP, AP, CC, VG */	\
     1, 1, 1, 1,   1, 1, 1, 1,	/* P0 - P7 */		\
     1, 1, 1, 1,   1, 1, 1, 1,	/* P8 - P15 */		\
+    1,				/* FPMR */		\
     1, 1,			/* FFR and FFRT */	\
     0, 0, 0, 0, 0, 0, 0, 0	/* Fake registers */	\
   }
@@ -564,6 +566,7 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
     "sfp", "ap",  "cc",  "vg",					\
     "p0",  "p1",  "p2",  "p3",  "p4",  "p5",  "p6",  "p7",	\
     "p8",  "p9",  "p10", "p11", "p12", "p13", "p14", "p15",	\
+    "fpmr",							\
     "ffr", "ffrt",						\
     "lowering", "tpidr2_block", "sme_state", "tpidr2_setup",	\
     "za_free", "za_saved", "za", "zt0"				\
@@ -775,6 +778,7 @@  enum reg_class
   PR_REGS,
   FFR_REGS,
   PR_AND_FFR_REGS,
+  MOVEABLE_SYSREGS,
   FAKE_REGS,
   ALL_REGS,
   LIM_REG_CLASSES		/* Last */
@@ -801,6 +805,7 @@  enum reg_class
   "PR_REGS",					\
   "FFR_REGS",					\
   "PR_AND_FFR_REGS",				\
+  "MOVEABLE_SYSREGS",				\
   "FAKE_REGS",					\
   "ALL_REGS"					\
 }
@@ -822,10 +827,11 @@  enum reg_class
   { 0x00000000, 0x00000000, 0x00000ff0 },	/* PR_LO_REGS */	\
   { 0x00000000, 0x00000000, 0x000ff000 },	/* PR_HI_REGS */	\
   { 0x00000000, 0x00000000, 0x000ffff0 },	/* PR_REGS */		\
-  { 0x00000000, 0x00000000, 0x00300000 },	/* FFR_REGS */		\
-  { 0x00000000, 0x00000000, 0x003ffff0 },	/* PR_AND_FFR_REGS */	\
-  { 0x00000000, 0x00000000, 0x3fc00000 },	/* FAKE_REGS */		\
-  { 0xffffffff, 0xffffffff, 0x000fffff }	/* ALL_REGS */		\
+  { 0x00000000, 0x00000000, 0x00600000 },	/* FFR_REGS */		\
+  { 0x00000000, 0x00000000, 0x006ffff0 },	/* PR_AND_FFR_REGS */	\
+  { 0x00000000, 0x00000000, 0x00100000 },	/* MOVEABLE_SYSREGS */	\
+  { 0x00000000, 0x00000000, 0x7f800000 },	/* FAKE_REGS */		\
+  { 0xffffffff, 0xffffffff, 0x001fffff }	/* ALL_REGS */		\
 }
 
 #define REGNO_REG_CLASS(REGNO)	aarch64_regno_regclass (REGNO)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 94ff0eefa77..22e57ee7ccf 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -107,10 +107,14 @@  (define_constants
     (P14_REGNUM		82)
     (P15_REGNUM		83)
     (LAST_SAVED_REGNUM	83)
-    (FFR_REGNUM		84)
+
+    ;; Floating Point Mode Register, used in FP8 insns.
+    (FPM_REGNUM		84)
+
+    (FFR_REGNUM		85)
     ;; "FFR token": a fake register used for representing the scheduling
     ;; restrictions on FFR-related operations.
-    (FFRT_REGNUM	85)
+    (FFRT_REGNUM	86)
 
     ;; ----------------------------------------------------------------
     ;; Fake registers
@@ -122,17 +126,17 @@  (define_constants
     ;; ABI-related lowering is needed.  These placeholders read and
     ;; write this register.  Instructions that depend on the lowering
     ;; read the register.
-    (LOWERING_REGNUM 86)
+    (LOWERING_REGNUM 87)
 
     ;; Represents the contents of the current function's TPIDR2 block,
     ;; in abstract form.
-    (TPIDR2_BLOCK_REGNUM 87)
+    (TPIDR2_BLOCK_REGNUM 88)
 
     ;; Holds the value that the current function wants PSTATE.ZA to be.
     ;; The actual value can sometimes vary, because it does not track
     ;; changes to PSTATE.ZA that happen during a lazy save and restore.
     ;; Those effects are instead tracked by ZA_SAVED_REGNUM.
-    (SME_STATE_REGNUM 88)
+    (SME_STATE_REGNUM 89)
 
     ;; Instructions write to this register if they set TPIDR2_EL0 to a
     ;; well-defined value.  Instructions read from the register if they
@@ -140,14 +144,14 @@  (define_constants
     ;;
     ;; The register does not model the architected TPIDR2_ELO, just the
     ;; current function's management of it.
-    (TPIDR2_SETUP_REGNUM 89)
+    (TPIDR2_SETUP_REGNUM 90)
 
     ;; Represents the property "has an incoming lazy save been committed?".
-    (ZA_FREE_REGNUM 90)
+    (ZA_FREE_REGNUM 91)
 
     ;; Represents the property "are the current function's ZA contents
     ;; stored in the lazy save buffer, rather than in ZA itself?".
-    (ZA_SAVED_REGNUM 91)
+    (ZA_SAVED_REGNUM 92)
 
     ;; Represents the contents of the current function's ZA state in
     ;; abstract form.  At various times in the function, these contents
@@ -155,10 +159,10 @@  (define_constants
     ;;
     ;; The contents persist even when the architected ZA is off.  Private-ZA
     ;; functions have no effect on its contents.
-    (ZA_REGNUM 92)
+    (ZA_REGNUM 93)
 
     ;; Similarly represents the contents of the current function's ZT0 state.
-    (ZT0_REGNUM 93)
+    (ZT0_REGNUM 94)
 
     (FIRST_FAKE_REGNUM	LOWERING_REGNUM)
     (LAST_FAKE_REGNUM	ZT0_REGNUM)
@@ -1405,6 +1409,8 @@  (define_insn "*mov<mode>_aarch64"
      [w, r Z  ; neon_from_gp<q>, nosimd     ] fmov\t%s0, %w1
      [w, w    ; neon_dup       , simd       ] dup\t%<Vetype>0, %1.<v>[0]
      [w, w    ; neon_dup       , nosimd     ] fmov\t%s0, %s1
+     [Umv, r  ; mrs            , *          ] msr\t%0, %x1
+     [r, Umv  ; mrs            , *          ] mrs\t%x0, %1
   }
 )
 
@@ -1467,6 +1473,8 @@  (define_insn_and_split "*movsi_aarch64"
      [r  , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
      [w  , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
      [w  , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
+     [Umv, r  ; mrs      , *   , 8] msr\t%0, %x1
+     [r, Umv  ; mrs      , *   , 8] mrs\t%x0, %1
   }
   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
@@ -1505,6 +1513,8 @@  (define_insn_and_split "*movdi_aarch64"
      [w, w  ; fmov     , fp  , 4] fmov\t%d0, %d1
      [w, Dd ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
      [w, Dx ; neon_move, simd, 8] #
+     [Umv, r; mrs      , *   , 8] msr\t%0, %1
+     [r, Umv; mrs      , *   , 8] mrs\t%0, %1
   }
   "CONST_INT_P (operands[1])
    && REG_P (operands[0])
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index a2569cea510..0c81fb28f7e 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -77,6 +77,9 @@  (define_register_constraint "Upl" "PR_LO_REGS"
 (define_register_constraint "Uph" "PR_HI_REGS"
   "SVE predicate registers p8 - p15.")
 
+(define_register_constraint "Umv" "MOVEABLE_SYSREGS"
+  "@internal System Registers suitable for moving rather than requiring an unspec msr")
+
 (define_constraint "c"
  "@internal The condition code register."
   (match_operand 0 "cc_register"))
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
index 4113758aa25..1a5c3d7e8fd 100644
--- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
+++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
@@ -18,4 +18,107 @@ 
 #error "__ARM_FEATURE_FP8 feature macro defined."
 #endif
 
+/*
+**test_write_fpmr_sysreg_asm_64:
+**	msr	fpmr, x0
+**	ret
+*/
+void
+test_write_fpmr_sysreg_asm_64 (uint64_t val)
+{
+  register uint64_t fpmr asm ("fpmr") = val;
+  asm volatile ("" ::"Umv"(fpmr));
+}
+
+/*
+**test_write_fpmr_sysreg_asm_32:
+**	uxtw	x0, w0
+**	msr	fpmr, x0
+**	ret
+*/
+void
+test_write_fpmr_sysreg_asm_32 (uint32_t val)
+{
+  register uint64_t fpmr asm ("fpmr") = val;
+  asm volatile ("" ::"Umv"(fpmr));
+}
+
+/*
+**test_write_fpmr_sysreg_asm_16:
+**	and	x0, x0, 65535
+**	msr	fpmr, x0
+**	ret
+*/
+void
+test_write_fpmr_sysreg_asm_16 (uint16_t val)
+{
+  register uint64_t fpmr asm ("fpmr") = val;
+  asm volatile ("" ::"Umv"(fpmr));
+}
+
+/*
+**test_write_fpmr_sysreg_asm_8:
+**	and	x0, x0, 255
+**	msr	fpmr, x0
+**	ret
+*/
+void
+test_write_fpmr_sysreg_asm_8 (uint8_t val)
+{
+  register uint64_t fpmr asm ("fpmr") = val;
+  asm volatile ("" ::"Umv"(fpmr));
+}
+
+/*
+**test_read_fpmr_sysreg_asm_64:
+**	mrs	x0, fpmr
+**	ret
+*/
+uint64_t
+test_read_fpmr_sysreg_asm_64 ()
+{
+  register uint64_t fpmr asm ("fpmr");
+  asm volatile ("" : "=Umv"(fpmr) :);
+  return fpmr;
+}
+
+/*
+**test_read_fpmr_sysreg_asm_32:
+**	mrs	x0, fpmr
+**	ret
+*/
+uint32_t
+test_read_fpmr_sysreg_asm_32 ()
+{
+  register uint32_t fpmr asm ("fpmr");
+  asm volatile ("" : "=Umv"(fpmr) :);
+  return fpmr;
+}
+
+/*
+**test_read_fpmr_sysreg_asm_16:
+**	mrs	x0, fpmr
+**	ret
+*/
+uint16_t
+test_read_fpmr_sysreg_asm_16 ()
+{
+  register uint16_t fpmr asm ("fpmr");
+  asm volatile ("" : "=Umv"(fpmr) :);
+  return fpmr;
+}
+
+/*
+**test_read_fpmr_sysreg_asm_8:
+**	mrs	x0, fpmr
+**	ret
+*/
+uint8_t
+test_read_fpmr_sysreg_asm_8 ()
+{
+  register uint8_t fpmr asm ("fpmr");
+  asm volatile ("" : "=Umv"(fpmr) :);
+  return fpmr;
+}
+
 #pragma GCC pop_options