Message ID | 20240726163254.1174686-3-claudio.bantaloukas@arm.com |
---|---|
State | New |
Headers | show |
Series | aarch64: Add initial support for +fp8 arch extensions | expand |
Claudio Bantaloukas <claudio.bantaloukas@arm.com> writes: > Unlike most system registers, fpmr can be heavily written to in code that > exercises the fp8 functionality. That is because every fp8 instrinsic call > can potentially change the value of fpmr. > Rather than just use a an unspec, we treat the fpmr system register like Typo: s/a an/an/ > all other registers and use a move operation to read and write to it. > > We introduce a new class of moveable system registers that, currently, > only accepts fpmr and a new constraint, Umv, that allows us to > selectively use mrs and msr instructions when expanding rtl for them. > Given that there is code that depends on "real" registers coming before > "fake" ones, we introduce a new constant FPM_REGNUM that uses an > existing value and renumber registers below that. > This requires us to update the bitmaps that describe which registers > belong to each register class. > > gcc/ChangeLog: > > * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add > support for MOVEABLE_SYSREGS class. > (aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr. > (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class. > (aarch64_class_max_nregs): Likewise. > * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr. > (CALL_REALLY_USED_REGISTERS): Likewise. > (REGISTER_NAMES): Likewise. > (enum reg_class): Add MOVEABLE_SYSREGS class. > (REG_CLASS_NAMES): Likewise. > (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr, > the new MOVEABLE_REGS class and renumbering of registers. > * config/aarch64/aarch64.md: (FPM_REGNUM): added new register > number, reusing old value. > (FFR_REGNUM): Renumber. > (FFRT_REGNUM): Likewise. > (LOWERING_REGNUM): Likewise. > (TPIDR2_BLOCK_REGNUM): Likewise. > (SME_STATE_REGNUM): Likewise. > (TPIDR2_SETUP_REGNUM): Likewise. > (ZA_FREE_REGNUM): Likewise. > (ZA_SAVED_REGNUM): Likewise. > (ZA_REGNUM): Likewise. > (ZT0_REGNUM): Likewise. > (*mov<mode>_aarch64): Add support for moveable sysregs. > (*movsi_aarch64): Likewise. > (*movdi_aarch64): Likewise. > * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/acle/fp8.c: New tests. > [...] > @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64" > [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1 > [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] > [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 > + [Umv, r ; mrs , * ] msr\t%0, %x1 > + [r, Umv ; mrs , * ] mrs\t%x0, %1 > } > ) > > @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64" > [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 > [w , w ; fmov , fp , 4] fmov\t%s0, %s1 > [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); > + [Umv, r ; mrs , * , 8] msr\t%0, %x1 > + [r, Umv ; mrs , * , 8] mrs\t%x0, %1 The lengths should be 4 rather than 8. > } > "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) > && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" > @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64" > [w, w ; fmov , fp , 4] fmov\t%d0, %d1 > [w, Dd ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); > [w, Dx ; neon_move, simd, 8] # > + [Umv, r; mrs , * , 8] msr\t%0, %1 > + [r, Umv; mrs , * , 8] mrs\t%0, %1 Similarly here. > } > "CONST_INT_P (operands[1]) > && REG_P (operands[0]) > [...] > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > index 459442be155..1a5c3d7e8fd 100644 > --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > @@ -1,6 +1,7 @@ > /* Test the fp8 ACLE intrinsics family. */ > /* { dg-do compile } */ > /* { dg-options "-O1 -march=armv8-a" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > > #include <arm_acle.h> > > @@ -17,4 +18,107 @@ > #error "__ARM_FEATURE_FP8 feature macro defined." > #endif > > +/* > +**test_write_fpmr_sysreg_asm_64: > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_64 (uint64_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_write_fpmr_sysreg_asm_32: > +** uxtw x0, w0 > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_32 (uint32_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; By using uint64_t rather than uint32_t, these tests are testing movdi rather than the smaller move patterns. I think it should be uint32_t instead. We should then have just an MSR, without an extension. Similarly for the 16-bit and 8-bit cases. LGTM with those changes, but please give others a day or so to comment. Thanks, Richard > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_write_fpmr_sysreg_asm_16: > +** and x0, x0, 65535 > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_16 (uint16_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_write_fpmr_sysreg_asm_8: > +** and x0, x0, 255 > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_8 (uint8_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_read_fpmr_sysreg_asm_64: > +** mrs x0, fpmr > +** ret > +*/ > +uint64_t > +test_read_fpmr_sysreg_asm_64 () > +{ > + register uint64_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > +/* > +**test_read_fpmr_sysreg_asm_32: > +** mrs x0, fpmr > +** ret > +*/ > +uint32_t > +test_read_fpmr_sysreg_asm_32 () > +{ > + register uint32_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > +/* > +**test_read_fpmr_sysreg_asm_16: > +** mrs x0, fpmr > +** ret > +*/ > +uint16_t > +test_read_fpmr_sysreg_asm_16 () > +{ > + register uint16_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > +/* > +**test_read_fpmr_sysreg_asm_8: > +** mrs x0, fpmr > +** ret > +*/ > +uint8_t > +test_read_fpmr_sysreg_asm_8 () > +{ > + register uint8_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > #pragma GCC pop_options
On 29/07/2024 13:13, Richard Sandiford wrote: > Claudio Bantaloukas <claudio.bantaloukas@arm.com> writes: >> Unlike most system registers, fpmr can be heavily written to in code that >> exercises the fp8 functionality. That is because every fp8 instrinsic call >> can potentially change the value of fpmr. >> Rather than just use a an unspec, we treat the fpmr system register like > > Typo: s/a an/an/ Thanks for the catch, will repost along with the requested changes below Cheers, Claudio > >> all other registers and use a move operation to read and write to it. >> >> We introduce a new class of moveable system registers that, currently, >> only accepts fpmr and a new constraint, Umv, that allows us to >> selectively use mrs and msr instructions when expanding rtl for them. >> Given that there is code that depends on "real" registers coming before >> "fake" ones, we introduce a new constant FPM_REGNUM that uses an >> existing value and renumber registers below that. >> This requires us to update the bitmaps that describe which registers >> belong to each register class. >> >> gcc/ChangeLog: >> >> * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add >> support for MOVEABLE_SYSREGS class. >> (aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr. >> (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class. >> (aarch64_class_max_nregs): Likewise. >> * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr. >> (CALL_REALLY_USED_REGISTERS): Likewise. >> (REGISTER_NAMES): Likewise. >> (enum reg_class): Add MOVEABLE_SYSREGS class. >> (REG_CLASS_NAMES): Likewise. >> (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr, >> the new MOVEABLE_REGS class and renumbering of registers. >> * config/aarch64/aarch64.md: (FPM_REGNUM): added new register >> number, reusing old value. >> (FFR_REGNUM): Renumber. >> (FFRT_REGNUM): Likewise. >> (LOWERING_REGNUM): Likewise. >> (TPIDR2_BLOCK_REGNUM): Likewise. >> (SME_STATE_REGNUM): Likewise. >> (TPIDR2_SETUP_REGNUM): Likewise. >> (ZA_FREE_REGNUM): Likewise. >> (ZA_SAVED_REGNUM): Likewise. >> (ZA_REGNUM): Likewise. >> (ZT0_REGNUM): Likewise. >> (*mov<mode>_aarch64): Add support for moveable sysregs. >> (*movsi_aarch64): Likewise. >> (*movdi_aarch64): Likewise. >> * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/aarch64/acle/fp8.c: New tests. >> [...] >> @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64" >> [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1 >> [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] >> [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 >> + [Umv, r ; mrs , * ] msr\t%0, %x1 >> + [r, Umv ; mrs , * ] mrs\t%x0, %1 >> } >> ) >> >> @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64" >> [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 >> [w , w ; fmov , fp , 4] fmov\t%s0, %s1 >> [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); >> + [Umv, r ; mrs , * , 8] msr\t%0, %x1 >> + [r, Umv ; mrs , * , 8] mrs\t%x0, %1 > > The lengths should be 4 rather than 8. > >> } >> "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) >> && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" >> @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64" >> [w, w ; fmov , fp , 4] fmov\t%d0, %d1 >> [w, Dd ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); >> [w, Dx ; neon_move, simd, 8] # >> + [Umv, r; mrs , * , 8] msr\t%0, %1 >> + [r, Umv; mrs , * , 8] mrs\t%0, %1 > > Similarly here. > >> } >> "CONST_INT_P (operands[1]) >> && REG_P (operands[0]) >> [...] >> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> index 459442be155..1a5c3d7e8fd 100644 >> --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> @@ -1,6 +1,7 @@ >> /* Test the fp8 ACLE intrinsics family. */ >> /* { dg-do compile } */ >> /* { dg-options "-O1 -march=armv8-a" } */ >> +/* { dg-final { check-function-bodies "**" "" "" } } */ >> >> #include <arm_acle.h> >> >> @@ -17,4 +18,107 @@ >> #error "__ARM_FEATURE_FP8 feature macro defined." >> #endif >> >> +/* >> +**test_write_fpmr_sysreg_asm_64: >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_64 (uint64_t val) >> +{ >> + register uint64_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_write_fpmr_sysreg_asm_32: >> +** uxtw x0, w0 >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_32 (uint32_t val) >> +{ >> + register uint64_t fpmr asm ("fpmr") = val; > > By using uint64_t rather than uint32_t, these tests are testing movdi > rather than the smaller move patterns. I think it should be uint32_t > instead. We should then have just an MSR, without an extension. > > Similarly for the 16-bit and 8-bit cases. > > LGTM with those changes, but please give others a day or so to comment. > > Thanks, > Richard > > >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_write_fpmr_sysreg_asm_16: >> +** and x0, x0, 65535 >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_16 (uint16_t val) >> +{ >> + register uint64_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_write_fpmr_sysreg_asm_8: >> +** and x0, x0, 255 >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_8 (uint8_t val) >> +{ >> + register uint64_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_64: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint64_t >> +test_read_fpmr_sysreg_asm_64 () >> +{ >> + register uint64_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_32: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint32_t >> +test_read_fpmr_sysreg_asm_32 () >> +{ >> + register uint32_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_16: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint16_t >> +test_read_fpmr_sysreg_asm_16 () >> +{ >> + register uint16_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_8: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint8_t >> +test_read_fpmr_sysreg_asm_8 () >> +{ >> + register uint8_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> #pragma GCC pop_options
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index e0cf382998c..9810f2c0390 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -2018,6 +2018,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) case PR_HI_REGS: return mode == VNx32BImode ? 2 : 1; + case MOVEABLE_SYSREGS: case FFR_REGS: case PR_AND_FFR_REGS: case FAKE_REGS: @@ -2045,6 +2046,9 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) /* This must have the same size as _Unwind_Word. */ return mode == DImode; + if (regno == FPM_REGNUM) + return mode == QImode || mode == HImode || mode == SImode || mode == DImode; + unsigned int vec_flags = aarch64_classify_vector_mode (mode); if (vec_flags == VEC_SVE_PRED) return pr_or_ffr_regnum_p (regno); @@ -12680,6 +12684,9 @@ aarch64_regno_regclass (unsigned regno) if (PR_REGNUM_P (regno)) return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS; + if (regno == FPM_REGNUM) + return MOVEABLE_SYSREGS; + if (regno == FFR_REGNUM || regno == FFRT_REGNUM) return FFR_REGS; @@ -13068,6 +13075,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) case PR_HI_REGS: return mode == VNx32BImode ? 2 : 1; + case MOVEABLE_SYSREGS: case STACK_REG: case FFR_REGS: case PR_AND_FFR_REGS: diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 2e75c6b81e2..2dfb999bea5 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -523,6 +523,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ 0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \ 0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \ + 1, /* FPMR */ \ 1, 1, /* FFR and FFRT */ \ 1, 1, 1, 1, 1, 1, 1, 1 /* Fake registers */ \ } @@ -547,6 +548,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED 1, 1, 1, 0, /* SFP, AP, CC, VG */ \ 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ + 1, /* FPMR */ \ 1, 1, /* FFR and FFRT */ \ 0, 0, 0, 0, 0, 0, 0, 0 /* Fake registers */ \ } @@ -564,6 +566,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED "sfp", "ap", "cc", "vg", \ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \ + "fpmr", \ "ffr", "ffrt", \ "lowering", "tpidr2_block", "sme_state", "tpidr2_setup", \ "za_free", "za_saved", "za", "zt0" \ @@ -775,6 +778,7 @@ enum reg_class PR_REGS, FFR_REGS, PR_AND_FFR_REGS, + MOVEABLE_SYSREGS, FAKE_REGS, ALL_REGS, LIM_REG_CLASSES /* Last */ @@ -801,6 +805,7 @@ enum reg_class "PR_REGS", \ "FFR_REGS", \ "PR_AND_FFR_REGS", \ + "MOVEABLE_SYSREGS", \ "FAKE_REGS", \ "ALL_REGS" \ } @@ -822,10 +827,11 @@ enum reg_class { 0x00000000, 0x00000000, 0x00000ff0 }, /* PR_LO_REGS */ \ { 0x00000000, 0x00000000, 0x000ff000 }, /* PR_HI_REGS */ \ { 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \ - { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \ - { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \ - { 0x00000000, 0x00000000, 0x3fc00000 }, /* FAKE_REGS */ \ - { 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \ + { 0x00000000, 0x00000000, 0x00600000 }, /* FFR_REGS */ \ + { 0x00000000, 0x00000000, 0x006ffff0 }, /* PR_AND_FFR_REGS */ \ + { 0x00000000, 0x00000000, 0x00100000 }, /* MOVEABLE_SYSREGS */ \ + { 0x00000000, 0x00000000, 0x7f800000 }, /* FAKE_REGS */ \ + { 0xffffffff, 0xffffffff, 0x001fffff } /* ALL_REGS */ \ } #define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 94ff0eefa77..22e57ee7ccf 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -107,10 +107,14 @@ (define_constants (P14_REGNUM 82) (P15_REGNUM 83) (LAST_SAVED_REGNUM 83) - (FFR_REGNUM 84) + + ;; Floating Point Mode Register, used in FP8 insns. + (FPM_REGNUM 84) + + (FFR_REGNUM 85) ;; "FFR token": a fake register used for representing the scheduling ;; restrictions on FFR-related operations. - (FFRT_REGNUM 85) + (FFRT_REGNUM 86) ;; ---------------------------------------------------------------- ;; Fake registers @@ -122,17 +126,17 @@ (define_constants ;; ABI-related lowering is needed. These placeholders read and ;; write this register. Instructions that depend on the lowering ;; read the register. - (LOWERING_REGNUM 86) + (LOWERING_REGNUM 87) ;; Represents the contents of the current function's TPIDR2 block, ;; in abstract form. - (TPIDR2_BLOCK_REGNUM 87) + (TPIDR2_BLOCK_REGNUM 88) ;; Holds the value that the current function wants PSTATE.ZA to be. ;; The actual value can sometimes vary, because it does not track ;; changes to PSTATE.ZA that happen during a lazy save and restore. ;; Those effects are instead tracked by ZA_SAVED_REGNUM. - (SME_STATE_REGNUM 88) + (SME_STATE_REGNUM 89) ;; Instructions write to this register if they set TPIDR2_EL0 to a ;; well-defined value. Instructions read from the register if they @@ -140,14 +144,14 @@ (define_constants ;; ;; The register does not model the architected TPIDR2_ELO, just the ;; current function's management of it. - (TPIDR2_SETUP_REGNUM 89) + (TPIDR2_SETUP_REGNUM 90) ;; Represents the property "has an incoming lazy save been committed?". - (ZA_FREE_REGNUM 90) + (ZA_FREE_REGNUM 91) ;; Represents the property "are the current function's ZA contents ;; stored in the lazy save buffer, rather than in ZA itself?". - (ZA_SAVED_REGNUM 91) + (ZA_SAVED_REGNUM 92) ;; Represents the contents of the current function's ZA state in ;; abstract form. At various times in the function, these contents @@ -155,10 +159,10 @@ (define_constants ;; ;; The contents persist even when the architected ZA is off. Private-ZA ;; functions have no effect on its contents. - (ZA_REGNUM 92) + (ZA_REGNUM 93) ;; Similarly represents the contents of the current function's ZT0 state. - (ZT0_REGNUM 93) + (ZT0_REGNUM 94) (FIRST_FAKE_REGNUM LOWERING_REGNUM) (LAST_FAKE_REGNUM ZT0_REGNUM) @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64" [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1 [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 + [Umv, r ; mrs , * ] msr\t%0, %x1 + [r, Umv ; mrs , * ] mrs\t%x0, %1 } ) @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64" [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 [w , w ; fmov , fp , 4] fmov\t%s0, %s1 [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); + [Umv, r ; mrs , * , 8] msr\t%0, %x1 + [r, Umv ; mrs , * , 8] mrs\t%x0, %1 } "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64" [w, w ; fmov , fp , 4] fmov\t%d0, %d1 [w, Dd ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); [w, Dx ; neon_move, simd, 8] # + [Umv, r; mrs , * , 8] msr\t%0, %1 + [r, Umv; mrs , * , 8] mrs\t%0, %1 } "CONST_INT_P (operands[1]) && REG_P (operands[0]) diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index a2569cea510..0c81fb28f7e 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -77,6 +77,9 @@ (define_register_constraint "Upl" "PR_LO_REGS" (define_register_constraint "Uph" "PR_HI_REGS" "SVE predicate registers p8 - p15.") +(define_register_constraint "Umv" "MOVEABLE_SYSREGS" + "@internal System Registers suitable for moving rather than requiring an unspec msr") + (define_constraint "c" "@internal The condition code register." (match_operand 0 "cc_register")) diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c index 459442be155..1a5c3d7e8fd 100644 --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c @@ -1,6 +1,7 @@ /* Test the fp8 ACLE intrinsics family. */ /* { dg-do compile } */ /* { dg-options "-O1 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ #include <arm_acle.h> @@ -17,4 +18,107 @@ #error "__ARM_FEATURE_FP8 feature macro defined." #endif +/* +**test_write_fpmr_sysreg_asm_64: +** msr fpmr, x0 +** ret +*/ +void +test_write_fpmr_sysreg_asm_64 (uint64_t val) +{ + register uint64_t fpmr asm ("fpmr") = val; + asm volatile ("" ::"Umv"(fpmr)); +} + +/* +**test_write_fpmr_sysreg_asm_32: +** uxtw x0, w0 +** msr fpmr, x0 +** ret +*/ +void +test_write_fpmr_sysreg_asm_32 (uint32_t val) +{ + register uint64_t fpmr asm ("fpmr") = val; + asm volatile ("" ::"Umv"(fpmr)); +} + +/* +**test_write_fpmr_sysreg_asm_16: +** and x0, x0, 65535 +** msr fpmr, x0 +** ret +*/ +void +test_write_fpmr_sysreg_asm_16 (uint16_t val) +{ + register uint64_t fpmr asm ("fpmr") = val; + asm volatile ("" ::"Umv"(fpmr)); +} + +/* +**test_write_fpmr_sysreg_asm_8: +** and x0, x0, 255 +** msr fpmr, x0 +** ret +*/ +void +test_write_fpmr_sysreg_asm_8 (uint8_t val) +{ + register uint64_t fpmr asm ("fpmr") = val; + asm volatile ("" ::"Umv"(fpmr)); +} + +/* +**test_read_fpmr_sysreg_asm_64: +** mrs x0, fpmr +** ret +*/ +uint64_t +test_read_fpmr_sysreg_asm_64 () +{ + register uint64_t fpmr asm ("fpmr"); + asm volatile ("" : "=Umv"(fpmr) :); + return fpmr; +} + +/* +**test_read_fpmr_sysreg_asm_32: +** mrs x0, fpmr +** ret +*/ +uint32_t +test_read_fpmr_sysreg_asm_32 () +{ + register uint32_t fpmr asm ("fpmr"); + asm volatile ("" : "=Umv"(fpmr) :); + return fpmr; +} + +/* +**test_read_fpmr_sysreg_asm_16: +** mrs x0, fpmr +** ret +*/ +uint16_t +test_read_fpmr_sysreg_asm_16 () +{ + register uint16_t fpmr asm ("fpmr"); + asm volatile ("" : "=Umv"(fpmr) :); + return fpmr; +} + +/* +**test_read_fpmr_sysreg_asm_8: +** mrs x0, fpmr +** ret +*/ +uint8_t +test_read_fpmr_sysreg_asm_8 () +{ + register uint8_t fpmr asm ("fpmr"); + asm volatile ("" : "=Umv"(fpmr) :); + return fpmr; +} + #pragma GCC pop_options