Message ID | 20240612123505.837005-1-victor.donascimento@arm.com |
---|---|
State | New |
Headers | show |
Series | [v2] libatomic: Add rcpc3 128-bit atomic operations for AArch64 | expand |
Victor Do Nascimento <victor.donascimento@arm.com> writes: > The introduction of the optional RCPC3 architectural extension for > Armv8.2-A upwards provides additional support for the release > consistency model, introducing the Load-Acquire RCpc Pair Ordered, and > Store-Release Pair Ordered operations in the form of LDIAPP and STILP. > > These operations are single-copy atomic on cores which also implement > LSE2 and, as such, support for these operations is added to Libatomic > and employed accordingly when the LSE2 and RCPC3 features are detected > in a given core at runtime. > > libatomic/ChangeLog: > > * config/linux/aarch64/atomic_16.S (libat_load_16): Add LRCPC3 > variant. > (libat_store_16): Likewise. > * config/linux/aarch64/host-config.h (HWCAP2_LRCPC3): New. > (LSE2_LRCPC3_ATOP): Previously LSE2_ATOP. New ifuncs guarded > under it. > (has_rcpc3): New. > --- > libatomic/config/linux/aarch64/atomic_16.S | 46 +++++++++++++++++++- > libatomic/config/linux/aarch64/host-config.h | 34 +++++++++++++-- > 2 files changed, 74 insertions(+), 6 deletions(-) > > diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S > index c44c31c6418..5767fba5c03 100644 > --- a/libatomic/config/linux/aarch64/atomic_16.S > +++ b/libatomic/config/linux/aarch64/atomic_16.S > @@ -35,16 +35,21 @@ > writes, this will be true when using atomics in actual code. > > The libat_<op>_16 entry points are ARMv8.0. > - The libat_<op>_16_i1 entry points are used when LSE128 is available. > + The libat_<op>_16_i1 entry points are used when LSE128 or LRCPC3 is available. > The libat_<op>_16_i2 entry points are used when LSE2 is available. */ > > #include "auto-config.h" > > .arch armv8-a+lse > > +/* There is overlap in atomic instructions implemented in RCPC3 and LSE2. > + Consequently, both _i1 and _i2 suffixes are needed for functions using these. > + Elsewhere, all extension-specific implementations are mapped to _i1. */ > + > +#define LRCPC3(NAME) libat_##NAME##_i1 > #define LSE128(NAME) libat_##NAME##_i1 > #define LSE(NAME) libat_##NAME##_i1 > -#define LSE2(NAME) libat_##NAME##_i1 > +#define LSE2(NAME) libat_##NAME##_i2 > #define CORE(NAME) libat_##NAME > #define ATOMIC(NAME) __atomic_##NAME > > @@ -513,6 +518,43 @@ END (test_and_set_16) > /* ifunc implementations: Carries run-time dependence on the presence of further > architectural extensions. */ > > +ENTRY_FEAT (load_16, LRCPC3) > + cbnz w1, 1f > + > + /* RELAXED. */ > + ldp res0, res1, [x0] > + ret > +1: > + cmp w1, SEQ_CST > + b.eq 2f > + > + /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */ > + /* ldiapp res0, res1, [x0] */ > + .inst 0xd9411800 > + ret > + > + /* SEQ_CST. */ > +2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */ > + /* ldiapp res0, res1, [x0] */ > + .inst 0xd9411800 > + ret > +END_FEAT (load_16, LRCPC3) > + > + > +ENTRY_FEAT (store_16, LRCPC3) > + cbnz w4, 1f > + > + /* RELAXED. */ > + stp in0, in1, [x0] > + ret > + > + /* RELEASE/SEQ_CST. */ > +1: /* stilp in0, in1, [x0] */ > + .inst 0xd9031802 > + ret > +END_FEAT (store_16, LRCPC3) > + > + > ENTRY_FEAT (exchange_16, LSE128) > mov tmp0, x0 > mov res0, in0 > diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h > index d05e9eb628f..8adf0563001 100644 > --- a/libatomic/config/linux/aarch64/host-config.h > +++ b/libatomic/config/linux/aarch64/host-config.h > @@ -33,6 +33,9 @@ > #ifndef HWCAP_USCAT > # define HWCAP_USCAT (1 << 25) > #endif > +#ifndef HWCAP2_LRCPC3 > +# define HWCAP2_LRCPC3 (1UL << 46) > +#endif > #ifndef HWCAP2_LSE128 > # define HWCAP2_LSE128 (1UL << 47) > #endif > @@ -54,7 +57,7 @@ typedef struct __ifunc_arg_t { > #if defined (LAT_CAS_N) > # define LSE_ATOP > #elif defined (LAT_LOAD_N) || defined (LAT_STORE_N) > -# define LSE2_ATOP > +# define LSE2_LRCPC3_ATOP > #elif defined (LAT_EXCH_N) || defined (LAT_FIOR_N) || defined (LAT_FAND_N) > # define LSE128_ATOP > #endif > @@ -63,9 +66,10 @@ typedef struct __ifunc_arg_t { > # if defined (LSE_ATOP) > # define IFUNC_NCOND(N) 1 > # define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS) > -# elif defined (LSE2_ATOP) > -# define IFUNC_NCOND(N) 1 > -# define IFUNC_COND_1 (has_lse2 (hwcap, features)) > +# elif defined (LSE2_LRCPC3_ATOP) > +# define IFUNC_NCOND(N) 2 > +# define IFUNC_COND_1 (has_rcpc3 (hwcap, features)) > +# define IFUNC_COND_2 (has_lse2 (hwcap, features)) > # elif defined (LSE128_ATOP) > # define IFUNC_NCOND(N) 1 > # define IFUNC_COND_1 (has_lse128 (hwcap, features)) > @@ -131,6 +135,28 @@ has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features) > return false; > } > > +/* LRCPC atomic support encoded in ID_AA64ISAR1_EL1.Atomic, bits[23:20]. The > + expected value is 0b0011. Check that. */ > + > +static inline bool > +has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features) > +{ > + if (hwcap & _IFUNC_ARG_HWCAP > + && features->_hwcap2 & HWCAP2_LRCPC3) > + return true; > + /* Try fallback feature check method to guarantee LRCPC3 is not implemented. > + > + In the absence of HWCAP_CPUID, we are unable to check for RCPC3, return. > + If feature check available, check LSE2 prerequisite before proceeding. */ It seems unfortunate that one of the things we do as part of this function is check for the presence of LSE2, which is also what the second ifunc does. It might be clearer to have a single resolver that selects the appropriate routine number. But that would be another change to the target-independent code, and you've already had to do one of those as part of this series. I also can't be 100% sure that having a single resolver would make things clearer. So I agree the current approach is ok for now. > + if (!(hwcap & HWCAP_CPUID) || !(hwcap & HWCAP_USCAT)) Nit: should be one fewer space before "||". OK with that change, thanks. Richard > + return false; > + unsigned long isar1; > + asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1)); > + if (AT_FEAT_FIELD (isar1) >= 3) > + return true; > + return false; > +} > + > #endif /* HAVE_IFUNC */ > > /* All 128-bit atomic functions are defined in aarch64/atomic_16.S. */
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S index c44c31c6418..5767fba5c03 100644 --- a/libatomic/config/linux/aarch64/atomic_16.S +++ b/libatomic/config/linux/aarch64/atomic_16.S @@ -35,16 +35,21 @@ writes, this will be true when using atomics in actual code. The libat_<op>_16 entry points are ARMv8.0. - The libat_<op>_16_i1 entry points are used when LSE128 is available. + The libat_<op>_16_i1 entry points are used when LSE128 or LRCPC3 is available. The libat_<op>_16_i2 entry points are used when LSE2 is available. */ #include "auto-config.h" .arch armv8-a+lse +/* There is overlap in atomic instructions implemented in RCPC3 and LSE2. + Consequently, both _i1 and _i2 suffixes are needed for functions using these. + Elsewhere, all extension-specific implementations are mapped to _i1. */ + +#define LRCPC3(NAME) libat_##NAME##_i1 #define LSE128(NAME) libat_##NAME##_i1 #define LSE(NAME) libat_##NAME##_i1 -#define LSE2(NAME) libat_##NAME##_i1 +#define LSE2(NAME) libat_##NAME##_i2 #define CORE(NAME) libat_##NAME #define ATOMIC(NAME) __atomic_##NAME @@ -513,6 +518,43 @@ END (test_and_set_16) /* ifunc implementations: Carries run-time dependence on the presence of further architectural extensions. */ +ENTRY_FEAT (load_16, LRCPC3) + cbnz w1, 1f + + /* RELAXED. */ + ldp res0, res1, [x0] + ret +1: + cmp w1, SEQ_CST + b.eq 2f + + /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */ + /* ldiapp res0, res1, [x0] */ + .inst 0xd9411800 + ret + + /* SEQ_CST. */ +2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */ + /* ldiapp res0, res1, [x0] */ + .inst 0xd9411800 + ret +END_FEAT (load_16, LRCPC3) + + +ENTRY_FEAT (store_16, LRCPC3) + cbnz w4, 1f + + /* RELAXED. */ + stp in0, in1, [x0] + ret + + /* RELEASE/SEQ_CST. */ +1: /* stilp in0, in1, [x0] */ + .inst 0xd9031802 + ret +END_FEAT (store_16, LRCPC3) + + ENTRY_FEAT (exchange_16, LSE128) mov tmp0, x0 mov res0, in0 diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h index d05e9eb628f..8adf0563001 100644 --- a/libatomic/config/linux/aarch64/host-config.h +++ b/libatomic/config/linux/aarch64/host-config.h @@ -33,6 +33,9 @@ #ifndef HWCAP_USCAT # define HWCAP_USCAT (1 << 25) #endif +#ifndef HWCAP2_LRCPC3 +# define HWCAP2_LRCPC3 (1UL << 46) +#endif #ifndef HWCAP2_LSE128 # define HWCAP2_LSE128 (1UL << 47) #endif @@ -54,7 +57,7 @@ typedef struct __ifunc_arg_t { #if defined (LAT_CAS_N) # define LSE_ATOP #elif defined (LAT_LOAD_N) || defined (LAT_STORE_N) -# define LSE2_ATOP +# define LSE2_LRCPC3_ATOP #elif defined (LAT_EXCH_N) || defined (LAT_FIOR_N) || defined (LAT_FAND_N) # define LSE128_ATOP #endif @@ -63,9 +66,10 @@ typedef struct __ifunc_arg_t { # if defined (LSE_ATOP) # define IFUNC_NCOND(N) 1 # define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS) -# elif defined (LSE2_ATOP) -# define IFUNC_NCOND(N) 1 -# define IFUNC_COND_1 (has_lse2 (hwcap, features)) +# elif defined (LSE2_LRCPC3_ATOP) +# define IFUNC_NCOND(N) 2 +# define IFUNC_COND_1 (has_rcpc3 (hwcap, features)) +# define IFUNC_COND_2 (has_lse2 (hwcap, features)) # elif defined (LSE128_ATOP) # define IFUNC_NCOND(N) 1 # define IFUNC_COND_1 (has_lse128 (hwcap, features)) @@ -131,6 +135,28 @@ has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features) return false; } +/* LRCPC atomic support encoded in ID_AA64ISAR1_EL1.Atomic, bits[23:20]. The + expected value is 0b0011. Check that. */ + +static inline bool +has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features) +{ + if (hwcap & _IFUNC_ARG_HWCAP + && features->_hwcap2 & HWCAP2_LRCPC3) + return true; + /* Try fallback feature check method to guarantee LRCPC3 is not implemented. + + In the absence of HWCAP_CPUID, we are unable to check for RCPC3, return. + If feature check available, check LSE2 prerequisite before proceeding. */ + if (!(hwcap & HWCAP_CPUID) || !(hwcap & HWCAP_USCAT)) + return false; + unsigned long isar1; + asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1)); + if (AT_FEAT_FIELD (isar1) >= 3) + return true; + return false; +} + #endif /* HAVE_IFUNC */ /* All 128-bit atomic functions are defined in aarch64/atomic_16.S. */