Message ID | 20220728063120.2867508-5-npiggin@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | powerpc: alternate queued spinlock implementation | expand |
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: > The first 16 bits of the lock are only modified by the owner, and other > modifications always use atomic operations on the entire 32 bits, so > unlocks can use plain stores on the 16 bits. This is the same kind of > optimisation done by core qspinlock code. > --- > arch/powerpc/include/asm/qspinlock.h | 6 +----- > arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++-- > 2 files changed, 18 insertions(+), 7 deletions(-) > > diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h > index f06117aa60e1..79a1936fb68d 100644 > --- a/arch/powerpc/include/asm/qspinlock.h > +++ b/arch/powerpc/include/asm/qspinlock.h > @@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) > > static inline void queued_spin_unlock(struct qspinlock *lock) > { > - for (;;) { > - int val = atomic_read(&lock->val); > - if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val) > - return; > - } > + smp_store_release(&lock->locked, 0); Is it also possible for lock_set_locked() to use a non-atomic acquire operation? > } > > #define arch_spin_is_locked(l) queued_spin_is_locked(l) > diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h > index 9630e714c70d..3425dab42576 100644 > --- a/arch/powerpc/include/asm/qspinlock_types.h > +++ b/arch/powerpc/include/asm/qspinlock_types.h > @@ -3,12 +3,27 @@ > #define _ASM_POWERPC_QSPINLOCK_TYPES_H > > #include <linux/types.h> > +#include <asm/byteorder.h> > > typedef struct qspinlock { > - atomic_t val; > + union { > + atomic_t val; > + > +#ifdef __LITTLE_ENDIAN > + struct { > + u16 locked; > + u8 reserved[2]; > + }; > +#else > + struct { > + u8 reserved[2]; > + u16 locked; > + }; > +#endif > + }; > } arch_spinlock_t; Just to double check we have: #define _Q_LOCKED_OFFSET 0 #define _Q_LOCKED_BITS 1 #define _Q_LOCKED_MASK 0x00000001 #define _Q_LOCKED_VAL 1 #define _Q_TAIL_CPU_OFFSET 16 #define _Q_TAIL_CPU_BITS 16 #define _Q_TAIL_CPU_MASK 0xffff0000 so the ordering here looks correct. > > -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } > +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } > > /* > * Bitfields in the atomic value:
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: [resend as utf-8, not utf-7] > The first 16 bits of the lock are only modified by the owner, and other > modifications always use atomic operations on the entire 32 bits, so > unlocks can use plain stores on the 16 bits. This is the same kind of > optimisation done by core qspinlock code. > --- > arch/powerpc/include/asm/qspinlock.h | 6 +----- > arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++-- > 2 files changed, 18 insertions(+), 7 deletions(-) > > diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h > index f06117aa60e1..79a1936fb68d 100644 > --- a/arch/powerpc/include/asm/qspinlock.h > +++ b/arch/powerpc/include/asm/qspinlock.h > @@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) > > static inline void queued_spin_unlock(struct qspinlock *lock) > { > - for (;;) { > - int val = atomic_read(&lock->val); > - if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val) > - return; > - } > + smp_store_release(&lock->locked, 0); Is it also possible for lock_set_locked() to use a non-atomic acquire operation? > } > > #define arch_spin_is_locked(l) queued_spin_is_locked(l) > diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h > index 9630e714c70d..3425dab42576 100644 > --- a/arch/powerpc/include/asm/qspinlock_types.h > +++ b/arch/powerpc/include/asm/qspinlock_types.h > @@ -3,12 +3,27 @@ > #define _ASM_POWERPC_QSPINLOCK_TYPES_H > > #include <linux/types.h> > +#include <asm/byteorder.h> > > typedef struct qspinlock { > - atomic_t val; > + union { > + atomic_t val; > + > +#ifdef __LITTLE_ENDIAN > + struct { > + u16 locked; > + u8 reserved[2]; > + }; > +#else > + struct { > + u8 reserved[2]; > + u16 locked; > + }; > +#endif > + }; > } arch_spinlock_t; Just to double check we have: #define _Q_LOCKED_OFFSET 0 #define _Q_LOCKED_BITS 1 #define _Q_LOCKED_MASK 0x00000001 #define _Q_LOCKED_VAL 1 #define _Q_TAIL_CPU_OFFSET 16 #define _Q_TAIL_CPU_BITS 16 #define _Q_TAIL_CPU_MASK 0xffff0000 so the ordering here looks correct. > > -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } > +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } > > /* > * Bitfields in the atomic value:
On Thu Nov 10, 2022 at 10:39 AM AEST, Jordan Niethe wrote: > On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: > [resend as utf-8, not utf-7] > > The first 16 bits of the lock are only modified by the owner, and other > > modifications always use atomic operations on the entire 32 bits, so > > unlocks can use plain stores on the 16 bits. This is the same kind of > > optimisation done by core qspinlock code. > > --- > > arch/powerpc/include/asm/qspinlock.h | 6 +----- > > arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++-- > > 2 files changed, 18 insertions(+), 7 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h > > index f06117aa60e1..79a1936fb68d 100644 > > --- a/arch/powerpc/include/asm/qspinlock.h > > +++ b/arch/powerpc/include/asm/qspinlock.h > > @@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) > > > > static inline void queued_spin_unlock(struct qspinlock *lock) > > { > > - for (;;) { > > - int val = atomic_read(&lock->val); > > - if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val) > > - return; > > - } > > + smp_store_release(&lock->locked, 0); > > Is it also possible for lock_set_locked() to use a non-atomic acquire > operation? It has to be atomic as mentioned earlier. Thanks, Nick
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index f06117aa60e1..79a1936fb68d 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static inline void queued_spin_unlock(struct qspinlock *lock) { - for (;;) { - int val = atomic_read(&lock->val); - if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val) - return; - } + smp_store_release(&lock->locked, 0); } #define arch_spin_is_locked(l) queued_spin_is_locked(l) diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 9630e714c70d..3425dab42576 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -3,12 +3,27 @@ #define _ASM_POWERPC_QSPINLOCK_TYPES_H #include <linux/types.h> +#include <asm/byteorder.h> typedef struct qspinlock { - atomic_t val; + union { + atomic_t val; + +#ifdef __LITTLE_ENDIAN + struct { + u16 locked; + u8 reserved[2]; + }; +#else + struct { + u8 reserved[2]; + u16 locked; + }; +#endif + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } /* * Bitfields in the atomic value: