Message ID | 20220728063120.2867508-9-npiggin@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | powerpc: alternate queued spinlock implementation | expand |
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: > Store the owner CPU number in the lock word so it may be yielded to, > as powerpc's paravirtualised simple spinlocks do. > --- > arch/powerpc/include/asm/qspinlock.h | 8 +++++++- > arch/powerpc/include/asm/qspinlock_types.h | 10 ++++++++++ > arch/powerpc/lib/qspinlock.c | 6 +++--- > 3 files changed, 20 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h > index 3ab354159e5e..44601b261e08 100644 > --- a/arch/powerpc/include/asm/qspinlock.h > +++ b/arch/powerpc/include/asm/qspinlock.h > @@ -20,9 +20,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock) > return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); > } > > +static __always_inline u32 queued_spin_get_locked_val(void) Maybe this function should have "encode" in the name to match with encode_tail_cpu(). > +{ > + /* XXX: make this use lock value in paca like simple spinlocks? */ Is that the paca's lock_token which is 0x8000? > + return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); > +} > + > static __always_inline int queued_spin_trylock(struct qspinlock *lock) > { > - u32 new = _Q_LOCKED_VAL; > + u32 new = queued_spin_get_locked_val(); > u32 prev; > > asm volatile( > diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h > index 8b20f5e22bba..35f9525381e6 100644 > --- a/arch/powerpc/include/asm/qspinlock_types.h > +++ b/arch/powerpc/include/asm/qspinlock_types.h > @@ -29,6 +29,8 @@ typedef struct qspinlock { > * Bitfields in the lock word: > * > * 0: locked bit > + * 1-14: lock holder cpu > + * 15: unused bit > * 16: must queue bit > * 17-31: tail cpu (+1) So there is one more bit to store the tail cpu vs the lock holder cpu? > */ > @@ -39,6 +41,14 @@ typedef struct qspinlock { > #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) > #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) > > +#define _Q_OWNER_CPU_OFFSET 1 > +#define _Q_OWNER_CPU_BITS 14 > +#define _Q_OWNER_CPU_MASK _Q_SET_MASK(OWNER_CPU) > + > +#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS) > +#error "qspinlock does not support such large CONFIG_NR_CPUS" > +#endif > + > #define _Q_MUST_Q_OFFSET 16 > #define _Q_MUST_Q_BITS 1 > #define _Q_MUST_Q_MASK _Q_SET_MASK(MUST_Q) > diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c > index a906cc8f15fa..aa26cfe21f18 100644 > --- a/arch/powerpc/lib/qspinlock.c > +++ b/arch/powerpc/lib/qspinlock.c > @@ -50,7 +50,7 @@ static inline int get_tail_cpu(u32 val) > /* Take the lock by setting the lock bit, no other CPUs will touch it. */ > static __always_inline void lock_set_locked(struct qspinlock *lock) > { > - u32 new = _Q_LOCKED_VAL; > + u32 new = queued_spin_get_locked_val(); > u32 prev; > > asm volatile( > @@ -68,7 +68,7 @@ static __always_inline void lock_set_locked(struct qspinlock *lock) > /* Take lock, clearing tail, cmpxchg with old (which must not be locked) */ > static __always_inline int trylock_clear_tail_cpu(struct qspinlock *lock, u32 old) > { > - u32 new = _Q_LOCKED_VAL; > + u32 new = queued_spin_get_locked_val(); > u32 prev; > > BUG_ON(old & _Q_LOCKED_VAL); > @@ -116,7 +116,7 @@ static __always_inline u32 __trylock_cmpxchg(struct qspinlock *lock, u32 old, u3 > /* Take lock, preserving tail, cmpxchg with val (which must not be locked) */ > static __always_inline int trylock_with_tail_cpu(struct qspinlock *lock, u32 val) > { > - u32 newval = _Q_LOCKED_VAL | (val & _Q_TAIL_CPU_MASK); > + u32 newval = queued_spin_get_locked_val() | (val & _Q_TAIL_CPU_MASK); > > if (__trylock_cmpxchg(lock, val, newval) == val) > return 1;
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: [resend as utf-8, not utf-7] > Store the owner CPU number in the lock word so it may be yielded to, > as powerpc's paravirtualised simple spinlocks do. > --- > arch/powerpc/include/asm/qspinlock.h | 8 +++++++- > arch/powerpc/include/asm/qspinlock_types.h | 10 ++++++++++ > arch/powerpc/lib/qspinlock.c | 6 +++--- > 3 files changed, 20 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h > index 3ab354159e5e..44601b261e08 100644 > --- a/arch/powerpc/include/asm/qspinlock.h > +++ b/arch/powerpc/include/asm/qspinlock.h > @@ -20,9 +20,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock) > return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); > } > > +static __always_inline u32 queued_spin_get_locked_val(void) Maybe this function should have "encode" in the name to match with encode_tail_cpu(). > +{ > + /* XXX: make this use lock value in paca like simple spinlocks? */ Is that the paca's lock_token which is 0x8000? > + return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); > +} > + > static __always_inline int queued_spin_trylock(struct qspinlock *lock) > { > - u32 new = _Q_LOCKED_VAL; > + u32 new = queued_spin_get_locked_val(); > u32 prev; > > asm volatile( > diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h > index 8b20f5e22bba..35f9525381e6 100644 > --- a/arch/powerpc/include/asm/qspinlock_types.h > +++ b/arch/powerpc/include/asm/qspinlock_types.h > @@ -29,6 +29,8 @@ typedef struct qspinlock { > * Bitfields in the lock word: > * > * 0: locked bit > + * 1-14: lock holder cpu > + * 15: unused bit > * 16: must queue bit > * 17-31: tail cpu (+1) So there is one more bit to store the tail cpu vs the lock holder cpu? > */ > @@ -39,6 +41,14 @@ typedef struct qspinlock { > #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) > #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) > > +#define _Q_OWNER_CPU_OFFSET 1 > +#define _Q_OWNER_CPU_BITS 14 > +#define _Q_OWNER_CPU_MASK _Q_SET_MASK(OWNER_CPU) > + > +#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS) > +#error "qspinlock does not support such large CONFIG_NR_CPUS" > +#endif > + > #define _Q_MUST_Q_OFFSET 16 > #define _Q_MUST_Q_BITS 1 > #define _Q_MUST_Q_MASK _Q_SET_MASK(MUST_Q) > diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c > index a906cc8f15fa..aa26cfe21f18 100644 > --- a/arch/powerpc/lib/qspinlock.c > +++ b/arch/powerpc/lib/qspinlock.c > @@ -50,7 +50,7 @@ static inline int get_tail_cpu(u32 val) > /* Take the lock by setting the lock bit, no other CPUs will touch it. */ > static __always_inline void lock_set_locked(struct qspinlock *lock) > { > - u32 new = _Q_LOCKED_VAL; > + u32 new = queued_spin_get_locked_val(); > u32 prev; > > asm volatile( > @@ -68,7 +68,7 @@ static __always_inline void lock_set_locked(struct qspinlock *lock) > /* Take lock, clearing tail, cmpxchg with old (which must not be locked) */ > static __always_inline int trylock_clear_tail_cpu(struct qspinlock *lock, u32 old) > { > - u32 new = _Q_LOCKED_VAL; > + u32 new = queued_spin_get_locked_val(); > u32 prev; > > BUG_ON(old & _Q_LOCKED_VAL); > @@ -116,7 +116,7 @@ static __always_inline u32 __trylock_cmpxchg(struct qspinlock *lock, u32 old, u3 > /* Take lock, preserving tail, cmpxchg with val (which must not be locked) */ > static __always_inline int trylock_with_tail_cpu(struct qspinlock *lock, u32 val) > { > - u32 newval = _Q_LOCKED_VAL | (val & _Q_TAIL_CPU_MASK); > + u32 newval = queued_spin_get_locked_val() | (val & _Q_TAIL_CPU_MASK); > > if (__trylock_cmpxchg(lock, val, newval) == val) > return 1;
On Thu Nov 10, 2022 at 10:40 AM AEST, Jordan Niethe wrote: > On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: > [resend as utf-8, not utf-7] > > Store the owner CPU number in the lock word so it may be yielded to, > > as powerpc's paravirtualised simple spinlocks do. > > --- > > arch/powerpc/include/asm/qspinlock.h | 8 +++++++- > > arch/powerpc/include/asm/qspinlock_types.h | 10 ++++++++++ > > arch/powerpc/lib/qspinlock.c | 6 +++--- > > 3 files changed, 20 insertions(+), 4 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h > > index 3ab354159e5e..44601b261e08 100644 > > --- a/arch/powerpc/include/asm/qspinlock.h > > +++ b/arch/powerpc/include/asm/qspinlock.h > > @@ -20,9 +20,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock) > > return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); > > } > > > > +static __always_inline u32 queued_spin_get_locked_val(void) > > Maybe this function should have "encode" in the name to match with > encode_tail_cpu(). Yep. > > +{ > > + /* XXX: make this use lock value in paca like simple spinlocks? */ > > Is that the paca's lock_token which is 0x8000? Yes, which AFAIKS is actually unused now with queued spinlocks. > > + return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); > > +} > > + > > static __always_inline int queued_spin_trylock(struct qspinlock *lock) > > { > > - u32 new = _Q_LOCKED_VAL; > > + u32 new = queued_spin_get_locked_val(); > > u32 prev; > > > > asm volatile( > > diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h > > index 8b20f5e22bba..35f9525381e6 100644 > > --- a/arch/powerpc/include/asm/qspinlock_types.h > > +++ b/arch/powerpc/include/asm/qspinlock_types.h > > @@ -29,6 +29,8 @@ typedef struct qspinlock { > > * Bitfields in the lock word: > > * > > * 0: locked bit > > + * 1-14: lock holder cpu > > + * 15: unused bit > > * 16: must queue bit > > * 17-31: tail cpu (+1) > > So there is one more bit to store the tail cpu vs the lock holder cpu? Yeah but the tail has to encode it as CPU+1. Thanks, Nick
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 3ab354159e5e..44601b261e08 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -20,9 +20,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock) return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); } +static __always_inline u32 queued_spin_get_locked_val(void) +{ + /* XXX: make this use lock value in paca like simple spinlocks? */ + return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); +} + static __always_inline int queued_spin_trylock(struct qspinlock *lock) { - u32 new = _Q_LOCKED_VAL; + u32 new = queued_spin_get_locked_val(); u32 prev; asm volatile( diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 8b20f5e22bba..35f9525381e6 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -29,6 +29,8 @@ typedef struct qspinlock { * Bitfields in the lock word: * * 0: locked bit + * 1-14: lock holder cpu + * 15: unused bit * 16: must queue bit * 17-31: tail cpu (+1) */ @@ -39,6 +41,14 @@ typedef struct qspinlock { #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +#define _Q_OWNER_CPU_OFFSET 1 +#define _Q_OWNER_CPU_BITS 14 +#define _Q_OWNER_CPU_MASK _Q_SET_MASK(OWNER_CPU) + +#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS) +#error "qspinlock does not support such large CONFIG_NR_CPUS" +#endif + #define _Q_MUST_Q_OFFSET 16 #define _Q_MUST_Q_BITS 1 #define _Q_MUST_Q_MASK _Q_SET_MASK(MUST_Q) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index a906cc8f15fa..aa26cfe21f18 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -50,7 +50,7 @@ static inline int get_tail_cpu(u32 val) /* Take the lock by setting the lock bit, no other CPUs will touch it. */ static __always_inline void lock_set_locked(struct qspinlock *lock) { - u32 new = _Q_LOCKED_VAL; + u32 new = queued_spin_get_locked_val(); u32 prev; asm volatile( @@ -68,7 +68,7 @@ static __always_inline void lock_set_locked(struct qspinlock *lock) /* Take lock, clearing tail, cmpxchg with old (which must not be locked) */ static __always_inline int trylock_clear_tail_cpu(struct qspinlock *lock, u32 old) { - u32 new = _Q_LOCKED_VAL; + u32 new = queued_spin_get_locked_val(); u32 prev; BUG_ON(old & _Q_LOCKED_VAL); @@ -116,7 +116,7 @@ static __always_inline u32 __trylock_cmpxchg(struct qspinlock *lock, u32 old, u3 /* Take lock, preserving tail, cmpxchg with val (which must not be locked) */ static __always_inline int trylock_with_tail_cpu(struct qspinlock *lock, u32 val) { - u32 newval = _Q_LOCKED_VAL | (val & _Q_TAIL_CPU_MASK); + u32 newval = queued_spin_get_locked_val() | (val & _Q_TAIL_CPU_MASK); if (__trylock_cmpxchg(lock, val, newval) == val) return 1;