@@ -5,6 +5,15 @@
#include <linux/compiler.h>
#include <asm/qspinlock_types.h>
+/*
+ * The trylock itself may steal. This makes trylocks slightly stronger, and
+ * might make spin locks slightly more efficient when stealing.
+ *
+ * This is a compile-time setting: when it is enabled there may always be
+ * stealers, so the nosteal paths become unused.
+ */
+#define _Q_SPIN_TRY_LOCK_STEAL 1
+
static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
{
return READ_ONCE(lock->val);
@@ -26,11 +35,12 @@ static __always_inline u32 queued_spin_encode_locked_val(void)
return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
}
-static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
{
u32 new = queued_spin_encode_locked_val();
u32 prev;
+ /* Trylock succeeds only when unlocked and no queued nodes */
asm volatile(
"1: lwarx %0,0,%1,%3 # queued_spin_trylock \n"
" cmpwi 0,%0,0 \n"
@@ -47,6 +57,38 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock)
return likely(prev == 0);
}
+static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
+{
+ u32 new = queued_spin_encode_locked_val();
+ u32 prev, tmp;
+
+ /*
+  * Trylock may get ahead of queued nodes if it finds unlocked.
+  *
+  * One attempt to take the lock, looking only at the bits of lock->val
+  * that lie outside _Q_TAIL_CPU_MASK:
+  *  - lwarx loads lock->val into prev;
+  *  - andc. leaves prev & ~_Q_TAIL_CPU_MASK in tmp; if that is non-zero
+  *    the branch to 2: skips both the store and the acquire barrier;
+  *  - otherwise tmp = (prev & _Q_TAIL_CPU_MASK) | new, so the tail bits
+  *    are written back unchanged together with the new locked value;
+  *  - stwcx. stores tmp, and the sequence restarts from 1: if that
+  *    conditional store does not succeed.
+  *
+  * The return value is non-zero exactly when prev had no bits set
+  * outside _Q_TAIL_CPU_MASK, i.e. when the store path was taken.
+  */
+ asm volatile(
+"1: lwarx %0,0,%2,%5 # queued_spin_trylock \n"
+" andc. %1,%0,%4 \n"
+" bne- 2f \n"
+" and %1,%0,%4 \n"
+" or %1,%1,%3 \n"
+" stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"2: \n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
+ "i" (IS_ENABLED(CONFIG_PPC64))
+ : "cr0", "memory");
+
+ return likely(!(prev & ~_Q_TAIL_CPU_MASK));
+}
+
+/*
+ * Make a single attempt to take @lock without queueing.
+ *
+ * _Q_SPIN_TRY_LOCK_STEAL is a compile-time constant, so exactly one of the
+ * two helpers is selected at build time. Returns whatever the chosen helper
+ * returns: non-zero when the lock was taken, zero otherwise.
+ */
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+{
+	if (_Q_SPIN_TRY_LOCK_STEAL)
+		return __queued_spin_trylock_steal(lock);
+
+	return __queued_spin_trylock_nosteal(lock);
+}
+
void queued_spin_lock_slowpath(struct qspinlock *lock);
static __always_inline void queued_spin_lock(struct qspinlock *lock)
@@ -24,7 +24,11 @@ struct qnodes {
/* Tuning parameters */
static int steal_spins __read_mostly = (1<<5);
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true; /* fixed: trylock may steal */
+#else
static bool maybe_stealers __read_mostly = true; /* non-const: may change at run time */
+#endif
static int head_spins __read_mostly = (1<<8);
static bool pv_yield_owner __read_mostly = true;
@@ -527,6 +531,10 @@ void pv_spinlocks_init(void)
#include <linux/debugfs.h>
static int steal_spins_set(void *data, u64 val)
{
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+ /* maybe_stealers remains true */
+ steal_spins = val;
+#else
static DEFINE_MUTEX(lock);
/*
@@ -551,6 +559,7 @@ static int steal_spins_set(void *data, u64 val)
steal_spins = val;
}
mutex_unlock(&lock);
+#endif
return 0;
}
This gives trylock slightly more strength, and it also gives most of the
benefit of passing 'val' back through the slowpath without the complexity.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/qspinlock.h | 44 +++++++++++++++++++++++++++-
 arch/powerpc/lib/qspinlock.c         |  9 ++++++
 2 files changed, 52 insertions(+), 1 deletion(-)