new file mode 100644
@@ -0,0 +1,82 @@
+/* Misc low level processor primitives */
+#ifndef _LINUX_PROCESSOR_H
+#define _LINUX_PROCESSOR_H
+
+#include <asm/processor.h>
+
+/*
+ * spin_begin is used before beginning a busy-wait loop, and must be paired
+ * with spin_end when the loop is exited. spin_cpu_relax must be called
+ * within the loop.
+ *
+ * The loop body should be as small and fast as possible, on the order of
+ * tens of instructions/cycles as a guide. It should avoid calling
+ * cpu_relax, or any "spin" or sleep type of primitive, including nested
+ * uses of these primitives. It should not lock or take any other resource.
+ * Violations of these guidelines will not cause a bug, but may cause
+ * sub-optimal performance.
+ *
+ * These loops are optimized to be used where wait times are expected to be
+ * less than the cost of a context switch (and associated overhead).
+ *
+ * Detecting the resource owner, and deciding whether to spin, sleep, or
+ * guest-yield (e.g., spin lock holder vcpu preempted, or mutex owner not
+ * on CPU), can be done within the loop body.
+ */
+#ifndef spin_begin
+#define spin_begin()
+#endif
+
+#ifndef spin_cpu_relax
+#define spin_cpu_relax() cpu_relax()
+#endif
+
+/*
+ * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
+ * necessary. This should be used if the wait is expected to take longer
+ * than context switch overhead, but we can't sleep or do a directed yield.
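+ *
+ * An illustrative sketch, reusing the hypothetical "flag" above, with
+ * the undirected yield in place of the relax:
+ *
+ *	spin_begin();
+ *	while (!READ_ONCE(flag))
+ *		spin_cpu_yield();
+ *	spin_end();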
+ */
+#ifndef spin_cpu_yield
+#define spin_cpu_yield() cpu_relax_yield()
+#endif
+
+#ifndef spin_end
+#define spin_end()
+#endif
+
+/*
+ * spin_until_cond_likely can be used to wait for a condition to become
+ * true. The condition is expected to be true on the first test in the
+ * common case, so that little or no spinning occurs.
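+ *
+ * Illustrative use, again with the hypothetical "flag":
+ *
+ *	spin_until_cond_likely(READ_ONCE(flag));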
+ */
+#ifndef spin_until_cond_likely
+#define spin_until_cond_likely(cond) \
+do { \
+ spin_begin(); \
+	while (unlikely(!(cond))) \
+ spin_cpu_relax(); \
+ spin_end(); \
+} while (0)
+#endif
+
+#endif /* _LINUX_PROCESSOR_H */
@@ -398,6 +398,33 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#ifdef CONFIG_PPC64
#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0)
+
+#ifndef spin_begin
+#define spin_begin() HMT_low()
+#endif
+
+#ifndef spin_cpu_relax
+#define spin_cpu_relax() barrier()
+#endif
+
+#ifndef spin_cpu_yield
+#define spin_cpu_yield()
+#endif
+
+#ifndef spin_end
+#define spin_end() HMT_medium()
+#endif
+
+#define spin_until_cond_likely(cond) \
+do { \
+	if (unlikely(!(cond))) { \
+		spin_begin(); \
+		while (!(cond)) \
+ spin_cpu_relax(); \
+ spin_end(); \
+ } \
+} while (0)
+
#else
#define cpu_relax() barrier()
#endif
@@ -442,6 +442,7 @@ void __delay(unsigned long loops)
unsigned long start;
int diff;
+ spin_begin();
if (__USE_RTC()) {
start = get_rtcl();
do {
@@ -449,13 +450,14 @@ void __delay(unsigned long loops)
diff = get_rtcl() - start;
if (diff < 0)
diff += 1000000000;
+ spin_cpu_relax();
} while (diff < loops);
} else {
start = get_tbl();
while (get_tbl() - start < loops)
- HMT_low();
- HMT_medium();
+ spin_cpu_relax();
}
+ spin_end();
}
EXPORT_SYMBOL(__delay);
@@ -181,8 +181,10 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
+ spin_begin();
while(test_bit(HPTE_LOCK_BIT, word))
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}
}
@@ -25,9 +25,11 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr)
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
preempt_enable();
+ spin_begin();
do {
- cpu_relax();
+ spin_cpu_relax();
} while (test_bit(bitnum, addr));
+ spin_end();
preempt_disable();
}
#endif
@@ -80,8 +80,7 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi)
if (dev && dev->npinfo) {
int owner = smp_processor_id();
- while (cmpxchg(&napi->poll_owner, -1, owner) != -1)
- cpu_relax();
+ spin_until_cond_likely(cmpxchg(&napi->poll_owner, -1, owner) == -1);
return napi;
}
@@ -32,6 +32,7 @@
* by Keith Owens and Andrea Arcangeli
*/
+#include <linux/processor.h>
#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/lockdep.h>
@@ -108,12 +109,8 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
unsigned ret;
-repeat:
- ret = READ_ONCE(s->sequence);
- if (unlikely(ret & 1)) {
- cpu_relax();
- goto repeat;
- }
+ spin_until_cond_likely(!((ret = READ_ONCE(s->sequence)) & 1));
+
return ret;
}
@@ -27,8 +27,10 @@ struct mcs_spinlock {
*/
#define arch_mcs_spin_lock_contended(l) \
do { \
+ spin_begin(); \
while (!(smp_load_acquire(l))) \
- cpu_relax(); \
+ spin_cpu_relax(); \
+ spin_end(); \
} while (0)
#endif
@@ -107,8 +109,10 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
if (likely(cmpxchg_release(lock, node, NULL) == node))
return;
/* Wait until the next pointer is set */
+ spin_begin();
while (!(next = READ_ONCE(node->next)))
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}
/* Pass lock to next waiter. */
@@ -427,6 +427,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
bool ret = true;
rcu_read_lock();
+ spin_begin();
while (__mutex_owner(lock) == owner) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
@@ -450,8 +451,9 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
break;
}
- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
rcu_read_unlock();
return ret;
@@ -532,20 +534,25 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
goto fail;
}
+ spin_begin();
for (;;) {
struct task_struct *owner;
/* Try to acquire the mutex... */
owner = __mutex_trylock_or_owner(lock);
- if (!owner)
+ if (!owner) {
+ spin_end();
break;
+ }
/*
* There's an owner, wait for it to either
* release the lock or go to sleep.
*/
- if (!mutex_spin_on_owner(lock, owner, ww_ctx, waiter))
+ if (!mutex_spin_on_owner(lock, owner, ww_ctx, waiter)) {
+ spin_end();
goto fail_unlock;
+ }
/*
-	 * The cpu_relax() call is a compiler barrier which forces
+	 * The spin_cpu_relax() call is a compiler barrier which forces
@@ -553,7 +560,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
- cpu_relax();
+ spin_cpu_relax();
}
if (!waiter)
@@ -53,6 +53,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
*/
old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
+ spin_begin();
for (;;) {
if (atomic_read(&lock->tail) == curr &&
atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
@@ -80,8 +81,9 @@ osq_wait_next(struct optimistic_spin_queue *lock,
break;
}
- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
return next;
}
@@ -53,10 +53,12 @@ struct __qrwlock {
static __always_inline void
rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
{
+ spin_begin();
while ((cnts & _QW_WMASK) == _QW_LOCKED) {
- cpu_relax();
+ spin_cpu_relax();
cnts = atomic_read_acquire(&lock->cnts);
}
+ spin_end();
}
/**
@@ -123,6 +125,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
* Set the waiting flag to notify readers that a writer is pending,
* or wait for a previous writer to go away.
*/
+ spin_begin();
for (;;) {
struct __qrwlock *l = (struct __qrwlock *)lock;
@@ -130,7 +133,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
(cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
break;
- cpu_relax();
+ spin_cpu_relax();
}
/* When no more readers, set the locked flag */
@@ -141,8 +144,10 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
_QW_LOCKED) == _QW_WAITING))
break;
- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
+
unlock:
arch_spin_unlock(&lock->wait_lock);
}
@@ -361,6 +361,7 @@ void queued_spin_unlock_wait(struct qspinlock *lock)
{
u32 val;
+ spin_begin();
for (;;) {
val = atomic_read(&lock->val);
@@ -371,14 +372,15 @@ void queued_spin_unlock_wait(struct qspinlock *lock)
break;
/* not locked, but pending, wait until we observe the lock */
- cpu_relax();
+ spin_cpu_relax();
}
/* any unlock is good */
while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
- cpu_relax();
+ spin_cpu_relax();
done:
+ spin_end();
smp_acquire__after_ctrl_dep();
}
EXPORT_SYMBOL(queued_spin_unlock_wait);
@@ -427,8 +429,10 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* 0,1,0 -> 0,0,1
*/
if (val == _Q_PENDING_VAL) {
+ spin_begin();
while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}
/*
@@ -608,8 +612,10 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* contended path; wait for next if not observed yet, release.
*/
if (!next) {
+ spin_begin();
while (!(next = READ_ONCE(node->next)))
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}
arch_mcs_spin_unlock_contended(&next->locked);
@@ -292,15 +292,19 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
bool wait_early;
for (;;) {
+ spin_begin();
for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
- if (READ_ONCE(node->locked))
+ if (READ_ONCE(node->locked)) {
+ spin_end();
return;
+ }
if (pv_wait_early(pp, loop)) {
wait_early = true;
break;
}
- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
/*
* Order pn->state vs pn->locked thusly:
@@ -416,11 +420,15 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
* disable lock stealing before attempting to acquire the lock.
*/
set_pending(lock);
+ spin_begin();
for (loop = SPIN_THRESHOLD; loop; loop--) {
- if (trylock_clear_pending(lock))
+ if (trylock_clear_pending(lock)) {
+ spin_end();
goto gotlock;
- cpu_relax();
+ }
+ spin_cpu_relax();
}
+ spin_end();
clear_pending(lock);
@@ -358,6 +358,7 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
goto out;
rcu_read_lock();
+ spin_begin();
while (sem->owner == owner) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
@@ -373,12 +374,14 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
*/
if (!owner->on_cpu || need_resched() ||
vcpu_is_preempted(task_cpu(owner))) {
+ spin_end();
rcu_read_unlock();
return false;
}
- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
rcu_read_unlock();
out:
/*
@@ -408,6 +411,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
* 2) readers own the lock as we can't determine if they are
* actively running or not.
*/
+ spin_begin();
while (rwsem_spin_on_owner(sem)) {
/*
* Try to acquire the lock
@@ -432,8 +436,9 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
- cpu_relax();
+ spin_cpu_relax();
}
+ spin_end();
osq_unlock(&sem->osq);
done:
preempt_enable();
@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <linux/stackprotector.h>
#include <linux/suspend.h>
+#include <linux/processor.h>
#include <asm/tlb.h>
@@ -63,9 +64,13 @@ static noinline int __cpuidle cpu_idle_poll(void)
trace_cpu_idle_rcuidle(0, smp_processor_id());
local_irq_enable();
stop_critical_timings();
+
+ spin_begin();
while (!tif_need_resched() &&
(cpu_idle_force_poll || tick_check_broadcast_expired()))
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
+
start_critical_timings();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
rcu_idle_exit();