@@ -12,6 +12,7 @@
struct qnode {
struct qnode *next;
struct qspinlock *lock;
+ int yield_cpu; /* CPU of a preempted lock owner for this waiter to yield to; -1 when none. Written by the previous queue entry (propagate_yield_cpu / yield_to_prev), reset to -1 on queue entry. */
u8 locked; /* 1 if lock acquired */
};
@@ -28,6 +29,7 @@ static int HEAD_SPINS __read_mostly = (1<<13);
static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_yield_prev __read_mostly = false;
+static bool pv_yield_propagate_owner __read_mostly = false; /* propagate the preempted owner's CPU down the MCS queue; runtime-togglable via debugfs qspl_pv_yield_propagate_owner */
static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
@@ -197,7 +199,7 @@ static inline struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
BUG();
}
-static void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
+static void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
{
int owner;
u32 yield_count;
@@ -238,13 +240,76 @@ static void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt
cpu_relax();
}
+static void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+ __yield_to_locked_owner(lock, val, paravirt, false); /* steal path: never clears mustq */
+}
+
+static void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
+{
+ __yield_to_locked_owner(lock, val, paravirt, clear_mustq); /* queue-head path: caller chooses whether mustq may be cleared while yielding */
+}
+
+static void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+{
+ struct qnode *next;
+ int owner;
+
+ if (!paravirt)
+ return;
+ if (!pv_yield_propagate_owner)
+ return; /* feature disabled via debugfs */
+
+ owner = get_owner_cpu(val);
+ if (*set_yield_cpu == owner)
+ return; /* already told our successor about this owner */
+
+ next = READ_ONCE(node->next);
+ if (!next)
+ return; /* no queued successor to propagate to */
+
+ if (vcpu_is_preempted(owner)) { /* owner preempted: successor should yield to it */
+ next->yield_cpu = owner;
+ *set_yield_cpu = owner;
+ } else if (*set_yield_cpu != -1) { /* propagated a stale owner earlier: overwrite with the current (running) owner */
+ next->yield_cpu = owner;
+ *set_yield_cpu = owner;
+ }
+}
+
static void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
{
u32 yield_count;
+ int yield_cpu;
if (!paravirt)
goto relax;
+ if (!pv_yield_propagate_owner)
+ goto yield_prev;
+
+ yield_cpu = READ_ONCE(node->yield_cpu);
+ if (yield_cpu == -1) {
+ /* Propagate back the -1 CPU */
+ if (node->next && node->next->yield_cpu != -1)
+ node->next->yield_cpu = yield_cpu;
+ goto yield_prev;
+ }
+
+ yield_count = yield_count_of(yield_cpu);
+ if ((yield_count & 1) == 0)
+ goto yield_prev; /* owner vcpu is running */
+
+ smp_rmb();
+
+ if (yield_cpu == node->yield_cpu) {
+ if (node->next && node->next->yield_cpu != yield_cpu)
+ node->next->yield_cpu = yield_cpu;
+ yield_to_preempted(yield_cpu, yield_count);
+ return;
+ }
+
+yield_prev:
if (!pv_yield_prev)
goto relax;
@@ -276,7 +341,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
break;
if (val & _Q_LOCKED_VAL) {
- yield_to_locked_owner(lock, val, paravirt, false);
+ yield_to_locked_owner(lock, val, paravirt);
continue;
}
@@ -313,6 +378,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
node = &qnodesp->nodes[idx];
node->next = NULL;
node->lock = lock;
+ node->yield_cpu = -1;
node->locked = 0;
tail = encode_tail_cpu();
@@ -334,13 +400,21 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
while (!node->locked)
yield_to_prev(lock, node, prev_cpu, paravirt);
+ /* Clear out stale propagated yield_cpu */
+ if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
+ node->yield_cpu = -1;
+
smp_rmb(); /* acquire barrier for the mcs lock */
}
if (!MAYBE_STEALERS) {
+ int set_yield_cpu = -1;
+
/* We're at the head of the waitqueue, wait for the lock. */
- while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL)
- yield_to_locked_owner(lock, val, paravirt, false);
+ while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
+ propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
+ yield_head_to_locked_owner(lock, val, paravirt, false);
+ }
/* If we're the last queued, must clean up the tail. */
if ((val & _Q_TAIL_CPU_MASK) == tail) {
@@ -352,6 +426,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
/* We must be the owner, just set the lock bit and acquire */
lock_set_locked(lock);
} else {
+ int set_yield_cpu = -1;
int iters = 0;
again:
/* We're at the head of the waitqueue, wait for the lock. */
@@ -360,7 +435,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
lock_set_mustq(lock);
val |= _Q_MUST_Q_VAL;
}
- yield_to_locked_owner(lock, val, paravirt,
+ propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
+ yield_head_to_locked_owner(lock, val, paravirt,
pv_yield_allow_steal && (iters > HEAD_SPINS));
}
@@ -513,6 +589,22 @@ static int pv_yield_prev_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
+static int pv_yield_propagate_owner_set(void *data, u64 val)
+{
+ pv_yield_propagate_owner = !!val; /* debugfs write: any nonzero value enables propagation */
+
+ return 0;
+}
+
+static int pv_yield_propagate_owner_get(void *data, u64 *val)
+{
+ *val = pv_yield_propagate_owner; /* debugfs read: report current setting as 0/1 */
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
+
static __init int spinlock_debugfs_init(void)
{
debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
@@ -521,6 +613,7 @@ static __init int spinlock_debugfs_init(void)
debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
+ debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
}
return 0;