
[16/17] powerpc/qspinlock: allow indefinite spinning on a preempted owner

Message ID: 20220728063120.2867508-18-npiggin@gmail.com (mailing list archive)
State: Changes Requested
Series: powerpc: alternate queued spinlock implementation

Commit Message

Nicholas Piggin July 28, 2022, 6:31 a.m. UTC
Provide an option that holds off queueing indefinitely while the lock
owner is preempted. This could reduce queueing latencies for very
overcommitted vcpu situations.

This is disabled by default.
---
 arch/powerpc/lib/qspinlock.c | 91 +++++++++++++++++++++++++++++++-----
 1 file changed, 79 insertions(+), 12 deletions(-)
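
Because the option is disabled by default, it only takes effect after being
enabled at runtime through the debugfs file the patch adds. A minimal
userspace sketch, assuming debugfs is mounted at /sys/kernel/debug and that
powerpc's arch_debugfs_dir appears there as "powerpc":

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path; adjust if debugfs is mounted elsewhere. */
	const char *path =
		"/sys/kernel/debug/powerpc/qspl_pv_spin_on_preempted_owner";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* The attribute is parsed as an unsigned integer: "1" enables, "0" disables. */
	if (write(fd, "1", 1) != 1) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}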

Comments

Jordan Niethe Aug. 12, 2022, 4:49 a.m. UTC | #1
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> Provide an option that holds off queueing indefinitely while the lock
> owner is preempted. This could reduce queueing latencies for very
> overcommitted vcpu situations.
> 
> This is disabled by default.
> ---
>  arch/powerpc/lib/qspinlock.c | 91 +++++++++++++++++++++++++++++++-----
>  1 file changed, 79 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 24f68bd71e2b..5cfd69931e31 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -35,6 +35,7 @@ static int HEAD_SPINS __read_mostly = (1<<8);
>  
>  static bool pv_yield_owner __read_mostly = true;
>  static bool pv_yield_allow_steal __read_mostly = false;
> +static bool pv_spin_on_preempted_owner __read_mostly = false;
>  static bool pv_yield_prev __read_mostly = true;
>  static bool pv_yield_propagate_owner __read_mostly = true;
>  static bool pv_prod_head __read_mostly = false;
> @@ -220,13 +221,15 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
>  	BUG();
>  }
>  
> -static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
> +static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq, bool *preempted)
>  {
>  	int owner;
>  	u32 yield_count;
>  
>  	BUG_ON(!(val & _Q_LOCKED_VAL));
>  
> +	*preempted = false;
> +
>  	if (!paravirt)
>  		goto relax;
>  
> @@ -241,6 +244,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
>  
>  	spin_end();
>  
> +	*preempted = true;
> +
>  	/*
>  	 * Read the lock word after sampling the yield count. On the other side
>  	 * there may a wmb because the yield count update is done by the
> @@ -265,14 +270,14 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
>  	spin_cpu_relax();
>  }
>  
> -static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
> +static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool *preempted)

It seems like preempted parameter could be the return value of
yield_to_locked_owner(). Then callers that don't use the value returned in
preempted don't need to create an unnecessary variable to pass in.

Laurent Dufour Sept. 22, 2022, 3:02 p.m. UTC | #2
On 28/07/2022 08:31:19, Nicholas Piggin wrote:
> Provide an option that holds off queueing indefinitely while the lock
> owner is preempted. This could reduce queueing latencies for very
> overcommitted vcpu situations.
> 
> This is disabled by default.

Hi Nick,

I must have missed something here.

If this option is turned on, a CPU trying to take the lock while there is a
preempted owner will spin, checking lock->val and yielding to the lock
owner's CPU. Am I right?

If yes, why not queue and spin checking its own node's value, while yielding
to the lock owner CPU? This will generate less cache bouncing, which
is what the queued spinlock is trying to address, isn't it?

Thanks,
Laurent.

Nicholas Piggin Sept. 23, 2022, 8:16 a.m. UTC | #3
On Fri Sep 23, 2022 at 1:02 AM AEST, Laurent Dufour wrote:
> On 28/07/2022 08:31:19, Nicholas Piggin wrote:
> > Provide an option that holds off queueing indefinitely while the lock
> > owner is preempted. This could reduce queueing latencies for very
> > overcommitted vcpu situations.
> > 
> > This is disabled by default.
>
> Hi Nick,
>
> I must have missed something here.
>
> If this option is turned on, a CPU trying to take the lock while there is a
> preempted owner will spin, checking lock->val and yielding to the lock
> owner's CPU. Am I right?

Yes.

> If yes, why not queue and spin checking its own node's value, while yielding
> to the lock owner CPU?

I guess the idea is that once we start getting vCPU preemption, queueing
causes this "train wreck" behaviour where preempted lock waiters can halt
lock transfers to other waiters (whereas with simple spinlocks only owner
vCPU preemption matters). So the heuristics for
paravirt qspinlock basically come down to avoiding queueing and making
waiters behave more like a simple spinlock when it matters. That's the
case for upstream and this rewrite.

> This will generate less cache bouncing, which
> is what the queued spinlock is trying to address, isn't it?

It could. When the owner is preempted it's not going to be modifying
the lock word, and probably not the surrounding data in the same cache
line, and there won't be a lot of other try-lock operations coming in
(because they'll mostly queue up here as well). So cacheline bouncing
shouldn't be the worst problem we face here. But it possibly is a
concern.

I haven't yet measured any real improvement from this option, and it
possibly has some starvation potential, so it's disabled by default for
now.

Thanks,
Nick
Jordan Niethe Nov. 10, 2022, 12:44 a.m. UTC | #4
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
[resend as utf-8, not utf-7]
> Provide an option that holds off queueing indefinitely while the lock
> owner is preempted. This could reduce queueing latencies for very
> overcommitted vcpu situations.
> 
> This is disabled by default.
> ---
>  arch/powerpc/lib/qspinlock.c | 91 +++++++++++++++++++++++++++++++-----
>  1 file changed, 79 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 24f68bd71e2b..5cfd69931e31 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -35,6 +35,7 @@ static int HEAD_SPINS __read_mostly = (1<<8);
>  
>  static bool pv_yield_owner __read_mostly = true;
>  static bool pv_yield_allow_steal __read_mostly = false;
> +static bool pv_spin_on_preempted_owner __read_mostly = false;
>  static bool pv_yield_prev __read_mostly = true;
>  static bool pv_yield_propagate_owner __read_mostly = true;
>  static bool pv_prod_head __read_mostly = false;
> @@ -220,13 +221,15 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
>  	BUG();
>  }
>  
> -static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
> +static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq, bool *preempted)
>  {
>  	int owner;
>  	u32 yield_count;
>  
>  	BUG_ON(!(val & _Q_LOCKED_VAL));
>  
> +	*preempted = false;
> +
>  	if (!paravirt)
>  		goto relax;
>  
> @@ -241,6 +244,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
>  
>  	spin_end();
>  
> +	*preempted = true;
> +
>  	/*
>  	 * Read the lock word after sampling the yield count. On the other side
>  	 * there may a wmb because the yield count update is done by the
> @@ -265,14 +270,14 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
>  	spin_cpu_relax();
>  }
>  
> -static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
> +static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool *preempted)

It seems like preempted parameter could be the return value of
yield_to_locked_owner(). Then callers that don't use the value returned in
preempted don't need to create an unnecessary variable to pass in.

Nicholas Piggin Nov. 10, 2022, 11:38 a.m. UTC | #5
On Thu Nov 10, 2022 at 10:44 AM AEST, Jordan Niethe wrote:
> On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> [resend as utf-8, not utf-7]
> > Provide an option that holds off queueing indefinitely while the lock
> > owner is preempted. This could reduce queueing latencies for very
> > overcommitted vcpu situations.
> > 
> > This is disabled by default.
> > ---
> >  arch/powerpc/lib/qspinlock.c | 91 +++++++++++++++++++++++++++++++-----
> >  1 file changed, 79 insertions(+), 12 deletions(-)
> > 
> > diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> > index 24f68bd71e2b..5cfd69931e31 100644
> > --- a/arch/powerpc/lib/qspinlock.c
> > +++ b/arch/powerpc/lib/qspinlock.c
> > @@ -35,6 +35,7 @@ static int HEAD_SPINS __read_mostly = (1<<8);
> >  
> >  static bool pv_yield_owner __read_mostly = true;
> >  static bool pv_yield_allow_steal __read_mostly = false;
> > +static bool pv_spin_on_preempted_owner __read_mostly = false;
> >  static bool pv_yield_prev __read_mostly = true;
> >  static bool pv_yield_propagate_owner __read_mostly = true;
> >  static bool pv_prod_head __read_mostly = false;
> > @@ -220,13 +221,15 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
> >  	BUG();
> >  }
> >  
> > -static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
> > +static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq, bool *preempted)
> >  {
> >  	int owner;
> >  	u32 yield_count;
> >  
> >  	BUG_ON(!(val & _Q_LOCKED_VAL));
> >  
> > +	*preempted = false;
> > +
> >  	if (!paravirt)
> >  		goto relax;
> >  
> > @@ -241,6 +244,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
> >  
> >  	spin_end();
> >  
> > +	*preempted = true;
> > +
> >  	/*
> >  	 * Read the lock word after sampling the yield count. On the other side
> >  	 * there may a wmb because the yield count update is done by the
> > @@ -265,14 +270,14 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
> >  	spin_cpu_relax();
> >  }
> >  
> > -static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
> > +static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool *preempted)
>
> It seems like preempted parameter could be the return value of
> yield_to_locked_owner(). Then callers that don't use the value returned in
> preempted don't need to create an unnecessary variable to pass in.

That works.

Thanks,
Nick
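
For reference, a rough sketch of the shape that change could take (an
assumption about the eventual follow-up, not the code as merged): the yield
helpers return whether the owner was observed preempted, and callers that
don't need the information simply ignore the return value.

static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val,
						    bool paravirt, bool clear_mustq)
{
	bool preempted = false;

	/* body as in this patch, but setting the local instead of *preempted */

	return preempted;
}

static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}

static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val,
							bool paravirt, bool clear_mustq)
{
	return __yield_to_locked_owner(lock, val, paravirt, clear_mustq);
}

Call sites that need the result assign it, e.g.
preempted = yield_to_locked_owner(lock, val, paravirt);
while the first head-of-queue loop, which never reads it, can drop its dummy
bool entirely.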

Patch

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 24f68bd71e2b..5cfd69931e31 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -35,6 +35,7 @@  static int HEAD_SPINS __read_mostly = (1<<8);
 
 static bool pv_yield_owner __read_mostly = true;
 static bool pv_yield_allow_steal __read_mostly = false;
+static bool pv_spin_on_preempted_owner __read_mostly = false;
 static bool pv_yield_prev __read_mostly = true;
 static bool pv_yield_propagate_owner __read_mostly = true;
 static bool pv_prod_head __read_mostly = false;
@@ -220,13 +221,15 @@  static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 	BUG();
 }
 
-static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
+static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq, bool *preempted)
 {
 	int owner;
 	u32 yield_count;
 
 	BUG_ON(!(val & _Q_LOCKED_VAL));
 
+	*preempted = false;
+
 	if (!paravirt)
 		goto relax;
 
@@ -241,6 +244,8 @@  static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
 
 	spin_end();
 
+	*preempted = true;
+
 	/*
 	 * Read the lock word after sampling the yield count. On the other side
 	 * there may a wmb because the yield count update is done by the
@@ -265,14 +270,14 @@  static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
 	spin_cpu_relax();
 }
 
-static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool *preempted)
 {
-	__yield_to_locked_owner(lock, val, paravirt, false);
+	__yield_to_locked_owner(lock, val, paravirt, false, preempted);
 }
 
-static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
+static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq, bool *preempted)
 {
-	__yield_to_locked_owner(lock, val, paravirt, clear_mustq);
+	__yield_to_locked_owner(lock, val, paravirt, clear_mustq, preempted);
 }
 
 static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
@@ -364,12 +369,33 @@  static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
 
 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
 {
-	int iters;
+	int iters = 0;
+
+	if (!STEAL_SPINS) {
+		if (paravirt && pv_spin_on_preempted_owner) {
+			spin_begin();
+			for (;;) {
+				u32 val = READ_ONCE(lock->val);
+				bool preempted;
+
+				if (val & _Q_MUST_Q_VAL)
+					break;
+				if (!(val & _Q_LOCKED_VAL))
+					break;
+				if (!vcpu_is_preempted(get_owner_cpu(val)))
+					break;
+				yield_to_locked_owner(lock, val, paravirt, &preempted);
+			}
+			spin_end();
+		}
+		return false;
+	}
 
 	/* Attempt to steal the lock */
 	spin_begin();
 	for (;;) {
 		u32 val = READ_ONCE(lock->val);
+		bool preempted;
 
 		if (val & _Q_MUST_Q_VAL)
 			break;
@@ -382,9 +408,22 @@  static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 			continue;
 		}
 
-		yield_to_locked_owner(lock, val, paravirt);
-
-		iters++;
+		yield_to_locked_owner(lock, val, paravirt, &preempted);
+
+		if (paravirt && preempted) {
+			if (!pv_spin_on_preempted_owner)
+				iters++;
+			/*
+			 * pv_spin_on_preempted_owner doesn't increase iters
+			 * while the owner is preempted -- we won't interfere
+			 * with it by definition. This could introduce some
+			 * latency issues if we continually observe preempted
+			 * owners, but hopefully that's a rare corner case of
+			 * a badly oversubscribed system.
+			 */
+		} else {
+			iters++;
+		}
 
 		if (iters >= get_steal_spins(paravirt, false))
 			break;
@@ -463,8 +502,10 @@  static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		/* We're at the head of the waitqueue, wait for the lock. */
 		spin_begin();
 		while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
+			bool preempted;
+
 			propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
-			yield_head_to_locked_owner(lock, val, paravirt, false);
+			yield_head_to_locked_owner(lock, val, paravirt, false, &preempted);
 		}
 		spin_end();
 
@@ -486,11 +527,20 @@  static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		/* We're at the head of the waitqueue, wait for the lock. */
 		spin_begin();
 		while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
+			bool preempted;
+
 			propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
 			yield_head_to_locked_owner(lock, val, paravirt,
-					pv_yield_allow_steal && set_mustq);
+					pv_yield_allow_steal && set_mustq,
+					&preempted);
+
+			if (paravirt && preempted) {
+				if (!pv_spin_on_preempted_owner)
+					iters++;
+			} else {
+				iters++;
+			}
 
-			iters++;
 			if (!set_mustq && iters >= get_head_spins(paravirt)) {
 				set_mustq = true;
 				lock_set_mustq(lock);
@@ -663,6 +713,22 @@  static int pv_yield_allow_steal_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
 
+static int pv_spin_on_preempted_owner_set(void *data, u64 val)
+{
+	pv_spin_on_preempted_owner = !!val;
+
+	return 0;
+}
+
+static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
+{
+	*val = pv_spin_on_preempted_owner;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
+
 static int pv_yield_prev_set(void *data, u64 val)
 {
 	pv_yield_prev = !!val;
@@ -719,6 +785,7 @@  static __init int spinlock_debugfs_init(void)
 	if (is_shared_processor()) {
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
+		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
 		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
 		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);