
[09/17] powerpc/qspinlock: implement option to yield to previous node

Message ID 20220728063120.2867508-11-npiggin@gmail.com (mailing list archive)
State Changes Requested
Series powerpc: alternate queued spinlock implementation

Commit Message

Nicholas Piggin July 28, 2022, 6:31 a.m. UTC
Queued waiters which are not at the head of the queue don't spin on
the lock word but their qnode lock word, waiting for the previous queued
CPU to release them. Add an option which allows these waiters to yield
to the previous CPU if its vCPU is preempted.

Disable this option by default for now, i.e., no logical change.
---
 arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)
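
For orientation: the yielding here builds on the existing shared-processor paravirt helpers, yield_count_of() and yield_to_preempted() (arch/powerpc/include/asm/paravirt.h). A minimal sketch of the convention the patch relies on, using a hypothetical helper name, illustrative only:

/*
 * Sketch, not from the patch: yield_count_of(cpu) reads the target vCPU's
 * yield count, whose low bit says whether that vCPU is currently dispatched.
 * If it is preempted, yield_to_preempted(cpu, yield_count) hands the caller's
 * time slice to it (an H_CONFER-style yield on pseries), passing the count
 * that was sampled so a stale observation is harmless.
 */
static inline bool yield_count_says_preempted(u32 yield_count)
{
	return (yield_count & 1) != 0;	/* even: vCPU running, odd: vCPU preempted */
}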

Comments

Jordan Niethe Aug. 12, 2022, 2:07 a.m. UTC | #1
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> Queued waiters which are not at the head of the queue don't spin on
> the lock word but their qnode lock word, waiting for the previous queued
> CPU to release them. Add an option which allows these waiters to yield
> to the previous CPU if its vCPU is preempted.
> 
> Disable this option by default for now, i.e., no logical change.
> ---
>  arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 55286ac91da5..b39f8c5b329c 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true;
>  static int HEAD_SPINS __read_mostly = (1<<8);
>  
>  static bool pv_yield_owner __read_mostly = true;
> +static bool pv_yield_prev __read_mostly = true;

Similar suggestion: maybe pv_yield_prev_enabled would read better.

Isn't this enabled by default contrary to the commit message?


>  
>  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>  
> @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va
>  	cpu_relax();
>  }
>  
> +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)

yield_to_locked_owner() takes a raw val and works out the cpu to yield to.
For consistency, I think yield_to_prev() should take the raw val and work it out too.

> +{
> +	u32 yield_count;
> +
> +	if (!paravirt)
> +		goto relax;
> +
> +	if (!pv_yield_prev)
> +		goto relax;
> +
> +	yield_count = yield_count_of(prev_cpu);
> +	if ((yield_count & 1) == 0)
> +		goto relax; /* owner vcpu is running */
> +
> +	smp_rmb(); /* See yield_to_locked_owner comment */
> +
> +	if (!node->locked) {
> +		yield_to_preempted(prev_cpu, yield_count);
> +		return;
> +	}
> +
> +relax:
> +	cpu_relax();
> +}
> +
>  
>  static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
>  {
> @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>  	 */
>  	if (old & _Q_TAIL_CPU_MASK) {
>  		struct qnode *prev = get_tail_qnode(lock, old);
> +		int prev_cpu = get_tail_cpu(old);

This could then be removed.

>  
>  		/* Link @node into the waitqueue. */
>  		WRITE_ONCE(prev->next, node);
>  
>  		/* Wait for mcs node lock to be released */
>  		while (!node->locked)
> -			cpu_relax();
> +			yield_to_prev(lock, node, prev_cpu, paravirt);

And would have this as:
			yield_to_prev(lock, node, old, paravirt);


>  
>  		smp_rmb(); /* acquire barrier for the mcs lock */
>  	}
> @@ -448,12 +475,29 @@ static int pv_yield_owner_get(void *data, u64 *val)
>  
>  DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
>  
> +static int pv_yield_prev_set(void *data, u64 val)
> +{
> +	pv_yield_prev = !!val;
> +
> +	return 0;
> +}
> +
> +static int pv_yield_prev_get(void *data, u64 *val)
> +{
> +	*val = pv_yield_prev;
> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
> +
>  static __init int spinlock_debugfs_init(void)
>  {
>  	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
>  	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
>  	if (is_shared_processor()) {
>  		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
> +		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
>  	}
>  
>  	return 0;
Jordan Niethe Nov. 10, 2022, 12:41 a.m. UTC | #2
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
[resend as utf-8, not utf-7]
> Queued waiters which are not at the head of the queue don't spin on
> the lock word but their qnode lock word, waiting for the previous queued
> CPU to release them. Add an option which allows these waiters to yield
> to the previous CPU if its vCPU is preempted.
> 
> Disable this option by default for now, i.e., no logical change.
> ---
>  arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 55286ac91da5..b39f8c5b329c 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true;
>  static int HEAD_SPINS __read_mostly = (1<<8);
>  
>  static bool pv_yield_owner __read_mostly = true;
> +static bool pv_yield_prev __read_mostly = true;

Similar suggestion: maybe pv_yield_prev_enabled would read better.

Isn't this enabled by default contrary to the commit message?


>  
>  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>  
> @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va
>  	cpu_relax();
>  }
>  
> +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)

yield_to_locked_owner() takes a raw val and works out the cpu to yield to.
For consistency, I think yield_to_prev() should take the raw val and work it out too.

> +{
> +	u32 yield_count;
> +
> +	if (!paravirt)
> +		goto relax;
> +
> +	if (!pv_yield_prev)
> +		goto relax;
> +
> +	yield_count = yield_count_of(prev_cpu);
> +	if ((yield_count & 1) == 0)
> +		goto relax; /* owner vcpu is running */
> +
> +	smp_rmb(); /* See yield_to_locked_owner comment */
> +
> +	if (!node->locked) {
> +		yield_to_preempted(prev_cpu, yield_count);
> +		return;
> +	}
> +
> +relax:
> +	cpu_relax();
> +}
> +
>  
>  static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
>  {
> @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>  	 */
>  	if (old & _Q_TAIL_CPU_MASK) {
>  		struct qnode *prev = get_tail_qnode(lock, old);
> +		int prev_cpu = get_tail_cpu(old);

This could then be removed.

>  
>  		/* Link @node into the waitqueue. */
>  		WRITE_ONCE(prev->next, node);
>  
>  		/* Wait for mcs node lock to be released */
>  		while (!node->locked)
> -			cpu_relax();
> +			yield_to_prev(lock, node, prev_cpu, paravirt);

And would have this as:
			yield_to_prev(lock, node, old, paravirt);


>  
>  		smp_rmb(); /* acquire barrier for the mcs lock */
>  	}
> @@ -448,12 +475,29 @@ static int pv_yield_owner_get(void *data, u64 *val)
>  
>  DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
>  
> +static int pv_yield_prev_set(void *data, u64 val)
> +{
> +	pv_yield_prev = !!val;
> +
> +	return 0;
> +}
> +
> +static int pv_yield_prev_get(void *data, u64 *val)
> +{
> +	*val = pv_yield_prev;
> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
> +
>  static __init int spinlock_debugfs_init(void)
>  {
>  	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
>  	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
>  	if (is_shared_processor()) {
>  		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
> +		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
>  	}
>  
>  	return 0;
Nicholas Piggin Nov. 10, 2022, 11:14 a.m. UTC | #3
On Thu Nov 10, 2022 at 10:41 AM AEST, Jordan Niethe wrote:
> On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> [resend as utf-8, not utf-7]
> > Queued waiters which are not at the head of the queue don't spin on
> > the lock word but their qnode lock word, waiting for the previous queued
> > CPU to release them. Add an option which allows these waiters to yield
> > to the previous CPU if its vCPU is preempted.
> > 
> > Disable this option by default for now, i.e., no logical change.
> > ---
> >  arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++-
> >  1 file changed, 45 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> > index 55286ac91da5..b39f8c5b329c 100644
> > --- a/arch/powerpc/lib/qspinlock.c
> > +++ b/arch/powerpc/lib/qspinlock.c
> > @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true;
> >  static int HEAD_SPINS __read_mostly = (1<<8);
> >  
> >  static bool pv_yield_owner __read_mostly = true;
> > +static bool pv_yield_prev __read_mostly = true;
>
> Similar suggestion: maybe pv_yield_prev_enabled would read better.
>
> Isn't this enabled by default contrary to the commit message?

Yeah, a few of those changelogs got out of sync.

>
> >  
> >  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
> >  
> > @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va
> >  	cpu_relax();
> >  }
> >  
> > +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
>
> yield_to_locked_owner() takes a raw val and works out the cpu to yield to.
> For consistency, I think yield_to_prev() should take the raw val and work it out too.

Good thinking.

> > +{
> > +	u32 yield_count;
> > +
> > +	if (!paravirt)
> > +		goto relax;
> > +
> > +	if (!pv_yield_prev)
> > +		goto relax;
> > +
> > +	yield_count = yield_count_of(prev_cpu);
> > +	if ((yield_count & 1) == 0)
> > +		goto relax; /* owner vcpu is running */
> > +
> > +	smp_rmb(); /* See yield_to_locked_owner comment */
> > +
> > +	if (!node->locked) {
> > +		yield_to_preempted(prev_cpu, yield_count);
> > +		return;
> > +	}
> > +
> > +relax:
> > +	cpu_relax();
> > +}
> > +
> >  
> >  static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
> >  {
> > @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
> >  	 */
> >  	if (old & _Q_TAIL_CPU_MASK) {
> >  		struct qnode *prev = get_tail_qnode(lock, old);
> > +		int prev_cpu = get_tail_cpu(old);
>
> This could then be removed.
>
> >  
> >  		/* Link @node into the waitqueue. */
> >  		WRITE_ONCE(prev->next, node);
> >  
> >  		/* Wait for mcs node lock to be released */
> >  		while (!node->locked)
> > -			cpu_relax();
> > +			yield_to_prev(lock, node, prev_cpu, paravirt);
>
> And would have this as:
> 			yield_to_prev(lock, node, old, paravirt);

Yep.

Thanks,
Nick
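
For illustration, a sketch of how the agreed change might look in a respin (this is not the posted patch): yield_to_prev() takes the raw lock word and works out the previous CPU itself with get_tail_cpu(), matching yield_to_locked_owner(), so the prev_cpu local in queued_spin_lock_mcs_queue() can be dropped and the call site becomes yield_to_prev(lock, node, old, paravirt).

static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
{
	int prev_cpu = get_tail_cpu(val);	/* derive the CPU to yield to from the lock word */
	u32 yield_count;

	if (!paravirt)
		goto relax;

	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* prev vcpu is running */

	smp_rmb(); /* See yield_to_locked_owner comment */

	if (!node->locked) {
		yield_to_preempted(prev_cpu, yield_count);
		return;
	}

relax:
	cpu_relax();
}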

Patch

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 55286ac91da5..b39f8c5b329c 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -26,6 +26,7 @@  static bool MAYBE_STEALERS __read_mostly = true;
 static int HEAD_SPINS __read_mostly = (1<<8);
 
 static bool pv_yield_owner __read_mostly = true;
+static bool pv_yield_prev __read_mostly = true;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
@@ -224,6 +225,31 @@  static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va
 	cpu_relax();
 }
 
+static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
+{
+	u32 yield_count;
+
+	if (!paravirt)
+		goto relax;
+
+	if (!pv_yield_prev)
+		goto relax;
+
+	yield_count = yield_count_of(prev_cpu);
+	if ((yield_count & 1) == 0)
+		goto relax; /* owner vcpu is running */
+
+	smp_rmb(); /* See yield_to_locked_owner comment */
+
+	if (!node->locked) {
+		yield_to_preempted(prev_cpu, yield_count);
+		return;
+	}
+
+relax:
+	cpu_relax();
+}
+
 
 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
 {
@@ -291,13 +317,14 @@  static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	 */
 	if (old & _Q_TAIL_CPU_MASK) {
 		struct qnode *prev = get_tail_qnode(lock, old);
+		int prev_cpu = get_tail_cpu(old);
 
 		/* Link @node into the waitqueue. */
 		WRITE_ONCE(prev->next, node);
 
 		/* Wait for mcs node lock to be released */
 		while (!node->locked)
-			cpu_relax();
+			yield_to_prev(lock, node, prev_cpu, paravirt);
 
 		smp_rmb(); /* acquire barrier for the mcs lock */
 	}
@@ -448,12 +475,29 @@  static int pv_yield_owner_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
 
+static int pv_yield_prev_set(void *data, u64 val)
+{
+	pv_yield_prev = !!val;
+
+	return 0;
+}
+
+static int pv_yield_prev_get(void *data, u64 *val)
+{
+	*val = pv_yield_prev;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
+
 static __init int spinlock_debugfs_init(void)
 {
 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
 	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
 	if (is_shared_processor()) {
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
+		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
 	}
 
 	return 0;
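
Usage note (illustrative only): the knob is then toggled through debugfs at runtime on shared-processor guests. The sketch below assumes arch_debugfs_dir resolves to /sys/kernel/debug/powerpc, as it does on current kernels; it writes 0 to qspl_pv_yield_prev to disable yielding to the previous queue node (write 1 to re-enable, read to see the current value in the "%llu\n" format).

/* Illustrative userspace toggle for the new debugfs tunable. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/powerpc/qspl_pv_yield_prev";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);	/* debugfs not mounted, file absent, or not root */
		return 1;
	}
	fputs("0\n", f);	/* 0 disables pv_yield_prev, 1 enables it */
	fclose(f);
	return 0;
}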