Message ID | 20220728063120.2867508-11-npiggin@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | powerpc: alternate queued spinlock implementation | expand |
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: > Queued waiters which are not at the head of the queue don't spin on > the lock word but their qnode lock word, waiting for the previous queued > CPU to release them. Add an option which allows these waiters to yield > to the previous CPU if its vCPU is preempted. > > Disable this option by default for now, i.e., no logical change. > --- > arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++- > 1 file changed, 45 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c > index 55286ac91da5..b39f8c5b329c 100644 > --- a/arch/powerpc/lib/qspinlock.c > +++ b/arch/powerpc/lib/qspinlock.c > @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true; > static int HEAD_SPINS __read_mostly = (1<<8); > > static bool pv_yield_owner __read_mostly = true; > +static bool pv_yield_prev __read_mostly = true; Similar suggestion, maybe pv_yield_prev_enabled would read better. Isn't this enabled by default, contrary to the commit message? > > static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); > > @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va > cpu_relax(); > } > > +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) yield_to_locked_owner() takes a raw val and works out the cpu to yield to. I think for consistency have yield_to_prev() take the raw val and work it out too. 
> +{ > + u32 yield_count; > + > + if (!paravirt) > + goto relax; > + > + if (!pv_yield_prev) > + goto relax; > + > + yield_count = yield_count_of(prev_cpu); > + if ((yield_count & 1) == 0) > + goto relax; /* owner vcpu is running */ > + > + smp_rmb(); /* See yield_to_locked_owner comment */ > + > + if (!node->locked) { > + yield_to_preempted(prev_cpu, yield_count); > + return; > + } > + > +relax: > + cpu_relax(); > +} > + > > static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) > { > @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b > */ > if (old & _Q_TAIL_CPU_MASK) { > struct qnode *prev = get_tail_qnode(lock, old); > + int prev_cpu = get_tail_cpu(old); This could then be removed. > > /* Link @node into the waitqueue. */ > WRITE_ONCE(prev->next, node); > > /* Wait for mcs node lock to be released */ > while (!node->locked) > - cpu_relax(); > + yield_to_prev(lock, node, prev_cpu, paravirt); And would have this as: yield_to_prev(lock, node, old, paravirt); > > smp_rmb(); /* acquire barrier for the mcs lock */ > } > @@ -448,12 +475,29 @@ static int pv_yield_owner_get(void *data, u64 *val) > > DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); > > +static int pv_yield_prev_set(void *data, u64 val) > +{ > + pv_yield_prev = !!val; > + > + return 0; > +} > + > +static int pv_yield_prev_get(void *data, u64 *val) > +{ > + *val = pv_yield_prev; > + > + return 0; > +} > + > +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); > + > static __init int spinlock_debugfs_init(void) > { > debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); > debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); > if (is_shared_processor()) { > debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); > + 
debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); > } > > return 0;
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: [resend as utf-8, not utf-7] > Queued waiters which are not at the head of the queue don't spin on > the lock word but their qnode lock word, waiting for the previous queued > CPU to release them. Add an option which allows these waiters to yield > to the previous CPU if its vCPU is preempted. > > Disable this option by default for now, i.e., no logical change. > --- > arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++- > 1 file changed, 45 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c > index 55286ac91da5..b39f8c5b329c 100644 > --- a/arch/powerpc/lib/qspinlock.c > +++ b/arch/powerpc/lib/qspinlock.c > @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true; > static int HEAD_SPINS __read_mostly = (1<<8); > > static bool pv_yield_owner __read_mostly = true; > +static bool pv_yield_prev __read_mostly = true; Similar suggestion, maybe pv_yield_prev_enabled would read better. Isn't this enabled by default, contrary to the commit message? > > static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); > > @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va > cpu_relax(); > } > > +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) yield_to_locked_owner() takes a raw val and works out the cpu to yield to. I think for consistency have yield_to_prev() take the raw val and work it out too. 
> +{ > + u32 yield_count; > + > + if (!paravirt) > + goto relax; > + > + if (!pv_yield_prev) > + goto relax; > + > + yield_count = yield_count_of(prev_cpu); > + if ((yield_count & 1) == 0) > + goto relax; /* owner vcpu is running */ > + > + smp_rmb(); /* See yield_to_locked_owner comment */ > + > + if (!node->locked) { > + yield_to_preempted(prev_cpu, yield_count); > + return; > + } > + > +relax: > + cpu_relax(); > +} > + > > static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) > { > @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b > */ > if (old & _Q_TAIL_CPU_MASK) { > struct qnode *prev = get_tail_qnode(lock, old); > + int prev_cpu = get_tail_cpu(old); This could then be removed. > > /* Link @node into the waitqueue. */ > WRITE_ONCE(prev->next, node); > > /* Wait for mcs node lock to be released */ > while (!node->locked) > - cpu_relax(); > + yield_to_prev(lock, node, prev_cpu, paravirt); And would have this as: yield_to_prev(lock, node, old, paravirt); > > smp_rmb(); /* acquire barrier for the mcs lock */ > } > @@ -448,12 +475,29 @@ static int pv_yield_owner_get(void *data, u64 *val) > > DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); > > +static int pv_yield_prev_set(void *data, u64 val) > +{ > + pv_yield_prev = !!val; > + > + return 0; > +} > + > +static int pv_yield_prev_get(void *data, u64 *val) > +{ > + *val = pv_yield_prev; > + > + return 0; > +} > + > +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); > + > static __init int spinlock_debugfs_init(void) > { > debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); > debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); > if (is_shared_processor()) { > debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); > + 
debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); > } > > return 0;
On Thu Nov 10, 2022 at 10:41 AM AEST, Jordan Niethe wrote: > On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: > [resend as utf-8, not utf-7] > > Queued waiters which are not at the head of the queue don't spin on > > the lock word but their qnode lock word, waiting for the previous queued > > CPU to release them. Add an option which allows these waiters to yield > > to the previous CPU if its vCPU is preempted. > > > > Disable this option by default for now, i.e., no logical change. > > --- > > arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++- > > 1 file changed, 45 insertions(+), 1 deletion(-) > > > > diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c > > index 55286ac91da5..b39f8c5b329c 100644 > > --- a/arch/powerpc/lib/qspinlock.c > > +++ b/arch/powerpc/lib/qspinlock.c > > @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true; > > static int HEAD_SPINS __read_mostly = (1<<8); > > > > static bool pv_yield_owner __read_mostly = true; > > +static bool pv_yield_prev __read_mostly = true; > > Similiar suggestion, maybe pv_yield_prev_enabled would read better. > > Isn't this enabled by default contrary to the commit message? Yeah a few of those changelogs got out of synch. > > > > > static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); > > > > @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va > > cpu_relax(); > > } > > > > +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) > > yield_to_locked_owner() takes a raw val and works out the cpu to yield to. > I think for consistency have yield_to_prev() take the raw val and work it out too. Good thinking. 
> > +{ > > + u32 yield_count; > > + > > + if (!paravirt) > > + goto relax; > > + > > + if (!pv_yield_prev) > > + goto relax; > > + > > + yield_count = yield_count_of(prev_cpu); > > + if ((yield_count & 1) == 0) > > + goto relax; /* owner vcpu is running */ > > + > > + smp_rmb(); /* See yield_to_locked_owner comment */ > > + > > + if (!node->locked) { > > + yield_to_preempted(prev_cpu, yield_count); > > + return; > > + } > > + > > +relax: > > + cpu_relax(); > > +} > > + > > > > static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) > > { > > @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b > > */ > > if (old & _Q_TAIL_CPU_MASK) { > > struct qnode *prev = get_tail_qnode(lock, old); > > + int prev_cpu = get_tail_cpu(old); > > This could then be removed. > > > > > /* Link @node into the waitqueue. */ > > WRITE_ONCE(prev->next, node); > > > > /* Wait for mcs node lock to be released */ > > while (!node->locked) > > - cpu_relax(); > > + yield_to_prev(lock, node, prev_cpu, paravirt); > > And would have this as: > yield_to_prev(lock, node, old, paravirt); Yep. Thanks, Nick
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 55286ac91da5..b39f8c5b329c 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true; static int HEAD_SPINS __read_mostly = (1<<8); static bool pv_yield_owner __read_mostly = true; +static bool pv_yield_prev __read_mostly = true; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va cpu_relax(); } +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt) +{ + u32 yield_count; + + if (!paravirt) + goto relax; + + if (!pv_yield_prev) + goto relax; + + yield_count = yield_count_of(prev_cpu); + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + smp_rmb(); /* See yield_to_locked_owner comment */ + + if (!node->locked) { + yield_to_preempted(prev_cpu, yield_count); + return; + } + +relax: + cpu_relax(); +} + static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b */ if (old & _Q_TAIL_CPU_MASK) { struct qnode *prev = get_tail_qnode(lock, old); + int prev_cpu = get_tail_cpu(old); /* Link @node into the waitqueue. 
*/ WRITE_ONCE(prev->next, node); /* Wait for mcs node lock to be released */ while (!node->locked) - cpu_relax(); + yield_to_prev(lock, node, prev_cpu, paravirt); smp_rmb(); /* acquire barrier for the mcs lock */ } @@ -448,12 +475,29 @@ static int pv_yield_owner_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); +static int pv_yield_prev_set(void *data, u64 val) +{ + pv_yield_prev = !!val; + + return 0; +} + +static int pv_yield_prev_get(void *data, u64 *val) +{ + *val = pv_yield_prev; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); if (is_shared_processor()) { debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); } return 0;