Message ID | 20230508020120.218494-8-rmclure@linux.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | powerpc: KCSAN fix warnings and mark accesses | expand |
On Mon May 8, 2023 at 12:01 PM AEST, Rohan McLure wrote: > The idle_state entry in the PACA on PowerNV features a bit which is > atomically tested and set through ldarx/stdcx. to be used as a spinlock. > This lock then guards access to other bit fields of idle_state. KCSAN > cannot differentiate between any of these bitfield accesses as they all > are implemented by 8-byte store/load instructions, thus cores contending > on the bit-lock appear to data race with modifications to idle_state. > > Separate the bit-lock entry from the data guarded by the lock to avoid > the possibility of data races being detected by KCSAN. > > Suggested-by: Nicholas Piggin <npiggin@gmail.com> > Signed-off-by: Rohan McLure <rmclure@ibm.com> > --- > arch/powerpc/include/asm/paca.h | 1 + > arch/powerpc/platforms/powernv/idle.c | 20 +++++++++++--------- > 2 files changed, 12 insertions(+), 9 deletions(-) > > diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h > index da0377f46597..cb325938766a 100644 > --- a/arch/powerpc/include/asm/paca.h > +++ b/arch/powerpc/include/asm/paca.h > @@ -191,6 +191,7 @@ struct paca_struct { > #ifdef CONFIG_PPC_POWERNV > /* PowerNV idle fields */ > /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */ > + unsigned long idle_lock; /* A value of 1 means acquired */ > unsigned long idle_state; > union { > /* P7/P8 specific fields */ > diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c > index 841cb7f31f4f..97dbb7bc2b00 100644 > --- a/arch/powerpc/platforms/powernv/idle.c > +++ b/arch/powerpc/platforms/powernv/idle.c > @@ -246,9 +246,9 @@ static inline void atomic_lock_thread_idle(void) > { > int cpu = raw_smp_processor_id(); > int first = cpu_first_thread_sibling(cpu); > - unsigned long *state = &paca_ptrs[first]->idle_state; > + unsigned long *lock = &paca_ptrs[first]->idle_lock; > > - while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))) > + while 
(unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock))) > barrier(); > } > > @@ -258,29 +258,31 @@ static inline void atomic_unlock_and_stop_thread_idle(void) > int first = cpu_first_thread_sibling(cpu); > unsigned long thread = 1UL << cpu_thread_in_core(cpu); > unsigned long *state = &paca_ptrs[first]->idle_state; > + unsigned long *lock = &paca_ptrs[first]->idle_lock; > u64 s = READ_ONCE(*state); > u64 new, tmp; > > - BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); > + BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT)); > BUG_ON(s & thread); > > again: > - new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT; > + new = s | thread; > tmp = cmpxchg(state, s, new); > if (unlikely(tmp != s)) { > s = tmp; > goto again; > } > + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); Sigh, another atomic. It's in a slow path though so I won't get too upset. Would be nice to add a comment here and revert it when KCSAN can be taught about this pattern though, so we don't lose it. > } > > static inline void atomic_unlock_thread_idle(void) > { > int cpu = raw_smp_processor_id(); > int first = cpu_first_thread_sibling(cpu); > - unsigned long *state = &paca_ptrs[first]->idle_state; > + unsigned long *lock = &paca_ptrs[first]->idle_lock; > > - BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state)); > - clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state); > + BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock)); > + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); > } > > /* P7 and P8 */ > @@ -380,9 +382,9 @@ static unsigned long power7_idle_insn(unsigned long type) > sprs.uamor = mfspr(SPRN_UAMOR); > } > > - local_paca->thread_idle_state = type; > + WRITE_ONCE(local_paca->thread_idle_state, type); > srr1 = isa206_idle_insn_mayloss(type); /* go idle */ > - local_paca->thread_idle_state = PNV_THREAD_RUNNING; > + WRITE_ONCE(local_paca->thread_idle_state, PNV_THREAD_RUNNING); Where is the thread_idle_state concurrency coming from? Thanks, Nick
> On 9 May 2023, at 12:26 pm, Nicholas Piggin <npiggin@gmail.com> wrote: > > On Mon May 8, 2023 at 12:01 PM AEST, Rohan McLure wrote: >> The idle_state entry in the PACA on PowerNV features a bit which is >> atomically tested and set through ldarx/stdcx. to be used as a spinlock. >> This lock then guards access to other bit fields of idle_state. KCSAN >> cannot differentiate between any of these bitfield accesses as they all >> are implemented by 8-byte store/load instructions, thus cores contending >> on the bit-lock appear to data race with modifications to idle_state. >> >> Separate the bit-lock entry from the data guarded by the lock to avoid >> the possibility of data races being detected by KCSAN. >> >> Suggested-by: Nicholas Piggin <npiggin@gmail.com> >> Signed-off-by: Rohan McLure <rmclure@ibm.com> >> --- >> arch/powerpc/include/asm/paca.h | 1 + >> arch/powerpc/platforms/powernv/idle.c | 20 +++++++++++--------- >> 2 files changed, 12 insertions(+), 9 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h >> index da0377f46597..cb325938766a 100644 >> --- a/arch/powerpc/include/asm/paca.h >> +++ b/arch/powerpc/include/asm/paca.h >> @@ -191,6 +191,7 @@ struct paca_struct { >> #ifdef CONFIG_PPC_POWERNV >> /* PowerNV idle fields */ >> /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */ >> + unsigned long idle_lock; /* A value of 1 means acquired */ >> unsigned long idle_state; >> union { >> /* P7/P8 specific fields */ >> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c >> index 841cb7f31f4f..97dbb7bc2b00 100644 >> --- a/arch/powerpc/platforms/powernv/idle.c >> +++ b/arch/powerpc/platforms/powernv/idle.c >> @@ -246,9 +246,9 @@ static inline void atomic_lock_thread_idle(void) >> { >> int cpu = raw_smp_processor_id(); >> int first = cpu_first_thread_sibling(cpu); >> - unsigned long *state = &paca_ptrs[first]->idle_state; >> + unsigned long *lock = 
&paca_ptrs[first]->idle_lock; >> >> - while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))) >> + while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock))) >> barrier(); >> } >> >> @@ -258,29 +258,31 @@ static inline void atomic_unlock_and_stop_thread_idle(void) >> int first = cpu_first_thread_sibling(cpu); >> unsigned long thread = 1UL << cpu_thread_in_core(cpu); >> unsigned long *state = &paca_ptrs[first]->idle_state; >> + unsigned long *lock = &paca_ptrs[first]->idle_lock; >> u64 s = READ_ONCE(*state); >> u64 new, tmp; >> >> - BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); >> + BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT)); >> BUG_ON(s & thread); >> >> again: >> - new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT; >> + new = s | thread; >> tmp = cmpxchg(state, s, new); >> if (unlikely(tmp != s)) { >> s = tmp; >> goto again; >> } >> + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); > > Sigh, another atomic. It's in a slow path though so I won't get too > upset. Would be nice to add a comment here and revert it when KCSAN > can be taught about this pattern though, so we don't lose it. 
> >> } >> >> static inline void atomic_unlock_thread_idle(void) >> { >> int cpu = raw_smp_processor_id(); >> int first = cpu_first_thread_sibling(cpu); >> - unsigned long *state = &paca_ptrs[first]->idle_state; >> + unsigned long *lock = &paca_ptrs[first]->idle_lock; >> >> - BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state)); >> - clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state); >> + BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock)); >> + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); >> } >> >> /* P7 and P8 */ >> @@ -380,9 +382,9 @@ static unsigned long power7_idle_insn(unsigned long type) >> sprs.uamor = mfspr(SPRN_UAMOR); >> } >> >> - local_paca->thread_idle_state = type; >> + WRITE_ONCE(local_paca->thread_idle_state, type); >> srr1 = isa206_idle_insn_mayloss(type); /* go idle */ >> - local_paca->thread_idle_state = PNV_THREAD_RUNNING; >> + WRITE_ONCE(local_paca->thread_idle_state, PNV_THREAD_RUNNING); > > Where is the thread_idle_state concurrency coming from? Yeah, I agree, WRITE_ONCE isn’t necessary here, as all reads of this variable by xmon are purely diagnostic (data races permitted), and the isa206_idle_insn_mayloss() call is a compiler barrier. So write instructions will be emitted on each side of the call. > > Thanks, > Nick
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index da0377f46597..cb325938766a 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -191,6 +191,7 @@ struct paca_struct { #ifdef CONFIG_PPC_POWERNV /* PowerNV idle fields */ /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */ + unsigned long idle_lock; /* A value of 1 means acquired */ unsigned long idle_state; union { /* P7/P8 specific fields */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 841cb7f31f4f..97dbb7bc2b00 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -246,9 +246,9 @@ static inline void atomic_lock_thread_idle(void) { int cpu = raw_smp_processor_id(); int first = cpu_first_thread_sibling(cpu); - unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long *lock = &paca_ptrs[first]->idle_lock; - while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))) + while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock))) barrier(); } @@ -258,29 +258,31 @@ static inline void atomic_unlock_and_stop_thread_idle(void) int first = cpu_first_thread_sibling(cpu); unsigned long thread = 1UL << cpu_thread_in_core(cpu); unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long *lock = &paca_ptrs[first]->idle_lock; u64 s = READ_ONCE(*state); u64 new, tmp; - BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); + BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT)); BUG_ON(s & thread); again: - new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT; + new = s | thread; tmp = cmpxchg(state, s, new); if (unlikely(tmp != s)) { s = tmp; goto again; } + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); } static inline void atomic_unlock_thread_idle(void) { int cpu = raw_smp_processor_id(); int first = cpu_first_thread_sibling(cpu); - unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long *lock = 
&paca_ptrs[first]->idle_lock; - BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state)); - clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state); + BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock)); + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); } /* P7 and P8 */ @@ -380,9 +382,9 @@ static unsigned long power7_idle_insn(unsigned long type) sprs.uamor = mfspr(SPRN_UAMOR); } - local_paca->thread_idle_state = type; + WRITE_ONCE(local_paca->thread_idle_state, type); srr1 = isa206_idle_insn_mayloss(type); /* go idle */ - local_paca->thread_idle_state = PNV_THREAD_RUNNING; + WRITE_ONCE(local_paca->thread_idle_state, PNV_THREAD_RUNNING); WARN_ON_ONCE(!srr1); WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
The idle_state entry in the PACA on PowerNV features a bit which is atomically tested and set through ldarx/stdcx. to be used as a spinlock. This lock then guards access to other bit fields of idle_state. KCSAN cannot differentiate between any of these bitfield accesses as they all are implemented by 8-byte store/load instructions, thus cores contending on the bit-lock appear to data race with modifications to idle_state. Separate the bit-lock entry from the data guarded by the lock to avoid the possibility of data races being detected by KCSAN. Suggested-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Rohan McLure <rmclure@ibm.com> --- arch/powerpc/include/asm/paca.h | 1 + arch/powerpc/platforms/powernv/idle.c | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-)