Message ID | 5715D04E.9050009@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Hi Xinhui, On Tue, Apr 19, 2016 at 02:29:34PM +0800, Pan Xinhui wrote: > From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> > > Implement xchg{u8,u16}{local,relaxed}, and > cmpxchg{u8,u16}{,local,acquire,relaxed}. > > It works on all ppc. > Nice work! AFAICT, your work doesn't depend on anything that ppc-specific, right? So maybe we can use it as a general approach for a fallback implementation on the archs without u8/u16 atomics. ;-) > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> > Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> > --- > change from V1: > rework totally. > --- > arch/powerpc/include/asm/cmpxchg.h | 83 ++++++++++++++++++++++++++++++++++++++ > 1 file changed, 83 insertions(+) > > diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h > index 44efe73..79a1f45 100644 > --- a/arch/powerpc/include/asm/cmpxchg.h > +++ b/arch/powerpc/include/asm/cmpxchg.h > @@ -7,6 +7,37 @@ > #include <asm/asm-compat.h> > #include <linux/bug.h> > > +#ifdef __BIG_ENDIAN > +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) > +#else > +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) > +#endif > + > +static __always_inline unsigned long > +__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, > + unsigned long new); > + > +#define __XCHG_GEN(cmp, type, sfx, u32sfx, skip, v) \ > +static __always_inline u32 \ > +__##cmp##xchg_##type##sfx(v void *ptr, u32 old, u32 new) \ > +{ \ > + int size = sizeof (type); \ > + int off = (unsigned long)ptr % sizeof(u32); \ > + volatile u32 *p = ptr - off; \ > + int bitoff = BITOFF_CAL(size, off); \ > + u32 bitmask = ((0x1 << size * BITS_PER_BYTE) - 1) << bitoff; \ > + u32 oldv, newv; \ > + u32 ret; \ > + do { \ > + oldv = READ_ONCE(*p); \ > + ret = (oldv & bitmask) >> bitoff; \ > + if (skip && ret != old) \ > + break; \ > + newv = (oldv & ~bitmask) | (new << bitoff); \ > + } while (__cmpxchg_u32##u32sfx((v void*)p, oldv, newv) != oldv);\ Forgive me if 
this is too paranoid, but I think we can save the READ_ONCE() in the loop if we change the code into the following, because cmpxchg will return the "new" value, if the cmp part fails. newv = READ_ONCE(*p); do { oldv = newv; ret = (oldv & bitmask) >> bitoff; if (skip && ret != old) break; newv = (oldv & ~bitmask) | (new << bitoff); newv = __cmpxchg_u32##u32sfx((void *)p, oldv, newv); } while(newv != oldv); > + return ret; \ > +} > + > /* > * Atomic exchange > * > @@ -14,6 +45,19 @@ > * the previous value stored there. > */ > > +#define XCHG_GEN(type, sfx, v) \ > + __XCHG_GEN(_, type, sfx, _local, 0, v) \ ^^^^^^^ This should be sfx, right? Otherwise, all the newly added xchg will call __cmpxchg_u32_local, which will result in wrong ordering guarantees. > +static __always_inline u32 __xchg_##type##sfx(v void *p, u32 n) \ > +{ \ > + return ___xchg_##type##sfx(p, 0, n); \ > +} > + > +XCHG_GEN(u8, _local, volatile); I don't think we need the "volatile" modifier here, because READ_ONCE() and __cmpxchg_u32_* all have "volatile" semantics IIUC, so maybe we can save a parameter for the __XCHG_GEN macro. 
Regards, Boqun > +XCHG_GEN(u8, _relaxed, ); > +XCHG_GEN(u16, _local, volatile); > +XCHG_GEN(u16, _relaxed, ); > +#undef XCHG_GEN > + > static __always_inline unsigned long > __xchg_u32_local(volatile void *p, unsigned long val) > { > @@ -88,6 +132,10 @@ static __always_inline unsigned long > __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) > { > switch (size) { > + case 1: > + return __xchg_u8_local(ptr, x); > + case 2: > + return __xchg_u16_local(ptr, x); > case 4: > return __xchg_u32_local(ptr, x); > #ifdef CONFIG_PPC64 > @@ -103,6 +151,10 @@ static __always_inline unsigned long > __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) > { > switch (size) { > + case 1: > + return __xchg_u8_relaxed(ptr, x); > + case 2: > + return __xchg_u16_relaxed(ptr, x); > case 4: > return __xchg_u32_relaxed(ptr, x); > #ifdef CONFIG_PPC64 > @@ -226,6 +278,21 @@ __cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new) > return prev; > } > > + > +#define CMPXCHG_GEN(type, sfx, v) \ > + __XCHG_GEN(cmp, type, sfx, sfx, 1, v) > + > +CMPXCHG_GEN(u8, , volatile); > +CMPXCHG_GEN(u8, _local, volatile); > +CMPXCHG_GEN(u8, _relaxed, ); > +CMPXCHG_GEN(u8, _acquire, ); > +CMPXCHG_GEN(u16, , volatile); > +CMPXCHG_GEN(u16, _local, volatile); > +CMPXCHG_GEN(u16, _relaxed, ); > +CMPXCHG_GEN(u16, _acquire, ); > +#undef CMPXCHG_GEN > +#undef __XCHG_GEN > + > #ifdef CONFIG_PPC64 > static __always_inline unsigned long > __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) > @@ -316,6 +383,10 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, > unsigned int size) > { > switch (size) { > + case 1: > + return __cmpxchg_u8(ptr, old, new); > + case 2: > + return __cmpxchg_u16(ptr, old, new); > case 4: > return __cmpxchg_u32(ptr, old, new); > #ifdef CONFIG_PPC64 > @@ -332,6 +403,10 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, > unsigned int size) > { > switch (size) { > + case 1: > + return 
__cmpxchg_u8_local(ptr, old, new); > + case 2: > + return __cmpxchg_u16_local(ptr, old, new); > case 4: > return __cmpxchg_u32_local(ptr, old, new); > #ifdef CONFIG_PPC64 > @@ -348,6 +423,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, > unsigned int size) > { > switch (size) { > + case 1: > + return __cmpxchg_u8_relaxed(ptr, old, new); > + case 2: > + return __cmpxchg_u16_relaxed(ptr, old, new); > case 4: > return __cmpxchg_u32_relaxed(ptr, old, new); > #ifdef CONFIG_PPC64 > @@ -364,6 +443,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, > unsigned int size) > { > switch (size) { > + case 1: > + return __cmpxchg_u8_acquire(ptr, old, new); > + case 2: > + return __cmpxchg_u16_acquire(ptr, old, new); > case 4: > return __cmpxchg_u32_acquire(ptr, old, new); > #ifdef CONFIG_PPC64 > -- > 2.4.3 >
Hello, boqun On 2016年04月19日 17:18, Boqun Feng wrote: > Hi Xinhui, > > On Tue, Apr 19, 2016 at 02:29:34PM +0800, Pan Xinhui wrote: >> From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> >> >> Implement xchg{u8,u16}{local,relaxed}, and >> cmpxchg{u8,u16}{,local,acquire,relaxed}. >> >> It works on all ppc. >> > > Nice work! > thank you. > AFAICT, your work doesn't depend on anything that ppc-specific, right? > So maybe we can use it as a general approach for a fallback > implementation on the archs without u8/u16 atomics. ;-) > >> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> >> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> >> --- >> change from V1: >> rework totally. >> --- >> arch/powerpc/include/asm/cmpxchg.h | 83 ++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 83 insertions(+) >> >> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h >> index 44efe73..79a1f45 100644 >> --- a/arch/powerpc/include/asm/cmpxchg.h >> +++ b/arch/powerpc/include/asm/cmpxchg.h >> @@ -7,6 +7,37 @@ >> #include <asm/asm-compat.h> >> #include <linux/bug.h> >> >> +#ifdef __BIG_ENDIAN >> +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) >> +#else >> +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) >> +#endif >> + >> +static __always_inline unsigned long >> +__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, >> + unsigned long new); >> + >> +#define __XCHG_GEN(cmp, type, sfx, u32sfx, skip, v) \ >> +static __always_inline u32 \ >> +__##cmp##xchg_##type##sfx(v void *ptr, u32 old, u32 new) \ >> +{ \ >> + int size = sizeof (type); \ >> + int off = (unsigned long)ptr % sizeof(u32); \ >> + volatile u32 *p = ptr - off; \ >> + int bitoff = BITOFF_CAL(size, off); \ >> + u32 bitmask = ((0x1 << size * BITS_PER_BYTE) - 1) << bitoff; \ >> + u32 oldv, newv; \ >> + u32 ret; \ >> + do { \ >> + oldv = READ_ONCE(*p); \ >> + ret = (oldv & bitmask) >> bitoff; \ >> + if (skip && ret != old) \ >> + break; \ 
>> + newv = (oldv & ~bitmask) | (new << bitoff); \ >> + } while (__cmpxchg_u32##u32sfx((v void*)p, oldv, newv) != oldv);\ > > Forgive me if this is too paranoid, but I think we can save the > READ_ONCE() in the loop if we change the code into the following, > because cmpxchg will return the "new" value, if the cmp part fails. > > newv = READ_ONCE(*p); > > do { > oldv = newv; > ret = (oldv & bitmask) >> bitoff; > if (skip && ret != old) > break; > newv = (oldv & ~bitmask) | (new << bitoff); > newv = __cmpxchg_u32##u32sfx((void *)p, oldv, newv); > } while(newv != oldv); > >> + return ret; \ >> +} A small optimization. Patch V3 will include your code, thanks. >> + >> /* >> * Atomic exchange >> * >> @@ -14,6 +45,19 @@ >> * the previous value stored there. >> */ >> >> +#define XCHG_GEN(type, sfx, v) \ >> + __XCHG_GEN(_, type, sfx, _local, 0, v) \ > ^^^^^^^ > > This should be sfx, right? Otherwise, all the newly added xchg will > call __cmpxchg_u32_local, which will result in wrong ordering guarantees. > I did mean that. But I will think about the ordering issue for a while. :) >> +static __always_inline u32 __xchg_##type##sfx(v void *p, u32 n) \ >> +{ \ >> + return ___xchg_##type##sfx(p, 0, n); \ >> +} >> + >> +XCHG_GEN(u8, _local, volatile); > > I don't think we need the "volatile" modifier here, because READ_ONCE() > and __cmpxchg_u32_* all have "volatile" semantics IIUC, so maybe we can > save a parameter for the __XCHG_GEN macro. > Such cleanup work can be done in a separate patch. Here I am just making the compiler happy. 
thanks xinhui > Regards, > Boqun > >> +XCHG_GEN(u8, _relaxed, ); >> +XCHG_GEN(u16, _local, volatile); >> +XCHG_GEN(u16, _relaxed, ); >> +#undef XCHG_GEN >> + >> static __always_inline unsigned long >> __xchg_u32_local(volatile void *p, unsigned long val) >> { >> @@ -88,6 +132,10 @@ static __always_inline unsigned long >> __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) >> { >> switch (size) { >> + case 1: >> + return __xchg_u8_local(ptr, x); >> + case 2: >> + return __xchg_u16_local(ptr, x); >> case 4: >> return __xchg_u32_local(ptr, x); >> #ifdef CONFIG_PPC64 >> @@ -103,6 +151,10 @@ static __always_inline unsigned long >> __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) >> { >> switch (size) { >> + case 1: >> + return __xchg_u8_relaxed(ptr, x); >> + case 2: >> + return __xchg_u16_relaxed(ptr, x); >> case 4: >> return __xchg_u32_relaxed(ptr, x); >> #ifdef CONFIG_PPC64 >> @@ -226,6 +278,21 @@ __cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new) >> return prev; >> } >> >> + >> +#define CMPXCHG_GEN(type, sfx, v) \ >> + __XCHG_GEN(cmp, type, sfx, sfx, 1, v) >> + >> +CMPXCHG_GEN(u8, , volatile); >> +CMPXCHG_GEN(u8, _local, volatile); >> +CMPXCHG_GEN(u8, _relaxed, ); >> +CMPXCHG_GEN(u8, _acquire, ); >> +CMPXCHG_GEN(u16, , volatile); >> +CMPXCHG_GEN(u16, _local, volatile); >> +CMPXCHG_GEN(u16, _relaxed, ); >> +CMPXCHG_GEN(u16, _acquire, ); >> +#undef CMPXCHG_GEN >> +#undef __XCHG_GEN >> + >> #ifdef CONFIG_PPC64 >> static __always_inline unsigned long >> __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) >> @@ -316,6 +383,10 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, >> unsigned int size) >> { >> switch (size) { >> + case 1: >> + return __cmpxchg_u8(ptr, old, new); >> + case 2: >> + return __cmpxchg_u16(ptr, old, new); >> case 4: >> return __cmpxchg_u32(ptr, old, new); >> #ifdef CONFIG_PPC64 >> @@ -332,6 +403,10 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, 
unsigned long new, >> unsigned int size) >> { >> switch (size) { >> + case 1: >> + return __cmpxchg_u8_local(ptr, old, new); >> + case 2: >> + return __cmpxchg_u16_local(ptr, old, new); >> case 4: >> return __cmpxchg_u32_local(ptr, old, new); >> #ifdef CONFIG_PPC64 >> @@ -348,6 +423,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, >> unsigned int size) >> { >> switch (size) { >> + case 1: >> + return __cmpxchg_u8_relaxed(ptr, old, new); >> + case 2: >> + return __cmpxchg_u16_relaxed(ptr, old, new); >> case 4: >> return __cmpxchg_u32_relaxed(ptr, old, new); >> #ifdef CONFIG_PPC64 >> @@ -364,6 +443,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, >> unsigned int size) >> { >> switch (size) { >> + case 1: >> + return __cmpxchg_u8_acquire(ptr, old, new); >> + case 2: >> + return __cmpxchg_u16_acquire(ptr, old, new); >> case 4: >> return __cmpxchg_u32_acquire(ptr, old, new); >> #ifdef CONFIG_PPC64 >> -- >> 2.4.3 >> >
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 44efe73..79a1f45 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -7,6 +7,37 @@ #include <asm/asm-compat.h> #include <linux/bug.h> +#ifdef __BIG_ENDIAN +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) +#else +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) +#endif + +static __always_inline unsigned long +__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, + unsigned long new); + +#define __XCHG_GEN(cmp, type, sfx, u32sfx, skip, v) \ +static __always_inline u32 \ +__##cmp##xchg_##type##sfx(v void *ptr, u32 old, u32 new) \ +{ \ + int size = sizeof (type); \ + int off = (unsigned long)ptr % sizeof(u32); \ + volatile u32 *p = ptr - off; \ + int bitoff = BITOFF_CAL(size, off); \ + u32 bitmask = ((0x1 << size * BITS_PER_BYTE) - 1) << bitoff; \ + u32 oldv, newv; \ + u32 ret; \ + do { \ + oldv = READ_ONCE(*p); \ + ret = (oldv & bitmask) >> bitoff; \ + if (skip && ret != old) \ + break; \ + newv = (oldv & ~bitmask) | (new << bitoff); \ + } while (__cmpxchg_u32##u32sfx((v void*)p, oldv, newv) != oldv);\ + return ret; \ +} + /* * Atomic exchange * @@ -14,6 +45,19 @@ * the previous value stored there. 
*/ +#define XCHG_GEN(type, sfx, v) \ + __XCHG_GEN(_, type, sfx, _local, 0, v) \ +static __always_inline u32 __xchg_##type##sfx(v void *p, u32 n) \ +{ \ + return ___xchg_##type##sfx(p, 0, n); \ +} + +XCHG_GEN(u8, _local, volatile); +XCHG_GEN(u8, _relaxed, ); +XCHG_GEN(u16, _local, volatile); +XCHG_GEN(u16, _relaxed, ); +#undef XCHG_GEN + static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) { @@ -88,6 +132,10 @@ static __always_inline unsigned long __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_local(ptr, x); + case 2: + return __xchg_u16_local(ptr, x); case 4: return __xchg_u32_local(ptr, x); #ifdef CONFIG_PPC64 @@ -103,6 +151,10 @@ static __always_inline unsigned long __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_relaxed(ptr, x); + case 2: + return __xchg_u16_relaxed(ptr, x); case 4: return __xchg_u32_relaxed(ptr, x); #ifdef CONFIG_PPC64 @@ -226,6 +278,21 @@ __cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new) return prev; } + +#define CMPXCHG_GEN(type, sfx, v) \ + __XCHG_GEN(cmp, type, sfx, sfx, 1, v) + +CMPXCHG_GEN(u8, , volatile); +CMPXCHG_GEN(u8, _local, volatile); +CMPXCHG_GEN(u8, _relaxed, ); +CMPXCHG_GEN(u8, _acquire, ); +CMPXCHG_GEN(u16, , volatile); +CMPXCHG_GEN(u16, _local, volatile); +CMPXCHG_GEN(u16, _relaxed, ); +CMPXCHG_GEN(u16, _acquire, ); +#undef CMPXCHG_GEN +#undef __XCHG_GEN + #ifdef CONFIG_PPC64 static __always_inline unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) @@ -316,6 +383,10 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8(ptr, old, new); + case 2: + return __cmpxchg_u16(ptr, old, new); case 4: return __cmpxchg_u32(ptr, old, new); #ifdef CONFIG_PPC64 @@ -332,6 +403,10 @@ __cmpxchg_local(volatile void *ptr, unsigned long 
old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_local(ptr, old, new); + case 2: + return __cmpxchg_u16_local(ptr, old, new); case 4: return __cmpxchg_u32_local(ptr, old, new); #ifdef CONFIG_PPC64 @@ -348,6 +423,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_relaxed(ptr, old, new); + case 2: + return __cmpxchg_u16_relaxed(ptr, old, new); case 4: return __cmpxchg_u32_relaxed(ptr, old, new); #ifdef CONFIG_PPC64 @@ -364,6 +443,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_acquire(ptr, old, new); + case 2: + return __cmpxchg_u16_acquire(ptr, old, new); case 4: return __cmpxchg_u32_acquire(ptr, old, new); #ifdef CONFIG_PPC64