Message ID | 5720837D.6050807@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
On Wed, Apr 27, 2016 at 05:16:45PM +0800, Pan Xinhui wrote: > From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> > > Implement xchg{u8,u16}{local,relaxed}, and > cmpxchg{u8,u16}{,local,acquire,relaxed}. > > It works on all ppc. > > remove volatile of first parameter in __cmpxchg_local and __cmpxchg > > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> > Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> > --- > change from v3: > rewrite in asm for the LL/SC. > remove volatile in __cmpxchg_local and __cmpxchg. > change from v2: > in the do{}while(), we save one load and use corresponding cmpxchg suffix. > Also add corresponding __cmpxchg_u32 function declaration in the __XCHG_GEN > change from V1: > rework totally. > --- > arch/powerpc/include/asm/cmpxchg.h | 109 ++++++++++++++++++++++++++++++++++++- > 1 file changed, 106 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h > index 44efe73..8a3735f 100644 > --- a/arch/powerpc/include/asm/cmpxchg.h > +++ b/arch/powerpc/include/asm/cmpxchg.h > @@ -7,6 +7,71 @@ > #include <asm/asm-compat.h> > #include <linux/bug.h> > > +#ifdef __BIG_ENDIAN > +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) > +#else > +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) > +#endif > + > +#define XCHG_GEN(type, sfx, cl) \ > +static inline u32 __xchg_##type##sfx(void *p, u32 val) \ > +{ \ > + unsigned int prev, prev_mask, tmp, bitoff, off; \ > + \ > + off = (unsigned long)p % sizeof(u32); \ > + bitoff = BITOFF_CAL(sizeof(type), off); \ > + p -= off; \ > + val <<= bitoff; \ > + prev_mask = (u32)(type)-1 << bitoff; \ > + \ > + __asm__ __volatile__( \ > +"1: lwarx %0,0,%3\n" \ > +" andc %1,%0,%5\n" \ > +" or %1,%1,%4\n" \ > + PPC405_ERR77(0,%3) \ > +" stwcx. 
%1,0,%3\n" \ > +" bne- 1b\n" \ > + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ I think we can save the "tmp" here by: __asm__ volatile__( "1: lwarx %0,0,%2\n" " andc %0,%0,%4\n" " or %0,%0,%3\n" PPC405_ERR77(0,%2) " stwcx. %0,0,%2\n" " bne- 1b\n" : "=&r" (prev), "+m" (*(u32*)p) : "r" (p), "r" (val), "r" (prev_mask) : "cc", cl); right? > + : "r" (p), "r" (val), "r" (prev_mask) \ > + : "cc", cl); \ > + \ > + return prev >> bitoff; \ > +} > + > +#define CMPXCHG_GEN(type, sfx, br, br2, cl) \ > +static inline \ > +u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new) \ > +{ \ > + unsigned int prev, prev_mask, tmp, bitoff, off; \ > + \ > + off = (unsigned long)p % sizeof(u32); \ > + bitoff = BITOFF_CAL(sizeof(type), off); \ > + p -= off; \ > + old <<= bitoff; \ > + new <<= bitoff; \ > + prev_mask = (u32)(type)-1 << bitoff; \ > + \ > + __asm__ __volatile__( \ > + br \ > +"1: lwarx %0,0,%3\n" \ > +" and %1,%0,%6\n" \ > +" cmpw 0,%1,%4\n" \ > +" bne- 2f\n" \ > +" andc %1,%0,%6\n" \ > +" or %1,%1,%5\n" \ > + PPC405_ERR77(0,%3) \ > +" stwcx. %1,0,%3\n" \ > +" bne- 1b\n" \ > + br2 \ > + "\n" \ > +"2:" \ > + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ And "tmp" here could also be saved by: "1: lwarx %0,0,%2\n" \ " xor %3,%0,%3\n" \ " and. %3,%3,%5\n" \ " bne- 2f\n" \ " andc %0,%0,%5\n" \ " or %0,%0,%4\n" \ PPC405_ERR77(0,%2) \ " stwcx. %0,0,%2\n" \ " bne- 1b\n" \ br2 \ "\n" \ "2:" \ : "=&r" (prev), "+m" (*(u32*)p) \ : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \ : "cc", cl); \ right? IIUC, saving the local variable "tmp" will result in saving a general register for the compilers to use for other variables. So thoughts? Regards, Boqun > + : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \ > + : "cc", cl); \ > + \ > + return prev >> bitoff; \ > +} > + > /* > * Atomic exchange > * > @@ -14,6 +79,11 @@ > * the previous value stored there. 
> */ > > +XCHG_GEN(u8, _local, "memory"); > +XCHG_GEN(u8, _relaxed, "cc"); > +XCHG_GEN(u16, _local, "memory"); > +XCHG_GEN(u16, _relaxed, "cc"); > + > static __always_inline unsigned long > __xchg_u32_local(volatile void *p, unsigned long val) > { > @@ -85,9 +155,13 @@ __xchg_u64_relaxed(u64 *p, unsigned long val) > #endif > > static __always_inline unsigned long > -__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) > +__xchg_local(void *ptr, unsigned long x, unsigned int size) > { > switch (size) { > + case 1: > + return __xchg_u8_local(ptr, x); > + case 2: > + return __xchg_u16_local(ptr, x); > case 4: > return __xchg_u32_local(ptr, x); > #ifdef CONFIG_PPC64 > @@ -103,6 +177,10 @@ static __always_inline unsigned long > __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) > { > switch (size) { > + case 1: > + return __xchg_u8_relaxed(ptr, x); > + case 2: > + return __xchg_u16_relaxed(ptr, x); > case 4: > return __xchg_u32_relaxed(ptr, x); > #ifdef CONFIG_PPC64 > @@ -131,6 +209,15 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) > * and return the old value of *p. 
> */ > > +CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); > +CMPXCHG_GEN(u8, _local, , , "memory"); > +CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); > +CMPXCHG_GEN(u8, _relaxed, , , "cc"); > +CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); > +CMPXCHG_GEN(u16, _local, , , "memory"); > +CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); > +CMPXCHG_GEN(u16, _relaxed, , , "cc"); > + > static __always_inline unsigned long > __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) > { > @@ -312,10 +399,14 @@ __cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new) > #endif > > static __always_inline unsigned long > -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, > +__cmpxchg(void *ptr, unsigned long old, unsigned long new, > unsigned int size) > { > switch (size) { > + case 1: > + return __cmpxchg_u8(ptr, old, new); > + case 2: > + return __cmpxchg_u16(ptr, old, new); > case 4: > return __cmpxchg_u32(ptr, old, new); > #ifdef CONFIG_PPC64 > @@ -328,10 +419,14 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, > } > > static __always_inline unsigned long > -__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, > +__cmpxchg_local(void *ptr, unsigned long old, unsigned long new, > unsigned int size) > { > switch (size) { > + case 1: > + return __cmpxchg_u8_local(ptr, old, new); > + case 2: > + return __cmpxchg_u16_local(ptr, old, new); > case 4: > return __cmpxchg_u32_local(ptr, old, new); > #ifdef CONFIG_PPC64 > @@ -348,6 +443,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, > unsigned int size) > { > switch (size) { > + case 1: > + return __cmpxchg_u8_relaxed(ptr, old, new); > + case 2: > + return __cmpxchg_u16_relaxed(ptr, old, new); > case 4: > return __cmpxchg_u32_relaxed(ptr, old, new); > #ifdef CONFIG_PPC64 > @@ -364,6 +463,10 @@ __cmpxchg_acquire(void *ptr, 
unsigned long old, unsigned long new, > unsigned int size) > { > switch (size) { > + case 1: > + return __cmpxchg_u8_acquire(ptr, old, new); > + case 2: > + return __cmpxchg_u16_acquire(ptr, old, new); > case 4: > return __cmpxchg_u32_acquire(ptr, old, new); > #ifdef CONFIG_PPC64 > -- > 2.4.3 >
On Wed, Apr 27, 2016 at 09:58:17PM +0800, Boqun Feng wrote: > On Wed, Apr 27, 2016 at 05:16:45PM +0800, Pan Xinhui wrote: > > From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> > > > > Implement xchg{u8,u16}{local,relaxed}, and > > cmpxchg{u8,u16}{,local,acquire,relaxed}. > > > > It works on all ppc. > > > > remove volatile of first parameter in __cmpxchg_local and __cmpxchg > > > > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> > > Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> > > --- > > change from v3: > > rewrite in asm for the LL/SC. > > remove volatile in __cmpxchg_local and __cmpxchg. > > change from v2: > > in the do{}while(), we save one load and use corresponding cmpxchg suffix. > > Also add corresponding __cmpxchg_u32 function declaration in the __XCHG_GEN > > change from V1: > > rework totally. > > --- > > arch/powerpc/include/asm/cmpxchg.h | 109 ++++++++++++++++++++++++++++++++++++- > > 1 file changed, 106 insertions(+), 3 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h > > index 44efe73..8a3735f 100644 > > --- a/arch/powerpc/include/asm/cmpxchg.h > > +++ b/arch/powerpc/include/asm/cmpxchg.h > > @@ -7,6 +7,71 @@ > > #include <asm/asm-compat.h> > > #include <linux/bug.h> > > > > +#ifdef __BIG_ENDIAN > > +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) > > +#else > > +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) > > +#endif > > + > > +#define XCHG_GEN(type, sfx, cl) \ > > +static inline u32 __xchg_##type##sfx(void *p, u32 val) \ > > +{ \ > > + unsigned int prev, prev_mask, tmp, bitoff, off; \ > > + \ > > + off = (unsigned long)p % sizeof(u32); \ > > + bitoff = BITOFF_CAL(sizeof(type), off); \ > > + p -= off; \ > > + val <<= bitoff; \ > > + prev_mask = (u32)(type)-1 << bitoff; \ > > + \ > > + __asm__ __volatile__( \ > > +"1: lwarx %0,0,%3\n" \ > > +" andc %1,%0,%5\n" \ > > +" or %1,%1,%4\n" \ > > + PPC405_ERR77(0,%3) \ > > +" stwcx. 
%1,0,%3\n" \ > > +" bne- 1b\n" \ > > + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ > > I think we can save the "tmp" here by: > > __asm__ volatile__( > "1: lwarx %0,0,%2\n" > " andc %0,%0,%4\n" > " or %0,%0,%3\n" > PPC405_ERR77(0,%2) > " stwcx. %0,0,%2\n" > " bne- 1b\n" > : "=&r" (prev), "+m" (*(u32*)p) > : "r" (p), "r" (val), "r" (prev_mask) > : "cc", cl); > > right? > > > + : "r" (p), "r" (val), "r" (prev_mask) \ > > + : "cc", cl); \ > > + \ > > + return prev >> bitoff; \ > > +} > > + > > +#define CMPXCHG_GEN(type, sfx, br, br2, cl) \ > > +static inline \ > > +u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new) \ > > +{ \ > > + unsigned int prev, prev_mask, tmp, bitoff, off; \ > > + \ > > + off = (unsigned long)p % sizeof(u32); \ > > + bitoff = BITOFF_CAL(sizeof(type), off); \ > > + p -= off; \ > > + old <<= bitoff; \ > > + new <<= bitoff; \ > > + prev_mask = (u32)(type)-1 << bitoff; \ > > + \ > > + __asm__ __volatile__( \ > > + br \ > > +"1: lwarx %0,0,%3\n" \ > > +" and %1,%0,%6\n" \ > > +" cmpw 0,%1,%4\n" \ > > +" bne- 2f\n" \ > > +" andc %1,%0,%6\n" \ > > +" or %1,%1,%5\n" \ > > + PPC405_ERR77(0,%3) \ > > +" stwcx. %1,0,%3\n" \ > > +" bne- 1b\n" \ > > + br2 \ > > + "\n" \ > > +"2:" \ > > + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ > > And "tmp" here could also be saved by: > > "1: lwarx %0,0,%2\n" \ > " xor %3,%0,%3\n" \ > " and. %3,%3,%5\n" \ > " bne- 2f\n" \ > " andc %0,%0,%5\n" \ > " or %0,%0,%4\n" \ > PPC405_ERR77(0,%2) \ > " stwcx. %0,0,%2\n" \ > " bne- 1b\n" \ > br2 \ > "\n" \ > "2:" \ > : "=&r" (prev), "+m" (*(u32*)p) \ > : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \ > : "cc", cl); \ > Oops, this should be: "1: lwarx %0,0,%3\n" \ " xor %2,%0,%2\n" \ " and. %2,%2,%5\n" \ " bne- 2f\n" \ " andc %0,%0,%5\n" \ " or %0,%0,%4\n" \ PPC405_ERR77(0,%3) \ " stwcx. %0,0,%3\n" \ " bne- 1b\n" \ br2 \ "\n" \ "2:" \ : "=&r" (prev), "+m" (*(u32*)p), "+&r" (old) \ : "r" (p), "r" (new), "r" (prev_mask) \ : "cc", cl); \ Regards, Boqun
On Wed, Apr 27, 2016 at 09:58:17PM +0800, Boqun Feng wrote: > On Wed, Apr 27, 2016 at 05:16:45PM +0800, Pan Xinhui wrote: > > From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> > > > > Implement xchg{u8,u16}{local,relaxed}, and > > cmpxchg{u8,u16}{,local,acquire,relaxed}. > > > > It works on all ppc. > > > > remove volatile of first parameter in __cmpxchg_local and __cmpxchg > > > > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> > > Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com> > > --- > > change from v3: > > rewrite in asm for the LL/SC. > > remove volatile in __cmpxchg_local and __cmpxchg. > > change from v2: > > in the do{}while(), we save one load and use corresponding cmpxchg suffix. > > Also add corresponding __cmpxchg_u32 function declaration in the __XCHG_GEN > > change from V1: > > rework totally. > > --- > > arch/powerpc/include/asm/cmpxchg.h | 109 ++++++++++++++++++++++++++++++++++++- > > 1 file changed, 106 insertions(+), 3 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h > > index 44efe73..8a3735f 100644 > > --- a/arch/powerpc/include/asm/cmpxchg.h > > +++ b/arch/powerpc/include/asm/cmpxchg.h > > @@ -7,6 +7,71 @@ > > #include <asm/asm-compat.h> > > #include <linux/bug.h> > > > > +#ifdef __BIG_ENDIAN > > +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) > > +#else > > +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) > > +#endif > > + > > +#define XCHG_GEN(type, sfx, cl) \ > > +static inline u32 __xchg_##type##sfx(void *p, u32 val) \ > > +{ \ > > + unsigned int prev, prev_mask, tmp, bitoff, off; \ > > + \ > > + off = (unsigned long)p % sizeof(u32); \ > > + bitoff = BITOFF_CAL(sizeof(type), off); \ > > + p -= off; \ > > + val <<= bitoff; \ > > + prev_mask = (u32)(type)-1 << bitoff; \ > > + \ > > + __asm__ __volatile__( \ > > +"1: lwarx %0,0,%3\n" \ > > +" andc %1,%0,%5\n" \ > > +" or %1,%1,%4\n" \ > > + PPC405_ERR77(0,%3) \ > > +" stwcx. 
%1,0,%3\n" \ > > +" bne- 1b\n" \ > > + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ > > I think we can save the "tmp" here by: > > __asm__ volatile__( > "1: lwarx %0,0,%2\n" > " andc %0,%0,%4\n" > " or %0,%0,%3\n" > PPC405_ERR77(0,%2) > " stwcx. %0,0,%2\n" > " bne- 1b\n" > : "=&r" (prev), "+m" (*(u32*)p) > : "r" (p), "r" (val), "r" (prev_mask) > : "cc", cl); > > right? > > > + : "r" (p), "r" (val), "r" (prev_mask) \ > > + : "cc", cl); \ > > + \ > > + return prev >> bitoff; \ > > +} > > + > > +#define CMPXCHG_GEN(type, sfx, br, br2, cl) \ > > +static inline \ > > +u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new) \ > > +{ \ > > + unsigned int prev, prev_mask, tmp, bitoff, off; \ > > + \ > > + off = (unsigned long)p % sizeof(u32); \ > > + bitoff = BITOFF_CAL(sizeof(type), off); \ > > + p -= off; \ > > + old <<= bitoff; \ > > + new <<= bitoff; \ > > + prev_mask = (u32)(type)-1 << bitoff; \ > > + \ > > + __asm__ __volatile__( \ > > + br \ > > +"1: lwarx %0,0,%3\n" \ > > +" and %1,%0,%6\n" \ > > +" cmpw 0,%1,%4\n" \ > > +" bne- 2f\n" \ > > +" andc %1,%0,%6\n" \ > > +" or %1,%1,%5\n" \ > > + PPC405_ERR77(0,%3) \ > > +" stwcx. %1,0,%3\n" \ > > +" bne- 1b\n" \ > > + br2 \ > > + "\n" \ > > +"2:" \ > > + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ > > And "tmp" here could also be saved by: > > "1: lwarx %0,0,%2\n" \ > " xor %3,%0,%3\n" \ > " and. %3,%3,%5\n" \ > " bne- 2f\n" \ > " andc %0,%0,%5\n" \ > " or %0,%0,%4\n" \ > PPC405_ERR77(0,%2) \ > " stwcx. %0,0,%2\n" \ > " bne- 1b\n" \ > br2 \ > "\n" \ > "2:" \ > : "=&r" (prev), "+m" (*(u32*)p) \ > : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \ > : "cc", cl); \ > > right? > Sorry, my bad, we can't implement cmpxchg like this.. please ignore this, I should really go to bed soon... But still, we can save the "tmp" for xchg() I think. Regards, Boqun > IIUC, saving the local variable "tmp" will result in saving a general > register for the compilers to use for other variables. > > So thoughts? > > Regards, > Boqun >
On Wed, Apr 27, 2016 at 10:50:34PM +0800, Boqun Feng wrote:
>
> Sorry, my bad, we can't implement cmpxchg like this.. please ignore
> this, I should really go to bed soon...
>
> But still, we can save the "tmp" for xchg() I think.
>

No.. we can't. Sorry for all the noise.

This patch looks good to me.

FWIW, you can add

Acked-by: Boqun Feng <boqun.feng@gmail.com>

Regards,
Boqun
On Wed, Apr 27, 2016 at 05:16:45PM +0800, Pan Xinhui wrote:
> From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
>
> Implement xchg{u8,u16}{local,relaxed}, and
> cmpxchg{u8,u16}{,local,acquire,relaxed}.
>
> It works on all ppc.
>
> remove volatile of first parameter in __cmpxchg_local and __cmpxchg
>
> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>

Generally has the right shape; and I trust others to double check the
ppc-asm minutia.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
On 2016年04月27日 22:59, Boqun Feng wrote:
> On Wed, Apr 27, 2016 at 10:50:34PM +0800, Boqun Feng wrote:
>>
>> Sorry, my bad, we can't implement cmpxchg like this.. please ignore
>> this, I should really go to bed soon...
>>
>> But still, we can save the "tmp" for xchg() I think.
>>
>
> No.. we can't. Sorry for all the noise.
>
> This patch looks good to me.
>
> FWIW, you can add
>
> Acked-by: Boqun Feng <boqun.feng@gmail.com>
>

thanks!

> Regards,
> Boqun
>
On 2016年04月28日 15:59, Peter Zijlstra wrote:
> On Wed, Apr 27, 2016 at 05:16:45PM +0800, Pan Xinhui wrote:
>> From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
>>
>> Implement xchg{u8,u16}{local,relaxed}, and
>> cmpxchg{u8,u16}{,local,acquire,relaxed}.
>>
>> It works on all ppc.
>>
>> remove volatile of first parameter in __cmpxchg_local and __cmpxchg
>>
>> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
>
> Generally has the right shape; and I trust others to double check the
> ppc-asm minutia.
>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>
>

thanks!
On Wed, 2016-04-27 at 09:16:45 UTC, xinhui wrote:
> From: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
>
> Implement xchg{u8,u16}{local,relaxed}, and
> cmpxchg{u8,u16}{,local,acquire,relaxed}.
>
> It works on all ppc.
>
> remove volatile of first parameter in __cmpxchg_local and __cmpxchg
>
> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
> Acked-by: Boqun Feng <boqun.feng@gmail.com>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d0563a1297e234ed37f6b51c2e9321

cheers
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 44efe73..8a3735f 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -7,6 +7,71 @@ #include <asm/asm-compat.h> #include <linux/bug.h> +#ifdef __BIG_ENDIAN +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) +#else +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) +#endif + +#define XCHG_GEN(type, sfx, cl) \ +static inline u32 __xchg_##type##sfx(void *p, u32 val) \ +{ \ + unsigned int prev, prev_mask, tmp, bitoff, off; \ + \ + off = (unsigned long)p % sizeof(u32); \ + bitoff = BITOFF_CAL(sizeof(type), off); \ + p -= off; \ + val <<= bitoff; \ + prev_mask = (u32)(type)-1 << bitoff; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%3\n" \ +" andc %1,%0,%5\n" \ +" or %1,%1,%4\n" \ + PPC405_ERR77(0,%3) \ +" stwcx. %1,0,%3\n" \ +" bne- 1b\n" \ + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ + : "r" (p), "r" (val), "r" (prev_mask) \ + : "cc", cl); \ + \ + return prev >> bitoff; \ +} + +#define CMPXCHG_GEN(type, sfx, br, br2, cl) \ +static inline \ +u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new) \ +{ \ + unsigned int prev, prev_mask, tmp, bitoff, off; \ + \ + off = (unsigned long)p % sizeof(u32); \ + bitoff = BITOFF_CAL(sizeof(type), off); \ + p -= off; \ + old <<= bitoff; \ + new <<= bitoff; \ + prev_mask = (u32)(type)-1 << bitoff; \ + \ + __asm__ __volatile__( \ + br \ +"1: lwarx %0,0,%3\n" \ +" and %1,%0,%6\n" \ +" cmpw 0,%1,%4\n" \ +" bne- 2f\n" \ +" andc %1,%0,%6\n" \ +" or %1,%1,%5\n" \ + PPC405_ERR77(0,%3) \ +" stwcx. %1,0,%3\n" \ +" bne- 1b\n" \ + br2 \ + "\n" \ +"2:" \ + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ + : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \ + : "cc", cl); \ + \ + return prev >> bitoff; \ +} + /* * Atomic exchange * @@ -14,6 +79,11 @@ * the previous value stored there. 
*/ +XCHG_GEN(u8, _local, "memory"); +XCHG_GEN(u8, _relaxed, "cc"); +XCHG_GEN(u16, _local, "memory"); +XCHG_GEN(u16, _relaxed, "cc"); + static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) { @@ -85,9 +155,13 @@ __xchg_u64_relaxed(u64 *p, unsigned long val) #endif static __always_inline unsigned long -__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +__xchg_local(void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_local(ptr, x); + case 2: + return __xchg_u16_local(ptr, x); case 4: return __xchg_u32_local(ptr, x); #ifdef CONFIG_PPC64 @@ -103,6 +177,10 @@ static __always_inline unsigned long __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_relaxed(ptr, x); + case 2: + return __xchg_u16_relaxed(ptr, x); case 4: return __xchg_u32_relaxed(ptr, x); #ifdef CONFIG_PPC64 @@ -131,6 +209,15 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) * and return the old value of *p. 
*/ +CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); +CMPXCHG_GEN(u8, _local, , , "memory"); +CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); +CMPXCHG_GEN(u8, _relaxed, , , "cc"); +CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); +CMPXCHG_GEN(u16, _local, , , "memory"); +CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); +CMPXCHG_GEN(u16, _relaxed, , , "cc"); + static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) { @@ -312,10 +399,14 @@ __cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new) #endif static __always_inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, +__cmpxchg(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8(ptr, old, new); + case 2: + return __cmpxchg_u16(ptr, old, new); case 4: return __cmpxchg_u32(ptr, old, new); #ifdef CONFIG_PPC64 @@ -328,10 +419,14 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, } static __always_inline unsigned long -__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, +__cmpxchg_local(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_local(ptr, old, new); + case 2: + return __cmpxchg_u16_local(ptr, old, new); case 4: return __cmpxchg_u32_local(ptr, old, new); #ifdef CONFIG_PPC64 @@ -348,6 +443,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_relaxed(ptr, old, new); + case 2: + return __cmpxchg_u16_relaxed(ptr, old, new); case 4: return __cmpxchg_u32_relaxed(ptr, old, new); #ifdef CONFIG_PPC64 @@ -364,6 +463,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_acquire(ptr, 
old, new); + case 2: + return __cmpxchg_u16_acquire(ptr, old, new); case 4: return __cmpxchg_u32_acquire(ptr, old, new); #ifdef CONFIG_PPC64