Message ID | 1481032315-12420-1-git-send-email-stli@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
ping. On 12/06/2016 02:51 PM, Stefan Liebler wrote: > This uses atomic operations to access lock elision metadata that is accessed > concurrently (ie, adapt_count fields). The size of the data is less than a > word but accessed only with atomic loads and stores. > > See also x86 commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a: > "Use C11-like atomics instead of plain memory accesses in x86 lock elision." > > ChangeLog: > > * sysdeps/unix/sysv/linux/s390/elision-lock.c > (__lll_lock_elision): Use atomics to load / store adapt_count. > * sysdeps/unix/sysv/linux/s390/elision-trylock.c > (__lll_trylock_elision): Likewise. > --- > sysdeps/unix/sysv/linux/s390/elision-lock.c | 25 ++++++++++++++++++------- > sysdeps/unix/sysv/linux/s390/elision-trylock.c | 14 +++++++++----- > 2 files changed, 27 insertions(+), 12 deletions(-) > > diff --git a/sysdeps/unix/sysv/linux/s390/elision-lock.c b/sysdeps/unix/sysv/linux/s390/elision-lock.c > index ecb507e..1876d21 100644 > --- a/sysdeps/unix/sysv/linux/s390/elision-lock.c > +++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c > @@ -45,11 +45,18 @@ > int > __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) > { > - if (*adapt_count > 0) > + /* adapt_count can be accessed concurrently; these accesses can be both > + inside of transactions (if critical sections are nested and the outer > + critical section uses lock elision) and outside of transactions. Thus, > + we need to use atomic accesses to avoid data races. However, the > + value of adapt_count is just a hint, so relaxed MO accesses are > + sufficient. */ > + if (atomic_load_relaxed (adapt_count) > 0) > { > /* Lost updates are possible, but harmless. Due to races this might lead > to *adapt_count becoming less than zero. 
*/ > - (*adapt_count)--; > + atomic_store_relaxed (adapt_count, > + atomic_load_relaxed (adapt_count) - 1); > goto use_lock; > } > > @@ -74,8 +81,10 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) > /* In a non-nested transaction there is no need to abort, > which is expensive. */ > __builtin_tend (); > + /* Don't try to use transactions for the next couple of times. > + See above for why relaxed MO is sufficient. */ > if (aconf.skip_lock_busy > 0) > - *adapt_count = aconf.skip_lock_busy; > + atomic_store_relaxed (adapt_count, aconf.skip_lock_busy); > goto use_lock; > } > else /* nesting depth is > 1 */ > @@ -101,18 +110,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) > /* A persistent abort (cc 1 or 3) indicates that a retry is > probably futile. Use the normal locking now and for the > next couple of calls. > - Be careful to avoid writing to the lock. */ > + Be careful to avoid writing to the lock. See above for why > + relaxed MO is sufficient. */ > if (aconf.skip_lock_internal_abort > 0) > - *adapt_count = aconf.skip_lock_internal_abort; > + atomic_store_relaxed (adapt_count, > + aconf.skip_lock_internal_abort); > goto use_lock; > } > } > } > > /* Same logic as above, but for for a number of temporary failures in a > - row. */ > + row. See above for why relaxed MO is sufficient. 
*/ > if (aconf.skip_lock_out_of_tbegin_retries > 0 && aconf.try_tbegin > 0) > - *adapt_count = aconf.skip_lock_out_of_tbegin_retries; > + atomic_store_relaxed (adapt_count, aconf.skip_lock_out_of_tbegin_retries); > > use_lock: > return LLL_LOCK ((*futex), private); > diff --git a/sysdeps/unix/sysv/linux/s390/elision-trylock.c b/sysdeps/unix/sysv/linux/s390/elision-trylock.c > index 3d5a994..a3252b8 100644 > --- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c > +++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c > @@ -49,8 +49,10 @@ __lll_trylock_elision (int *futex, short *adapt_count) > __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1); > } > > - /* Only try a transaction if it's worth it. */ > - if (*adapt_count <= 0) > + /* Only try a transaction if it's worth it. See __lll_lock_elision for > + why we need atomic accesses. Relaxed MO is sufficient because this is > + just a hint. */ > + if (atomic_load_relaxed (adapt_count) <= 0) > { > unsigned status; > > @@ -65,9 +67,10 @@ __lll_trylock_elision (int *futex, short *adapt_count) > __builtin_tend (); > /* Note: Changing the adapt_count here might abort a transaction on a > different cpu, but that could happen anyway when the futex is > - acquired, so there's no need to check the nesting depth here. */ > + acquired, so there's no need to check the nesting depth here. > + See above for why relaxed MO is sufficient. */ > if (aconf.skip_lock_busy > 0) > - *adapt_count = aconf.skip_lock_busy; > + atomic_store_relaxed (adapt_count, aconf.skip_lock_busy); > } > else > { > @@ -87,7 +90,8 @@ __lll_trylock_elision (int *futex, short *adapt_count) > { > /* Lost updates are possible, but harmless. Due to races this might lead > to *adapt_count becoming less than zero. */ > - (*adapt_count)--; > + atomic_store_relaxed (adapt_count, > + atomic_load_relaxed (adapt_count) - 1); > } > > return lll_trylock (*futex); >
On 12/13/2016 09:38 AM, Stefan Liebler wrote: > ping. > Any comments, objections? Otherwise I plan to commit these s390-specific patches next week.
On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
> ping.
Sorry, I've been busy with the robust mutex bugs.
This patch is OK. Thanks.
On 12/16/2016 07:14 PM, Torvald Riegel wrote: > On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote: >> ping. > > Sorry, I've been busy with the robust mutex bugs. > > This patch is OK. Thanks. > Thanks. Committed.
On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote: > On 12/16/2016 07:14 PM, Torvald Riegel wrote: > > On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote: > >> ping. > > > > Sorry, I've been busy with the robust mutex bugs. > > > > This patch is OK. Thanks. > > > Thanks. Committed. I've reviewed and ack'ed patch 1 of 4, but it looks like you committed patches 2-4 as well. Did anyone review these?
On Thu, 2016-12-22 at 10:34 +0100, Torvald Riegel wrote: > On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote: > > On 12/16/2016 07:14 PM, Torvald Riegel wrote: > > > On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote: > > >> ping. > > > > > > Sorry, I've been busy with the robust mutex bugs. > > > > > > This patch is OK. Thanks. > > > > > Thanks. Committed. > > I've reviewed and ack'ed patch 1 of 4, but it looks like you committed > patches 2-4 as well. Did anyone review these? > Ping.
On 12/22/2016 10:34 AM, Torvald Riegel wrote: > On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote: >> On 12/16/2016 07:14 PM, Torvald Riegel wrote: >>> On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote: >>>> ping. >>> >>> Sorry, I've been busy with the robust mutex bugs. >>> >>> This patch is OK. Thanks. >>> >> Thanks. Committed. > > I've reviewed and ack'ed patch 1 of 4, but it looks like you committed > patches 2-4 as well. Did anyone review these? > Sorry. That was my fault. Shall I revert the other patches and continue after the release?
On Wed, 2017-01-11 at 17:06 +0100, Stefan Liebler wrote: > On 12/22/2016 10:34 AM, Torvald Riegel wrote: > > On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote: > >> On 12/16/2016 07:14 PM, Torvald Riegel wrote: > >>> On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote: > >>>> ping. > >>> > >>> Sorry, I've been busy with the robust mutex bugs. > >>> > >>> This patch is OK. Thanks. > >>> > >> Thanks. Committed. > > > > I've reviewed and ack'ed patch 1 of 4, but it looks like you committed > > patches 2-4 as well. Did anyone review these? > > > Sorry. That was my fault. > Shall I revert the other patches and continue after the release? > Let's do the normal review process first. I've reviewed the other 3 by now; maybe we can just fix them up where necessary. Sounds good?
diff --git a/sysdeps/unix/sysv/linux/s390/elision-lock.c b/sysdeps/unix/sysv/linux/s390/elision-lock.c index ecb507e..1876d21 100644 --- a/sysdeps/unix/sysv/linux/s390/elision-lock.c +++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c @@ -45,11 +45,18 @@ int __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) { - if (*adapt_count > 0) + /* adapt_count can be accessed concurrently; these accesses can be both + inside of transactions (if critical sections are nested and the outer + critical section uses lock elision) and outside of transactions. Thus, + we need to use atomic accesses to avoid data races. However, the + value of adapt_count is just a hint, so relaxed MO accesses are + sufficient. */ + if (atomic_load_relaxed (adapt_count) > 0) { /* Lost updates are possible, but harmless. Due to races this might lead to *adapt_count becoming less than zero. */ - (*adapt_count)--; + atomic_store_relaxed (adapt_count, + atomic_load_relaxed (adapt_count) - 1); goto use_lock; } @@ -74,8 +81,10 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) /* In a non-nested transaction there is no need to abort, which is expensive. */ __builtin_tend (); + /* Don't try to use transactions for the next couple of times. + See above for why relaxed MO is sufficient. */ if (aconf.skip_lock_busy > 0) - *adapt_count = aconf.skip_lock_busy; + atomic_store_relaxed (adapt_count, aconf.skip_lock_busy); goto use_lock; } else /* nesting depth is > 1 */ @@ -101,18 +110,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) /* A persistent abort (cc 1 or 3) indicates that a retry is probably futile. Use the normal locking now and for the next couple of calls. - Be careful to avoid writing to the lock. */ + Be careful to avoid writing to the lock. See above for why + relaxed MO is sufficient. 
*/ if (aconf.skip_lock_internal_abort > 0) - *adapt_count = aconf.skip_lock_internal_abort; + atomic_store_relaxed (adapt_count, + aconf.skip_lock_internal_abort); goto use_lock; } } } /* Same logic as above, but for for a number of temporary failures in a - row. */ + row. See above for why relaxed MO is sufficient. */ if (aconf.skip_lock_out_of_tbegin_retries > 0 && aconf.try_tbegin > 0) - *adapt_count = aconf.skip_lock_out_of_tbegin_retries; + atomic_store_relaxed (adapt_count, aconf.skip_lock_out_of_tbegin_retries); use_lock: return LLL_LOCK ((*futex), private); diff --git a/sysdeps/unix/sysv/linux/s390/elision-trylock.c b/sysdeps/unix/sysv/linux/s390/elision-trylock.c index 3d5a994..a3252b8 100644 --- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c +++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c @@ -49,8 +49,10 @@ __lll_trylock_elision (int *futex, short *adapt_count) __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1); } - /* Only try a transaction if it's worth it. */ - if (*adapt_count <= 0) + /* Only try a transaction if it's worth it. See __lll_lock_elision for + why we need atomic accesses. Relaxed MO is sufficient because this is + just a hint. */ + if (atomic_load_relaxed (adapt_count) <= 0) { unsigned status; @@ -65,9 +67,10 @@ __lll_trylock_elision (int *futex, short *adapt_count) __builtin_tend (); /* Note: Changing the adapt_count here might abort a transaction on a different cpu, but that could happen anyway when the futex is - acquired, so there's no need to check the nesting depth here. */ + acquired, so there's no need to check the nesting depth here. + See above for why relaxed MO is sufficient. */ if (aconf.skip_lock_busy > 0) - *adapt_count = aconf.skip_lock_busy; + atomic_store_relaxed (adapt_count, aconf.skip_lock_busy); } else { @@ -87,7 +90,8 @@ __lll_trylock_elision (int *futex, short *adapt_count) { /* Lost updates are possible, but harmless. Due to races this might lead to *adapt_count becoming less than zero. 
*/ - (*adapt_count)--; + atomic_store_relaxed (adapt_count, + atomic_load_relaxed (adapt_count) - 1); } return lll_trylock (*futex);