Message ID | CAMXFM3tui-xJ1pGWuK_YJS3XOR+KV4PL7NeKpgPJ_KC9pRARxw@mail.gmail.com |
---|---|
State | New |
Headers | show |
2017-04-05 11:23 GMT+02:00 Andrew Senkevich <andrew.n.senkevich@gmail.com>: > Hi, > > here is the patch to enable RTM lock elision in current rwlock implementation. > > Patch adds enabling itself using existing wrappers macro with new > conditions and fixes typo in elision tuning. > > ChangeLog: > > * nptl/pthread_rwlock_rdlock.c: Add RTM lock elision. > * nptl/pthread_rwlock_tryrdlock.c: Likewise. > * nptl/pthread_rwlock_trywrlock.c: Likewise. > * nptl/pthread_rwlock_wrlock.c: Likewise. > * nptl/pthread_rwlock_unlock.c: Add RTM unlock elision. > * sysdeps/x86/elide.h (ELIDE_LOCK): Fixed typo. > > diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c > index e07581b..3073dfc 100644 > --- a/nptl/pthread_rwlock_rdlock.c > +++ b/nptl/pthread_rwlock_rdlock.c > @@ -16,6 +16,8 @@ > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > > +#include <elide.h> > + > #include "pthread_rwlock_common.c" > > /* See pthread_rwlock_common.c. */ > @@ -24,6 +26,12 @@ __pthread_rwlock_rdlock (pthread_rwlock_t *rwlock) > { > LIBC_PROBE (rdlock_entry, 1, rwlock); > > + if (ELIDE_LOCK (rwlock->__data.__rwelision, > + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && > (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 > + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex == 0)) > + return 0; > + > int result = __pthread_rwlock_rdlock_full (rwlock, NULL); > LIBC_PROBE (rdlock_acquire_read, 1, rwlock); > return result; > diff --git a/nptl/pthread_rwlock_tryrdlock.c b/nptl/pthread_rwlock_tryrdlock.c > index 6c3014c..7cff149 100644 > --- a/nptl/pthread_rwlock_tryrdlock.c > +++ b/nptl/pthread_rwlock_tryrdlock.c > @@ -20,6 +20,8 @@ > #include "pthreadP.h" > #include <atomic.h> > #include <stdbool.h> > +#include <elide.h> > + > #include "pthread_rwlock_common.c" > > > @@ -27,6 +29,12 @@ > int > __pthread_rwlock_tryrdlock (pthread_rwlock_t *rwlock) > { > + if (ELIDE_TRYLOCK (rwlock->__data.__rwelision, > + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && > (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 > + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex, 0)) > + return 0; > + > /* For tryrdlock, we could speculate that we will succeed and go ahead and > register as a reader. However, if we misspeculate, we have to do the > same steps as a timed-out rdlock, which will increase contention. > diff --git a/nptl/pthread_rwlock_trywrlock.c b/nptl/pthread_rwlock_trywrlock.c > index 0d9ccaf..dce32da 100644 > --- a/nptl/pthread_rwlock_trywrlock.c > +++ b/nptl/pthread_rwlock_trywrlock.c > @@ -19,11 +19,18 @@ > #include <errno.h> > #include "pthreadP.h" > #include <atomic.h> > +#include <elide.h> > > /* See pthread_rwlock_common.c for an overview. */ > int > __pthread_rwlock_trywrlock (pthread_rwlock_t *rwlock) > { > + if (ELIDE_TRYLOCK (rwlock->__data.__rwelision, > + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && > (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 > + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex, 1)) > + return 0; > + > /* When in a trywrlock, we can acquire the write lock if it is in states > #1 (idle and read phase) and #5 (idle and write phase), and also in #6 > (readers waiting, write phase) if we prefer writers. > diff --git a/nptl/pthread_rwlock_unlock.c b/nptl/pthread_rwlock_unlock.c > index ef46e88..fa4555d 100644 > --- a/nptl/pthread_rwlock_unlock.c > +++ b/nptl/pthread_rwlock_unlock.c > @@ -22,6 +22,7 @@ > #include <pthread.h> > #include <pthreadP.h> > #include <stap-probe.h> > +#include <elide.h> > > #include "pthread_rwlock_common.c" > > @@ -31,6 +32,10 @@ __pthread_rwlock_unlock (pthread_rwlock_t *rwlock) > { > LIBC_PROBE (rwlock_unlock, 1, rwlock); > > + if (ELIDE_UNLOCK ((rwlock->__data.__readers >> > PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex == 0)) > + return 0; > + > /* We distinguish between having acquired a read vs. a write lock by looking > at the writer TID. If it's equal to our TID, we must be the writer > because nobody else can have stored this value. Also, if we are a > diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c > index 335fcd1..d041aad 100644 > --- a/nptl/pthread_rwlock_wrlock.c > +++ b/nptl/pthread_rwlock_wrlock.c > @@ -16,6 +16,8 @@ > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > > +#include <elide.h> > + > #include "pthread_rwlock_common.c" > > /* See pthread_rwlock_common.c. */ > @@ -24,6 +26,12 @@ __pthread_rwlock_wrlock (pthread_rwlock_t *rwlock) > { > LIBC_PROBE (wrlock_entry, 1, rwlock); > > + if (ELIDE_LOCK (rwlock->__data.__rwelision, > + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && > (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 > + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex == 0)) > + return 0; > + > int result = __pthread_rwlock_wrlock_full (rwlock, NULL); > LIBC_PROBE (wrlock_acquire_write, 1, rwlock); > return result; > diff --git a/sysdeps/x86/elide.h b/sysdeps/x86/elide.h > index 53de418..3eaf15c 100644 > --- a/sysdeps/x86/elide.h > +++ b/sysdeps/x86/elide.h > @@ -77,7 +77,7 @@ elision_adapt(signed char *adapt_count, unsigned int status) > } \ > _xabort (_ABORT_LOCK_BUSY); \ > } \ > - if (!elision_adapt (&(adapt_count), status)) \ > + if (elision_adapt (&(adapt_count), status)) \ > break; \ > } \ > } \ > > Performance was measured with bench-multithread.c posted at > https://sourceware.org/ml/libc-alpha/2017-01/msg00168.html on i7-6700 > (Skylake). > > With R=60 and shared W=0 lock elision helps to increase scalability > with some slowdown in single-thread case: > w/o patch: > threads=1 iter/s= 4060317 timing_t=17040157458 > threads=2 iter/s= 6752786 timing_t=17040102928 > threads=3 iter/s= 9392445 timing_t=17040102070 > threads=4 iter/s= 10463599 timing_t=17040103938 > > with patch: > threads=1 iter/s= 3402230 timing_t=17040139858 > threads=2 iter/s= 6784424 timing_t=17040102620 > threads=3 iter/s= 10157296 timing_t=17040103664 > threads=4 iter/s= 13509321 timing_t=17040108804 > > But with R=60 and shared W=1 we have not so good picture: > w/o patch: > threads=1 iter/s= 3939640 timing_t=17040142322 > threads=2 iter/s= 6018330 timing_t=17040101032 > threads=3 iter/s= 7598935 timing_t=17040101858 > threads=4 iter/s= 7873063 timing_t=17040107686 > > with patch: > threads=1 iter/s= 3317374 timing_t=17040140564 > threads=2 iter/s= 4555148 timing_t=17040106910 > threads=3 iter/s= 6583628 timing_t=17040105330 > threads=4 iter/s= 7421028 timing_t=17040101884 > (perf stat -T tells we have 9.44% transactional cycles and 3.84% aborted cycles) > > I have tried to tune lock elision and it change performance but > haven't achieve results better or equal than w/o patch: > > with patch and tuning: > threads=1 iter/s= 3324805 timing_t=17040143918 > threads=2 iter/s= 5360456 timing_t=17040100294 > threads=3 iter/s= 6997841 timing_t=17040107640 > threads=4 iter/s= 7406136 timing_t=17040104018 > (perf stat -T tells we have 5.84% transactional cycles and 0.46% aborted cycles) > > Is condition for wrapper macro enough correct? > Maybe it have sence to make tuning configurable by tunables and have > additional switch for turn it on/off? Ping. Just a note that here are two parts, one part is only typo fix: > * sysdeps/x86/elide.h (ELIDE_LOCK): Fix typo. > > diff --git a/sysdeps/x86/elide.h b/sysdeps/x86/elide.h > index 53de418..3eaf15c 100644 > --- a/sysdeps/x86/elide.h > +++ b/sysdeps/x86/elide.h > @@ -77,7 +77,7 @@ elision_adapt(signed char *adapt_count, unsigned int status) > } \ > _xabort (_ABORT_LOCK_BUSY); \ > } \ > - if (!elision_adapt (&(adapt_count), status)) \ > + if (elision_adapt (&(adapt_count), status)) \ > break; \ > } \ > } \ -- WBR, Andrew
2017-04-05 11:23 GMT+02:00 Andrew Senkevich <andrew.n.senkevich@gmail.com>: > Hi, > > here is the patch to enable RTM lock elision in current rwlock implementation. > > Patch adds enabling itself using existing wrappers macro with new > conditions and fixes typo in elision tuning. > > ChangeLog: > > * nptl/pthread_rwlock_rdlock.c: Add RTM lock elision. > * nptl/pthread_rwlock_tryrdlock.c: Likewise. > * nptl/pthread_rwlock_trywrlock.c: Likewise. > * nptl/pthread_rwlock_wrlock.c: Likewise. > * nptl/pthread_rwlock_unlock.c: Add RTM unlock elision. > * sysdeps/x86/elide.h (ELIDE_LOCK): Fixed typo. > > diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c > index e07581b..3073dfc 100644 > --- a/nptl/pthread_rwlock_rdlock.c > +++ b/nptl/pthread_rwlock_rdlock.c > @@ -16,6 +16,8 @@ > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > > +#include <elide.h> > + > #include "pthread_rwlock_common.c" > > /* See pthread_rwlock_common.c. */ > @@ -24,6 +26,12 @@ __pthread_rwlock_rdlock (pthread_rwlock_t *rwlock) > { > LIBC_PROBE (rdlock_entry, 1, rwlock); > > + if (ELIDE_LOCK (rwlock->__data.__rwelision, > + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && > (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 > + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex == 0)) > + return 0; > + > int result = __pthread_rwlock_rdlock_full (rwlock, NULL); > LIBC_PROBE (rdlock_acquire_read, 1, rwlock); > return result; > diff --git a/nptl/pthread_rwlock_tryrdlock.c b/nptl/pthread_rwlock_tryrdlock.c > index 6c3014c..7cff149 100644 > --- a/nptl/pthread_rwlock_tryrdlock.c > +++ b/nptl/pthread_rwlock_tryrdlock.c > @@ -20,6 +20,8 @@ > #include "pthreadP.h" > #include <atomic.h> > #include <stdbool.h> > +#include <elide.h> > + > #include "pthread_rwlock_common.c" > > > @@ -27,6 +29,12 @@ > int > __pthread_rwlock_tryrdlock (pthread_rwlock_t *rwlock) > { > + if (ELIDE_TRYLOCK (rwlock->__data.__rwelision, > + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && > (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 > + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex, 0)) > + return 0; > + > /* For tryrdlock, we could speculate that we will succeed and go ahead and > register as a reader. However, if we misspeculate, we have to do the > same steps as a timed-out rdlock, which will increase contention. > diff --git a/nptl/pthread_rwlock_trywrlock.c b/nptl/pthread_rwlock_trywrlock.c > index 0d9ccaf..dce32da 100644 > --- a/nptl/pthread_rwlock_trywrlock.c > +++ b/nptl/pthread_rwlock_trywrlock.c > @@ -19,11 +19,18 @@ > #include <errno.h> > #include "pthreadP.h" > #include <atomic.h> > +#include <elide.h> > > /* See pthread_rwlock_common.c for an overview. */ > int > __pthread_rwlock_trywrlock (pthread_rwlock_t *rwlock) > { > + if (ELIDE_TRYLOCK (rwlock->__data.__rwelision, > + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && > (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 > + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex, 1)) > + return 0; > + > /* When in a trywrlock, we can acquire the write lock if it is in states > #1 (idle and read phase) and #5 (idle and write phase), and also in #6 > (readers waiting, write phase) if we prefer writers. > diff --git a/nptl/pthread_rwlock_unlock.c b/nptl/pthread_rwlock_unlock.c > index ef46e88..fa4555d 100644 > --- a/nptl/pthread_rwlock_unlock.c > +++ b/nptl/pthread_rwlock_unlock.c > @@ -22,6 +22,7 @@ > #include <pthread.h> > #include <pthreadP.h> > #include <stap-probe.h> > +#include <elide.h> > > #include "pthread_rwlock_common.c" > > @@ -31,6 +32,10 @@ __pthread_rwlock_unlock (pthread_rwlock_t *rwlock) > { > LIBC_PROBE (rwlock_unlock, 1, rwlock); > > + if (ELIDE_UNLOCK ((rwlock->__data.__readers >> > PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex == 0)) > + return 0; > + > /* We distinguish between having acquired a read vs. a write lock by looking > at the writer TID. If it's equal to our TID, we must be the writer > because nobody else can have stored this value. Also, if we are a > diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c > index 335fcd1..d041aad 100644 > --- a/nptl/pthread_rwlock_wrlock.c > +++ b/nptl/pthread_rwlock_wrlock.c > @@ -16,6 +16,8 @@ > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > > +#include <elide.h> > + > #include "pthread_rwlock_common.c" > > /* See pthread_rwlock_common.c. */ > @@ -24,6 +26,12 @@ __pthread_rwlock_wrlock (pthread_rwlock_t *rwlock) > { > LIBC_PROBE (wrlock_entry, 1, rwlock); > > + if (ELIDE_LOCK (rwlock->__data.__rwelision, > + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && > (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 > + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 > + && rwlock->__data.__writers_futex == 0)) > + return 0; > + > int result = __pthread_rwlock_wrlock_full (rwlock, NULL); > LIBC_PROBE (wrlock_acquire_write, 1, rwlock); > return result; > diff --git a/sysdeps/x86/elide.h b/sysdeps/x86/elide.h > index 53de418..3eaf15c 100644 > --- a/sysdeps/x86/elide.h > +++ b/sysdeps/x86/elide.h > @@ -77,7 +77,7 @@ elision_adapt(signed char *adapt_count, unsigned int status) > } \ > _xabort (_ABORT_LOCK_BUSY); \ > } \ > - if (!elision_adapt (&(adapt_count), status)) \ > + if (elision_adapt (&(adapt_count), status)) \ > break; \ > } \ > } \ > > Performance was measured with bench-multithread.c posted at > https://sourceware.org/ml/libc-alpha/2017-01/msg00168.html on i7-6700 > (Skylake). > > With R=60 and shared W=0 lock elision helps to increase scalability > with some slowdown in single-thread case: > w/o patch: > threads=1 iter/s= 4060317 timing_t=17040157458 > threads=2 iter/s= 6752786 timing_t=17040102928 > threads=3 iter/s= 9392445 timing_t=17040102070 > threads=4 iter/s= 10463599 timing_t=17040103938 > > with patch: > threads=1 iter/s= 3402230 timing_t=17040139858 > threads=2 iter/s= 6784424 timing_t=17040102620 > threads=3 iter/s= 10157296 timing_t=17040103664 > threads=4 iter/s= 13509321 timing_t=17040108804 > > But with R=60 and shared W=1 we have not so good picture: > w/o patch: > threads=1 iter/s= 3939640 timing_t=17040142322 > threads=2 iter/s= 6018330 timing_t=17040101032 > threads=3 iter/s= 7598935 timing_t=17040101858 > threads=4 iter/s= 7873063 timing_t=17040107686 > > with patch: > threads=1 iter/s= 3317374 timing_t=17040140564 > threads=2 iter/s= 4555148 timing_t=17040106910 > threads=3 iter/s= 6583628 timing_t=17040105330 > threads=4 iter/s= 7421028 timing_t=17040101884 > (perf stat -T tells we have 9.44% transactional cycles and 3.84% aborted cycles) > > I have tried to tune lock elision and it change performance but > haven't achieve results better or equal than w/o patch: > > with patch and tuning: > threads=1 iter/s= 3324805 timing_t=17040143918 > threads=2 iter/s= 5360456 timing_t=17040100294 > threads=3 iter/s= 6997841 timing_t=17040107640 > threads=4 iter/s= 7406136 timing_t=17040104018 > (perf stat -T tells we have 5.84% transactional cycles and 0.46% aborted cycles) > > Is condition for wrapper macro enough correct? > Maybe it have sence to make tuning configurable by tunables and have > additional switch for turn it on/off? Ping^2. -- WBR, Andrew
diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c index e07581b..3073dfc 100644 --- a/nptl/pthread_rwlock_rdlock.c +++ b/nptl/pthread_rwlock_rdlock.c @@ -16,6 +16,8 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#include <elide.h> + #include "pthread_rwlock_common.c" /* See pthread_rwlock_common.c. */ @@ -24,6 +26,12 @@ __pthread_rwlock_rdlock (pthread_rwlock_t *rwlock) { LIBC_PROBE (rdlock_entry, 1, rwlock); + if (ELIDE_LOCK (rwlock->__data.__rwelision, + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 + && rwlock->__data.__writers_futex == 0)) + return 0; + int result = __pthread_rwlock_rdlock_full (rwlock, NULL); LIBC_PROBE (rdlock_acquire_read, 1, rwlock); return result; diff --git a/nptl/pthread_rwlock_tryrdlock.c b/nptl/pthread_rwlock_tryrdlock.c index 6c3014c..7cff149 100644 --- a/nptl/pthread_rwlock_tryrdlock.c +++ b/nptl/pthread_rwlock_tryrdlock.c @@ -20,6 +20,8 @@ #include "pthreadP.h" #include <atomic.h> #include <stdbool.h> +#include <elide.h> + #include "pthread_rwlock_common.c" @@ -27,6 +29,12 @@ int __pthread_rwlock_tryrdlock (pthread_rwlock_t *rwlock) { + if (ELIDE_TRYLOCK (rwlock->__data.__rwelision, + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 + && rwlock->__data.__writers_futex, 0)) + return 0; + /* For tryrdlock, we could speculate that we will succeed and go ahead and register as a reader. However, if we misspeculate, we have to do the same steps as a timed-out rdlock, which will increase contention. diff --git a/nptl/pthread_rwlock_trywrlock.c b/nptl/pthread_rwlock_trywrlock.c index 0d9ccaf..dce32da 100644 --- a/nptl/pthread_rwlock_trywrlock.c +++ b/nptl/pthread_rwlock_trywrlock.c @@ -19,11 +19,18 @@ #include <errno.h> #include "pthreadP.h" #include <atomic.h> +#include <elide.h> /* See pthread_rwlock_common.c for an overview. */ int __pthread_rwlock_trywrlock (pthread_rwlock_t *rwlock) { + if (ELIDE_TRYLOCK (rwlock->__data.__rwelision, + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 + && rwlock->__data.__writers_futex, 1)) + return 0; + /* When in a trywrlock, we can acquire the write lock if it is in states #1 (idle and read phase) and #5 (idle and write phase), and also in #6 (readers waiting, write phase) if we prefer writers. diff --git a/nptl/pthread_rwlock_unlock.c b/nptl/pthread_rwlock_unlock.c index ef46e88..fa4555d 100644 --- a/nptl/pthread_rwlock_unlock.c +++ b/nptl/pthread_rwlock_unlock.c @@ -22,6 +22,7 @@ #include <pthread.h> #include <pthreadP.h> #include <stap-probe.h> +#include <elide.h> #include "pthread_rwlock_common.c" @@ -31,6 +32,10 @@ __pthread_rwlock_unlock (pthread_rwlock_t *rwlock) { LIBC_PROBE (rwlock_unlock, 1, rwlock); + if (ELIDE_UNLOCK ((rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 + && rwlock->__data.__writers_futex == 0)) + return 0; + /* We distinguish between having acquired a read vs. a write lock by looking at the writer TID. If it's equal to our TID, we must be the writer because nobody else can have stored this value. Also, if we are a diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c index 335fcd1..d041aad 100644 --- a/nptl/pthread_rwlock_wrlock.c +++ b/nptl/pthread_rwlock_wrlock.c @@ -16,6 +16,8 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#include <elide.h> + #include "pthread_rwlock_common.c" /* See pthread_rwlock_common.c. */ @@ -24,6 +26,12 @@ __pthread_rwlock_wrlock (pthread_rwlock_t *rwlock) { LIBC_PROBE (wrlock_entry, 1, rwlock); + if (ELIDE_LOCK (rwlock->__data.__rwelision, + ((rwlock->__data.__readers & PTHREAD_RWLOCK_WRPHASE) && (rwlock->__data.__readers & PTHREAD_RWLOCK_WRLOCKED)) == 0 + && (rwlock->__data.__readers >> PTHREAD_RWLOCK_READER_SHIFT) == 0 + && rwlock->__data.__writers_futex == 0)) + return 0; + int result = __pthread_rwlock_wrlock_full (rwlock, NULL); LIBC_PROBE (wrlock_acquire_write, 1, rwlock); return result; diff --git a/sysdeps/x86/elide.h b/sysdeps/x86/elide.h index 53de418..3eaf15c 100644 --- a/sysdeps/x86/elide.h +++ b/sysdeps/x86/elide.h @@ -77,7 +77,7 @@ elision_adapt(signed char *adapt_count, unsigned int status) } \ _xabort (_ABORT_LOCK_BUSY); \ } \ - if (!elision_adapt (&(adapt_count), status)) \ + if (elision_adapt (&(adapt_count), status)) \ break; \ } \ } \