Message ID | 1501018343.30433.50.camel@redhat.com |
---|---|
State | New |
Headers | show |
On Wednesday 26 July 2017 03:02 AM, Torvald Riegel wrote: > 65810f0ef05e8c9e333f17a44e77808b163ca298 fixed a robust mutex bug but > introduced BZ 21778: if the CAS used to try to acquire a lock fails, the > expected value is not updated, which breaks other cases in the lock > acquisition loop. The fix is to simply update the expected value with > the value returned by the CAS, which ensures that behavior is as if the > first case with the CAS never happened (if the CAS fails). > > This is a regression introduced in the last release, so it would be good > to get this included in this release. I'll likely be AFK on Thursday, > so please just commit this once it has been approved. Tested on > x86_64-linux. > > > [BZ 21778] > * nptl/pthread_mutex_timedlock.c (__pthread_mutex_timedlock): Update > oldval if the CAS fails. > * nptl/pthread_mutex_lock.c (__pthread_mutex_lock_full): Likewise. > * nptl/tst-mutex7.c (ROBUST, DELAY_NSEC, ROUNDS, N): New. > (tf, do_test): Use them. > * nptl/tst-mutex7robust.c: New file. > * nptl/Makefile (tests): Add new test. > Looks good to me, but I think Carlos is also reviewing this, so please wait for his confirmation before you commit. Siddhesh > > robust-mutex-21778.patch > > > commit 77e7d626fa6d7f8800c9658e65e43d13c5cdc81d > Author: Torvald Riegel <triegel@redhat.com> > Date: Mon Jul 24 22:53:38 2017 +0200 > > Fix oversight in robust mutex lock acquisition. > > 65810f0ef05e8c9e333f17a44e77808b163ca298 fixed a robust mutex bug but > introduced BZ 21778: if the CAS used to try to acquire a lock fails, > the expected value is not updated, which breaks other cases in the lock > acquisition loop. The fix is to simply update the expected value with > the value returned by the CAS, which ensures that behavior is as if the > first case with the CAS never happened. > > 2017-07-25 Torvald Riegel <triegel@redhat.com> > > [BZ 21778] > * nptl/pthread_mutex_timedlock.c (__pthread_mutex_timedlock): Update > oldval if the CAS fails. > * nptl/pthread_mutex_lock.c (__pthread_mutex_lock_full): Likewise. > * nptl/tst-mutex7.c (ROBUST, DELAY_NSEC, ROUNDS, N): New. > (tf, do_test): Use them. > * nptl/tst-mutex7robust.c: New file. > * nptl/Makefile (tests): Add new test. > > diff --git a/nptl/Makefile b/nptl/Makefile > index dd01994..bca09bf 100644 > --- a/nptl/Makefile > +++ b/nptl/Makefile > @@ -230,7 +230,7 @@ LDLIBS-tst-thread_local1 = -lstdc++ > > tests = tst-attr1 tst-attr2 tst-attr3 tst-default-attr \ > tst-mutex1 tst-mutex2 tst-mutex3 tst-mutex4 tst-mutex5 tst-mutex6 \ > - tst-mutex7 tst-mutex9 tst-mutex5a tst-mutex7a \ > + tst-mutex7 tst-mutex9 tst-mutex5a tst-mutex7a tst-mutex7robust \ > tst-mutexpi1 tst-mutexpi2 tst-mutexpi3 tst-mutexpi4 tst-mutexpi5 \ > tst-mutexpi5a tst-mutexpi6 tst-mutexpi7 tst-mutexpi7a \ > tst-mutexpi9 \ > diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c > index b76475b..8c48503 100644 > --- a/nptl/pthread_mutex_lock.c > +++ b/nptl/pthread_mutex_lock.c > @@ -197,11 +197,14 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex) > { > /* Try to acquire the lock through a CAS from 0 (not acquired) to > our TID | assume_other_futex_waiters. */ > - if (__glibc_likely ((oldval == 0) > - && (atomic_compare_and_exchange_bool_acq > - (&mutex->__data.__lock, > - id | assume_other_futex_waiters, 0) == 0))) > - break; > + if (__glibc_likely (oldval == 0)) > + { > + oldval > + = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock, > + id | assume_other_futex_waiters, 0); > + if (__glibc_likely (oldval == 0)) > + break; > + } > > if ((oldval & FUTEX_OWNER_DIED) != 0) > { > diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c > index be53381..d5ec314 100644 > --- a/nptl/pthread_mutex_timedlock.c > +++ b/nptl/pthread_mutex_timedlock.c > @@ -154,11 +154,14 @@ __pthread_mutex_timedlock (pthread_mutex_t *mutex, > { > /* Try to acquire the lock through a CAS from 0 (not acquired) to > our TID | assume_other_futex_waiters. */ > - if (__glibc_likely ((oldval == 0) > - && (atomic_compare_and_exchange_bool_acq > - (&mutex->__data.__lock, > - id | assume_other_futex_waiters, 0) == 0))) > - break; > + if (__glibc_likely (oldval == 0)) > + { > + oldval > + = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock, > + id | assume_other_futex_waiters, 0); > + if (__glibc_likely (oldval == 0)) > + break; > + } > > if ((oldval & FUTEX_OWNER_DIED) != 0) > { > diff --git a/nptl/tst-mutex7.c b/nptl/tst-mutex7.c > index a11afdb..7990409 100644 > --- a/nptl/tst-mutex7.c > +++ b/nptl/tst-mutex7.c > @@ -26,13 +26,22 @@ > #ifndef TYPE > # define TYPE PTHREAD_MUTEX_DEFAULT > #endif > - > +#ifndef ROBUST > +# define ROBUST PTHREAD_MUTEX_STALLED > +#endif > +#ifndef DELAY_NSEC > +# define DELAY_NSEC 11000 > +#endif > +#ifndef ROUNDS > +# define ROUNDS 1000 > +#endif > +#ifndef N > +# define N 100 > +#endif > > static pthread_mutex_t lock; > > > -#define ROUNDS 1000 > -#define N 100 > > > static void * > @@ -40,7 +49,7 @@ tf (void *arg) > { > int nr = (long int) arg; > int cnt; > - struct timespec ts = { .tv_sec = 0, .tv_nsec = 11000 }; > + struct timespec ts = { .tv_sec = 0, .tv_nsec = DELAY_NSEC }; > > for (cnt = 0; cnt < ROUNDS; ++cnt) > { > @@ -56,7 +65,8 @@ tf (void *arg) > return (void *) 1l; > } > > - nanosleep (&ts, NULL); > + if ((ts.tv_sec > 0) || (ts.tv_nsec > 0)) > + nanosleep (&ts, NULL); > } > > return NULL; > @@ -80,6 +90,12 @@ do_test (void) > exit (1); > } > > + if (pthread_mutexattr_setrobust (&a, ROBUST) != 0) > + { > + puts ("mutexattr_setrobust failed"); > + exit (1); > + } > + > #ifdef ENABLE_PI > if (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_INHERIT) != 0) > { > diff --git a/nptl/tst-mutex7robust.c b/nptl/tst-mutex7robust.c > new file mode 100644 > index 0000000..73eb66d > --- /dev/null > +++ b/nptl/tst-mutex7robust.c > @@ -0,0 +1,6 @@ > +#define TYPE PTHREAD_MUTEX_NORMAL > +#define ROBUST PTHREAD_MUTEX_ROBUST > +#define DELAY_NSEC 0 > +#define ROUNDS 1000 > +#define N 32 > +#include "tst-mutex7.c" >
On 07/28/2017 08:41 AM, Siddhesh Poyarekar wrote: > On Wednesday 26 July 2017 03:02 AM, Torvald Riegel wrote: >> 65810f0ef05e8c9e333f17a44e77808b163ca298 fixed a robust mutex bug but >> introduced BZ 21778: if the CAS used to try to acquire a lock fails, the >> expected value is not updated, which breaks other cases in the lock >> acquisition loop. The fix is to simply update the expected value with >> the value returned by the CAS, which ensures that behavior is as if the >> first case with the CAS never happened (if the CAS fails). >> >> This is a regression introduced in the last release, so it would be good >> to get this included in this release. I'll likely be AFK on Thursday, >> so please just commit this once it has been approved. Tested on >> x86_64-linux. >> >> >> [BZ 21778] >> * nptl/pthread_mutex_timedlock.c (__pthread_mutex_timedlock): Update >> oldval if the CAS fails. >> * nptl/pthread_mutex_lock.c (__pthread_mutex_lock_full): Likewise. >> * nptl/tst-mutex7.c (ROBUST, DELAY_NSEC, ROUNDS, N): New. >> (tf, do_test): Use them. >> * nptl/tst-mutex7robust.c: New file. >> * nptl/Makefile (tests): Add new test. >> > > Looks good to me, but I think Carlos is also reviewing this, so please > wait for his confirmation before you commit. So far I've finished confirming this fix on: - x86_64 - i686 - ppc64 With the following targets still lagging: - s390x - aarch64 - ppc64le - armv7hl I'd like to see those 4 last targets complete their test cycle before I commit this change. I've got a v2 which basically just adds some more comments to the testsuite. The bug is 100% reproducible and easily with the small critical section and the lock/unlock sequence of 100 threads, we hit the CAS very quickly, fail to set oldval and everything deadlocks. It's always easier to validate when the failure is in a high contention path, because creating contention is easy :-) Would you be opposed if I commit this slightly *after* the midnight UTC hard freeze? That way I can confirm the other four arches are OK? I expect that with the coverage I have here it won't break anyone else (and the patch is clearly correct, but I would like to really double check). Cheers, Carlos.
On 07/28/2017 06:47 PM, Carlos O'Donell wrote: > On 07/28/2017 08:41 AM, Siddhesh Poyarekar wrote: >> On Wednesday 26 July 2017 03:02 AM, Torvald Riegel wrote: >>> 65810f0ef05e8c9e333f17a44e77808b163ca298 fixed a robust mutex bug but >>> introduced BZ 21778: if the CAS used to try to acquire a lock fails, the >>> expected value is not updated, which breaks other cases in the lock >>> acquisition loop. The fix is to simply update the expected value with >>> the value returned by the CAS, which ensures that behavior is as if the >>> first case with the CAS never happened (if the CAS fails). >>> >>> This is a regression introduced in the last release, so it would be good >>> to get this included in this release. I'll likely be AFK on Thursday, >>> so please just commit this once it has been approved. Tested on >>> x86_64-linux. >>> >>> >>> [BZ 21778] >>> * nptl/pthread_mutex_timedlock.c (__pthread_mutex_timedlock): Update >>> oldval if the CAS fails. >>> * nptl/pthread_mutex_lock.c (__pthread_mutex_lock_full): Likewise. >>> * nptl/tst-mutex7.c (ROBUST, DELAY_NSEC, ROUNDS, N): New. >>> (tf, do_test): Use them. >>> * nptl/tst-mutex7robust.c: New file. >>> * nptl/Makefile (tests): Add new test. >>> >> >> Looks good to me, but I think Carlos is also reviewing this, so please >> wait for his confirmation before you commit. > > So far I've finished confirming this fix on: > - x86_64 > - i686 > - ppc64 > > With the following targets still lagging: > - s390x > - aarch64 > - ppc64le > - armv7hl Done. All 7 machines passed the aggressive regression test and validation. I was able to reproduce the issue on all of the machines and fix it with the patch. Bug 21778 is now fixed. Cheers, Carlos.
On Saturday 29 July 2017 04:17 AM, Carlos O'Donell wrote: > Would you be opposed if I commit this slightly *after* the midnight UTC hard > freeze? That way I can confirm the other four arches are OK? I expect that with > the coverage I have here it won't break anyone else (and the patch is clearly > correct, but I would like to really double check). Sure, this one is already agreed upon. Siddhesh
On 07/29/2017 12:28 AM, Siddhesh Poyarekar wrote: > On Saturday 29 July 2017 04:17 AM, Carlos O'Donell wrote: >> Would you be opposed if I commit this slightly *after* the midnight UTC hard >> freeze? That way I can confirm the other four arches are OK? I expect that with >> the coverage I have here it won't break anyone else (and the patch is clearly >> correct, but I would like to really double check). > > Sure, this one is already agreed upon. Thanks. I committed without your explicit approval, expecting that your review, and my testing were near enough to the freeze to be OK. I promise not to fix anything else ;-) My eyes are tired from code review and multi-machine testing :} Cheers, Carlos.
commit 77e7d626fa6d7f8800c9658e65e43d13c5cdc81d Author: Torvald Riegel <triegel@redhat.com> Date: Mon Jul 24 22:53:38 2017 +0200 Fix oversight in robust mutex lock acquisition. 65810f0ef05e8c9e333f17a44e77808b163ca298 fixed a robust mutex bug but introduced BZ 21778: if the CAS used to try to acquire a lock fails, the expected value is not updated, which breaks other cases in the lock acquisition loop. The fix is to simply update the expected value with the value returned by the CAS, which ensures that behavior is as if the first case with the CAS never happened. 2017-07-25 Torvald Riegel <triegel@redhat.com> [BZ 21778] * nptl/pthread_mutex_timedlock.c (__pthread_mutex_timedlock): Update oldval if the CAS fails. * nptl/pthread_mutex_lock.c (__pthread_mutex_lock_full): Likewise. * nptl/tst-mutex7.c (ROBUST, DELAY_NSEC, ROUNDS, N): New. (tf, do_test): Use them. * nptl/tst-mutex7robust.c: New file. * nptl/Makefile (tests): Add new test. diff --git a/nptl/Makefile b/nptl/Makefile index dd01994..bca09bf 100644 --- a/nptl/Makefile +++ b/nptl/Makefile @@ -230,7 +230,7 @@ LDLIBS-tst-thread_local1 = -lstdc++ tests = tst-attr1 tst-attr2 tst-attr3 tst-default-attr \ tst-mutex1 tst-mutex2 tst-mutex3 tst-mutex4 tst-mutex5 tst-mutex6 \ - tst-mutex7 tst-mutex9 tst-mutex5a tst-mutex7a \ + tst-mutex7 tst-mutex9 tst-mutex5a tst-mutex7a tst-mutex7robust \ tst-mutexpi1 tst-mutexpi2 tst-mutexpi3 tst-mutexpi4 tst-mutexpi5 \ tst-mutexpi5a tst-mutexpi6 tst-mutexpi7 tst-mutexpi7a \ tst-mutexpi9 \ diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c index b76475b..8c48503 100644 --- a/nptl/pthread_mutex_lock.c +++ b/nptl/pthread_mutex_lock.c @@ -197,11 +197,14 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex) { /* Try to acquire the lock through a CAS from 0 (not acquired) to our TID | assume_other_futex_waiters. */ - if (__glibc_likely ((oldval == 0) - && (atomic_compare_and_exchange_bool_acq - (&mutex->__data.__lock, - id | assume_other_futex_waiters, 0) == 0))) - break; + if (__glibc_likely (oldval == 0)) + { + oldval + = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock, + id | assume_other_futex_waiters, 0); + if (__glibc_likely (oldval == 0)) + break; + } if ((oldval & FUTEX_OWNER_DIED) != 0) { diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c index be53381..d5ec314 100644 --- a/nptl/pthread_mutex_timedlock.c +++ b/nptl/pthread_mutex_timedlock.c @@ -154,11 +154,14 @@ __pthread_mutex_timedlock (pthread_mutex_t *mutex, { /* Try to acquire the lock through a CAS from 0 (not acquired) to our TID | assume_other_futex_waiters. */ - if (__glibc_likely ((oldval == 0) - && (atomic_compare_and_exchange_bool_acq - (&mutex->__data.__lock, - id | assume_other_futex_waiters, 0) == 0))) - break; + if (__glibc_likely (oldval == 0)) + { + oldval + = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock, + id | assume_other_futex_waiters, 0); + if (__glibc_likely (oldval == 0)) + break; + } if ((oldval & FUTEX_OWNER_DIED) != 0) { diff --git a/nptl/tst-mutex7.c b/nptl/tst-mutex7.c index a11afdb..7990409 100644 --- a/nptl/tst-mutex7.c +++ b/nptl/tst-mutex7.c @@ -26,13 +26,22 @@ #ifndef TYPE # define TYPE PTHREAD_MUTEX_DEFAULT #endif - +#ifndef ROBUST +# define ROBUST PTHREAD_MUTEX_STALLED +#endif +#ifndef DELAY_NSEC +# define DELAY_NSEC 11000 +#endif +#ifndef ROUNDS +# define ROUNDS 1000 +#endif +#ifndef N +# define N 100 +#endif static pthread_mutex_t lock; -#define ROUNDS 1000 -#define N 100 static void * @@ -40,7 +49,7 @@ tf (void *arg) { int nr = (long int) arg; int cnt; - struct timespec ts = { .tv_sec = 0, .tv_nsec = 11000 }; + struct timespec ts = { .tv_sec = 0, .tv_nsec = DELAY_NSEC }; for (cnt = 0; cnt < ROUNDS; ++cnt) { @@ -56,7 +65,8 @@ tf (void *arg) return (void *) 1l; } - nanosleep (&ts, NULL); + if ((ts.tv_sec > 0) || (ts.tv_nsec > 0)) + nanosleep (&ts, NULL); } return NULL; @@ -80,6 +90,12 @@ do_test (void) exit (1); } + if (pthread_mutexattr_setrobust (&a, ROBUST) != 0) + { + puts ("mutexattr_setrobust failed"); + exit (1); + } + #ifdef ENABLE_PI if (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_INHERIT) != 0) { diff --git a/nptl/tst-mutex7robust.c b/nptl/tst-mutex7robust.c new file mode 100644 index 0000000..73eb66d --- /dev/null +++ b/nptl/tst-mutex7robust.c @@ -0,0 +1,6 @@ +#define TYPE PTHREAD_MUTEX_NORMAL +#define ROBUST PTHREAD_MUTEX_ROBUST +#define DELAY_NSEC 0 +#define ROUNDS 1000 +#define N 32 +#include "tst-mutex7.c"