commit abdf7e3d264e56898f474405602205279895d9b6
Author: Stefan Liebler <stli@linux.vnet.ibm.com>
Date: Thu Jan 19 10:44:18 2017 +0100
S390: Adjust lock elision code after review.
This patch adjusts s390 specific lock elision code after review
of the following patches:
-S390: Use own tbegin macro instead of __builtin_tbegin.
(8bfc4a2ab4bebdf86c151665aae8a266e2f18fb4)
-S390: Use new __libc_tbegin_retry macro in elision-lock.c.
(53c5c3d5ac238901c13f28a73ba05b0678094e80)
-S390: Optimize lock-elision by decrementing adapt_count at unlock.
(dd037fb3df286b7c2d0b0c6f8d02a2dd8a8e8a08)
The futex value is not tested before starting a transaction,
__glibc_likely is used instead of __builtin_expect and comments
are adjusted.
ChangeLog:
* sysdeps/unix/sysv/linux/s390/htm.h: Adjust comments.
* sysdeps/unix/sysv/linux/s390/elision-unlock.c: Likewise.
* sysdeps/unix/sysv/linux/s390/elision-lock.c: Adjust comments.
(__lll_lock_elision): Do not test futex before starting a
transaction. Use __glibc_likely instead of __builtin_expect.
* sysdeps/unix/sysv/linux/s390/elision-trylock.c: Adjust comments.
(__lll_trylock_elision): Do not test futex before starting a
transaction. Use __glibc_likely instead of __builtin_expect.
@@ -50,27 +50,28 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
critical section uses lock elision) and outside of transactions. Thus,
we need to use atomic accesses to avoid data races. However, the
value of adapt_count is just a hint, so relaxed MO accesses are
- sufficient.
- Do not begin a transaction if another cpu has locked the
- futex with normal locking. If adapt_count is zero, it remains and the
- next pthread_mutex_lock call will try to start a transaction again. */
- if (atomic_load_relaxed (futex) == 0
- && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
+ sufficient. */
+ if (atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
{
+ /* Start a transaction and retry it automatically if it aborts with
+ _HTM_TBEGIN_TRANSIENT. This macro calls tbegin at most retry_cnt
+ + 1 times. The second argument is considered as retry_cnt. */
int status = __libc_tbegin_retry ((void *) 0, aconf.try_tbegin - 1);
- if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
- _HTM_TBEGIN_STARTED))
+ if (__glibc_likely (status == _HTM_TBEGIN_STARTED))
{
/* Check the futex to make sure nobody has touched it in the
mean time. This forces the futex into the cache and makes
- sure the transaction aborts if some other cpu uses the
- lock (writes the futex). */
- if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
+ sure the transaction aborts if another thread acquires the lock
+ concurrently. */
+ if (__glibc_likely (atomic_load_relaxed (futex) == 0))
/* Lock was free. Return to user code in a transaction. */
return 0;
- /* Lock was busy. Fall back to normal locking. */
- if (__builtin_expect (__libc_tx_nesting_depth () <= 1, 1))
+ /* Lock was busy. Fall back to normal locking.
+ This can be the case if e.g. adapt_count was decremented to zero
+ by a former release and another thread has been waken up and
+ acquired it. */
+ if (__glibc_likely (__libc_tx_nesting_depth () <= 1))
{
/* In a non-nested transaction there is no need to abort,
which is expensive. Simply end the started transaction. */
@@ -118,6 +119,7 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
}
}
- /* Use normal locking as fallback path if transaction does not succeed. */
+ /* Use normal locking as fallback path if the transaction does not
+ succeed. */
return LLL_LOCK ((*futex), private);
}
@@ -51,31 +51,29 @@ __lll_trylock_elision (int *futex, short *adapt_count)
critical section uses lock elision) and outside of transactions. Thus,
we need to use atomic accesses to avoid data races. However, the
value of adapt_count is just a hint, so relaxed MO accesses are
- sufficient.
- Do not begin a transaction if another cpu has locked the
- futex with normal locking. If adapt_count is zero, it remains and the
- next pthread_mutex_lock call will try to start a transaction again. */
- if (atomic_load_relaxed (futex) == 0
- && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
+ sufficient. */
+ if (atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
{
int status = __libc_tbegin ((void *) 0);
- if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
- _HTM_TBEGIN_STARTED))
+ if (__glibc_likely (status == _HTM_TBEGIN_STARTED))
{
/* Check the futex to make sure nobody has touched it in the
mean time. This forces the futex into the cache and makes
- sure the transaction aborts if some other cpu uses the
- lock (writes the futex). */
- if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
+ sure the transaction aborts if another thread acquires the lock
+ concurrently. */
+ if (__glibc_likely (atomic_load_relaxed (futex) == 0))
/* Lock was free. Return to user code in a transaction. */
return 0;
- /* Lock was busy. Fall back to normal locking. Since we are in
- a non-nested transaction there is no need to abort, which is
- expensive. Simply end the started transaction. */
+ /* Lock was busy. Fall back to normal locking.
+ This can be the case if e.g. adapt_count was decremented to zero
+ by a former release and another thread has been waken up and
+ acquired it.
+ Since we are in a non-nested transaction there is no need to abort,
+ which is expensive. Simply end the started transaction. */
__libc_tend ();
/* Note: Changing the adapt_count here might abort a transaction on a
- different cpu, but that could happen anyway when the futex is
+ different CPU, but that could happen anyway when the futex is
acquired, so there's no need to check the nesting depth here.
See above for why relaxed MO is sufficient. */
if (aconf.skip_lock_busy > 0)
@@ -93,6 +91,7 @@ __lll_trylock_elision (int *futex, short *adapt_count)
/* Could do some retries here. */
}
- /* Use normal locking as fallback path if transaction does not succeed. */
+ /* Use normal locking as fallback path if the transaction does not
+ succeed. */
return lll_trylock (*futex);
}
@@ -26,8 +26,12 @@ __lll_unlock_elision(int *futex, short *adapt_count, int private)
/* If the lock is free, we elided the lock earlier. This does not
necessarily mean that we are in a transaction, because the user code may
have closed the transaction, but that is impossible to detect reliably.
- Relaxed MO access to futex is sufficient as we only need a hint, if we
- started a transaction or acquired the futex in e.g. elision-lock.c. */
+ Relaxed MO access to futex is sufficient because a correct program
+ will only release a lock it has acquired; therefore, it must either
+ changed the futex word's value to something !=0 or it must have used
+ elision; these are actions by the same thread, so these actions are
+ sequenced-before the relaxed load (and thus also happens-before the
+ relaxed load). Therefore, relaxed MO is sufficient. */
if (atomic_load_relaxed (futex) == 0)
{
__libc_tend ();
@@ -36,17 +40,17 @@ __lll_unlock_elision(int *futex, short *adapt_count, int private)
{
/* Update the adapt_count while unlocking before completing the critical
section. adapt_count is accessed concurrently outside of a
- transaction or an aquired lock e.g. in elision-lock.c so we need to use
- atomic accesses. However, the value of adapt_count is just a hint, so
- relaxed MO accesses are sufficient.
+ transaction or a critical section (e.g. in elision-lock.c). So we need
+ to use atomic accesses. However, the value of adapt_count is just a
+ hint, so relaxed MO accesses are sufficient.
If adapt_count would be decremented while locking, multiple
- CPUs trying to lock the locked mutex will decrement adapt_count to
+ CPUs, trying to lock the acquired mutex, will decrement adapt_count to
zero and another CPU will try to start a transaction, which will be
immediately aborted as the mutex is locked.
- If adapt_count would be decremented while unlocking after completing
- the critical section, possible waiters will be waked up before
- decrementing the adapt_count. Those waked up waiters could have
- destroyed and freed this mutex! */
+ The update of adapt_count is done before releasing the lock as POSIX'
+ mutex destruction requirements disallow accesses to the mutex after it
+ has been released and thus could have been acquired or destroyed by
+ another thread. */
short adapt_count_val = atomic_load_relaxed (adapt_count);
if (adapt_count_val > 0)
atomic_store_relaxed (adapt_count, adapt_count_val - 1);
@@ -119,11 +119,16 @@
ar modification and fp operations. Some \
program-interruptions (e.g. a null \
pointer access) are filtered and the \
- trancsaction will abort. In this case \
+ transaction will abort. In this case \
the normal lock path will execute it \
again and result in a core dump wich does \
now show at tbegin but the real executed \
- instruction. */ \
+ instruction. \
+ However it is not guaranteed that this \
+ retry operate on the same data and thus \
+ may not end in an program-interruption. \
+ Note: This could also be used to probe \
+ memory for being accessible! */ \
"2: tbegin 0, 0xFF0E\n\t" \
/* Branch away in abort case (this is the \
prefered sequence. See PoP in chapter 5 \
@@ -152,7 +157,8 @@
__ret; \
})
-/* These builtins are correct. Use them. */
+/* These builtins are usable in context of glibc lock elision code without any
+ changes. Use them. */
#define __libc_tend() \
({ __asm__ __volatile__ (".machine push\n\t" \
".machinemode \"zarch_nohighgprs\"\n\t" \