@@ -201,7 +201,6 @@ static bool __attribute__ ((unused))
__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
unsigned int *g1index, int private)
{
- const unsigned int maxspin = 0;
unsigned int g1 = *g1index;
/* If there is no waiter in G2, we don't do anything. The expression may
@@ -222,84 +221,46 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
* New waiters arriving concurrently with the group switching will all go
into G2 until we atomically make the switch. Waiters existing in G2
are not affected.
- * Waiters in G1 will be closed out immediately by setting a flag in
- __g_signals, which will prevent waiters from blocking using a futex on
- __g_signals and also notifies them that the group is closed. As a
- result, they will eventually remove their group reference, allowing us
- to close switch group roles. */
-
- /* First, set the closed flag on __g_signals. This tells waiters that are
- about to wait that they shouldn't do that anymore. This basically
- serves as an advance notificaton of the upcoming change to __g1_start;
- waiters interpret it as if __g1_start was larger than their waiter
- sequence position. This allows us to change __g1_start after waiting
- for all existing waiters with group references to leave, which in turn
- makes recovery after stealing a signal simpler because it then can be
- skipped if __g1_start indicates that the group is closed (otherwise,
- we would have to recover always because waiters don't know how big their
- groups are). Relaxed MO is fine. */
- atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1);
-
- /* Wait until there are no group references anymore. The fetch-or operation
- injects us into the modification order of __g_refs; release MO ensures
- that waiters incrementing __g_refs after our fetch-or see the previous
- changes to __g_signals and to __g1_start that had to happen before we can
- switch this G1 and alias with an older group (we have two groups, so
- aliasing requires switching group roles twice). Note that nobody else
- can have set the wake-request flag, so we do not have to act upon it.
-
- Also note that it is harmless if older waiters or waiters from this G1
- get a group reference after we have quiesced the group because it will
- remain closed for them either because of the closed flag in __g_signals
- or the later update to __g1_start. New waiters will never arrive here
- but instead continue to go into the still current G2. */
- unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0);
- while ((r >> 1) > 0)
- {
- for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--)
- {
- /* TODO Back off. */
- r = atomic_load_relaxed (cond->__data.__g_refs + g1);
- }
- if ((r >> 1) > 0)
- {
- /* There is still a waiter after spinning. Set the wake-request
- flag and block. Relaxed MO is fine because this is just about
- this futex word.
-
- Update r to include the set wake-request flag so that the upcoming
- futex_wait only blocks if the flag is still set (otherwise, we'd
- violate the basic client-side futex protocol). */
- r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1) | 1;
-
- if ((r >> 1) > 0)
- futex_wait_simple (cond->__data.__g_refs + g1, r, private);
- /* Reload here so we eventually see the most recent value even if we
- do not spin. */
- r = atomic_load_relaxed (cond->__data.__g_refs + g1);
- }
- }
- /* Acquire MO so that we synchronize with the release operation that waiters
- use to decrement __g_refs and thus happen after the waiters we waited
- for. */
- atomic_thread_fence_acquire ();
+ * Waiters in G1 will be closed out immediately by advancing __g_signals
+ to the next "lowseq" (the low 31 bits of the new g1_start), which
+ prevents waiters from blocking on the __g_signals futex since it
+ provides enough signals for all possible remaining waiters. As a
+ result, they can each consume a signal and will eventually remove
+ their group reference. */
/* Update __g1_start, which finishes closing this group. The value we add
will never be negative because old_orig_size can only be zero when we
switch groups the first time after a condvar was initialized, in which
- case G1 will be at index 1 and we will add a value of 1. See above for
- why this takes place after waiting for quiescence of the group.
+ case G1 will be at index 1 and we will add a value of 1.
Relaxed MO is fine because the change comes with no additional
constraints that others would have to observe. */
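+ /* Note: __g1_start stores the sequence shifted left by one, with the
+ LSB holding the index of the current G2; adding old_orig_size << 1
+ advances the sequence, and the +/-1 term flips the stored G2 index
+ for the upcoming role switch. */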
__condvar_add_g1_start_relaxed (cond,
(old_orig_size << 1) + (g1 == 1 ? 1 : - 1));
- /* Now reopen the group, thus enabling waiters to again block using the
- futex controlled by __g_signals. Release MO so that observers that see
- no signals (and thus can block) also see the write __g1_start and thus
- that this is now a new group (see __pthread_cond_wait_common for the
- matching acquire MO loads). */
- atomic_store_release (cond->__data.__g_signals + g1, 0);
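+ /* Compute the new "lowseq": the sequence position at which the new G1
+ starts, shifted left by one with the LSB kept 0. Signals are counted
+ relative to this base: for example, if the new g1_start sequence is
+ 5, lowseq is 10, and a later __g_signals value of 14 means two
+ signals are available ((14 - 10) >> 1 == 2). */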
+ unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U;
+
+ /* If any waiters still hold group references (and thus could be blocked),
+ then wake them all up now and prevent any running ones from blocking.
+ This is effectively a catch-all for any possible current or future
+ bugs that can allow the group size to reach 0 before all G1 waiters
+ have been awakened or at least given signals to consume, or any
+ other case that can leave blocked (or about to block) older waiters. */
+ if ((atomic_fetch_or_release (cond->__data.__g_refs + g1, 0) >> 1) > 0)
+ {
+ /* First advance signals to the end of the group (i.e. enough signals
+ for the entire G1 group) to ensure that waiters that have not
+ yet blocked in the futex will not block.
+ Note that in the vast majority of cases, this should not
+ actually be necessary, since __g_signals will have enough
+ signals for the remaining g_refs waiters. As an optimization,
+ we could check this first before proceeding, although that
+ could still leave the potential for futex lost-wakeup bugs
+ if the signal count was non-zero but the futex wakeup
+ was somehow lost. */
+ atomic_store_release (cond->__data.__g_signals + g1, lowseq);
+
+ futex_wake (cond->__data.__g_signals + g1, INT_MAX, private);
+ }
/* At this point, the old G1 is now a valid new G2 (but not in use yet).
No old waiter can neither grab a signal nor acquire a reference without
@@ -311,6 +272,10 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
g1 ^= 1;
*g1index ^= 1;
+ /* Now advance the new G1 g_signals to the new lowseq, giving it
+ an effective signal count of 0 to start. */
+ atomic_store_release (cond->__data.__g_signals + g1, lowseq);
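+ /* A waiter now computes (signals - lowseq) >> 1 == 0 available signals
+ for this group and will block until signals are added (each signal
+ adds 2, matching the 2-per-signal consume step in
+ __pthread_cond_wait_common). */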
+
/* These values are just observed by signalers, and thus protected by the
lock. */
unsigned int orig_size = wseq - (old_g1_start + old_orig_size);
@@ -238,9 +238,7 @@ __condvar_cleanup_waiting (void *arg)
signaled), and a reference count.
The group reference count is used to maintain the number of waiters that
- are using the group's futex. Before a group can change its role, the
- reference count must show that no waiters are using the futex anymore; this
- prevents ABA issues on the futex word.
+ are using the group's futex.
To represent which intervals in the waiter sequence the groups cover (and
thus also which group slot contains G1 or G2), we use a 64b counter to
@@ -300,11 +298,12 @@ __condvar_cleanup_waiting (void *arg)
last reference.
* Reference count used by waiters concurrently with signalers that have
acquired the condvar-internal lock.
- __g_signals: The number of signals that can still be consumed.
+ __g_signals: The number of signals that can still be consumed, relative
+ to the current g1_start. (That is, bits 31 to 1 of __g_signals are
+ bits 31 to 1 of g1_start with the signal count added.)
* Used as a futex word by waiters. Used concurrently by waiters and
signalers.
- * LSB is true iff this group has been completely signaled (i.e., it is
- closed).
+ * LSB is currently reserved and 0.
__g_size: Waiters remaining in this group (i.e., which have not been
signaled yet.
* Accessed by signalers and waiters that cancel waiting (both do so only
@@ -328,18 +327,6 @@ __condvar_cleanup_waiting (void *arg)
sufficient because if a waiter can see a sufficiently large value, it could
have also consume a signal in the waiters group.
- Waiters try to grab a signal from __g_signals without holding a reference
- count, which can lead to stealing a signal from a more recent group after
- their own group was already closed. They cannot always detect whether they
- in fact did because they do not know when they stole, but they can
- conservatively add a signal back to the group they stole from; if they
- did so unnecessarily, all that happens is a spurious wake-up. To make this
- even less likely, __g1_start contains the index of the current g2 too,
- which allows waiters to check if there aliasing on the group slots; if
- there wasn't, they didn't steal from the current G1, which means that the
- G1 they stole from must have been already closed and they do not need to
- fix anything.
-
It is essential that the last field in pthread_cond_t is __g_signals[1]:
The previous condvar used a pointer-sized field in pthread_cond_t, so a
PTHREAD_COND_INITIALIZER from that condvar implementation might only
@@ -435,6 +422,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
{
while (1)
{
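+ /* Take a snapshot of __g1_start and derive "lowseq", the base value
+ of __g_signals at which our group has no signals available. If
+ the LSB of __g1_start names our group, we are the current G2 and
+ cannot have any signals yet, so the base is simply our current
+ __g_signals value; otherwise it is g1_start with the LSB masked
+ off. */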
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
+ unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
+
/* Spin-wait first.
Note that spinning first without checking whether a timeout
passed might lead to what looks like a spurious wake-up even
@@ -446,35 +436,45 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
having to compare against the current time seems to be the right
choice from a performance perspective for most use cases. */
unsigned int spin = maxspin;
- while (signals == 0 && spin > 0)
+ while (spin > 0 && ((int)(signals - lowseq) < 2))
{
/* Check that we are not spinning on a group that's already
closed. */
- if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
- goto done;
+ if (seq < (g1_start >> 1))
+ break;
/* TODO Back off. */
/* Reload signals. See above for MO. */
signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ g1_start = __condvar_load_g1_start_relaxed (cond);
+ lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
spin--;
}
- /* If our group will be closed as indicated by the flag on signals,
- don't bother grabbing a signal. */
- if (signals & 1)
- goto done;
-
- /* If there is an available signal, don't block. */
- if (signals != 0)
+ if (seq < (g1_start >> 1))
+ {
+ /* If the group is closed already, then this waiter originally
+ had enough extra signals to consume, up until the time its
+ group was closed. */
+ goto done;
+ }
+
+ /* If there is an available signal, don't block.
+ If __g1_start has advanced at all, then we must be in G1
+ by now, perhaps in the process of switching back to an older
+ G2, but in either case we're allowed to consume the available
+ signal and should not block anymore. */
+ if ((int)(signals - lowseq) >= 2)
break;
/* No signals available after spinning, so prepare to block.
We first acquire a group reference and use acquire MO for that so
that we synchronize with the dummy read-modify-write in
__condvar_quiesce_and_switch_g1 if we read from that. In turn,
- in this case this will make us see the closed flag on __g_signals
- that designates a concurrent attempt to reuse the group's slot.
+ in this case it will make us see the advancement of __g_signals
+ to the upcoming new g1_start that occurs with a concurrent
+ attempt to reuse the group's slot.
We use acquire MO for the __g_signals check to make the
__g1_start check work (see spinning above).
Note that the group reference acquisition will not mask the
@@ -482,15 +482,24 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
an atomic read-modify-write operation and thus extend the release
sequence. */
atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
- if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
- || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
+ signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ g1_start = __condvar_load_g1_start_relaxed (cond);
+ lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
+
+ if (seq < (g1_start >> 1))
{
- /* Our group is closed. Wake up any signalers that might be
- waiting. */
+ /* The group is closed already, so don't block. */
__condvar_dec_grefs (cond, g, private);
goto done;
}
+ if ((int)(signals - lowseq) >= 2)
+ {
+ /* A signal showed up or G1/G2 switched after we grabbed the refcount. */
+ __condvar_dec_grefs (cond, g, private);
+ break;
+ }
+
// Now block.
struct _pthread_cleanup_buffer buffer;
struct _condvar_cleanup_buffer cbuffer;
@@ -501,7 +510,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
__pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
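+ /* Block on the futex only if __g_signals still holds the value we
+ observed above; a concurrent signal or group switch changes the
+ futex word and prevents us from sleeping on a stale value. */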
err = __futex_abstimed_wait_cancelable64 (
- cond->__data.__g_signals + g, 0, clockid, abstime, private);
+ cond->__data.__g_signals + g, signals, clockid, abstime, private);
__pthread_cleanup_pop (&buffer, 0);
@@ -524,6 +533,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
signals = atomic_load_acquire (cond->__data.__g_signals + g);
}
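+ /* If our group was closed while we were blocked, the advance of
+ __g_signals past the group end already accounted for us; stop
+ waiting without consuming a signal. */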
+ if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
+ goto done;
}
/* Try to grab a signal. Use acquire MO so that we see an up-to-date value
of __g1_start below (see spinning above for a similar case). In
@@ -532,69 +543,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
&signals, signals - 2));
- /* We consumed a signal but we could have consumed from a more recent group
- that aliased with ours due to being in the same group slot. If this
- might be the case our group must be closed as visible through
- __g1_start. */
- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
- if (seq < (g1_start >> 1))
- {
- /* We potentially stole a signal from a more recent group but we do not
- know which group we really consumed from.
- We do not care about groups older than current G1 because they are
- closed; we could have stolen from these, but then we just add a
- spurious wake-up for the current groups.
- We will never steal a signal from current G2 that was really intended
- for G2 because G2 never receives signals (until it becomes G1). We
- could have stolen a signal from G2 that was conservatively added by a
- previous waiter that also thought it stole a signal -- but given that
- that signal was added unnecessarily, it's not a problem if we steal
- it.
- Thus, the remaining case is that we could have stolen from the current
- G1, where "current" means the __g1_start value we observed. However,
- if the current G1 does not have the same slot index as we do, we did
- not steal from it and do not need to undo that. This is the reason
- for putting a bit with G2's index into__g1_start as well. */
- if (((g1_start & 1) ^ 1) == g)
- {
- /* We have to conservatively undo our potential mistake of stealing
- a signal. We can stop trying to do that when the current G1
- changes because other spinning waiters will notice this too and
- __condvar_quiesce_and_switch_g1 has checked that there are no
- futex waiters anymore before switching G1.
- Relaxed MO is fine for the __g1_start load because we need to
- merely be able to observe this fact and not have to observe
- something else as well.
- ??? Would it help to spin for a little while to see whether the
- current G1 gets closed? This might be worthwhile if the group is
- small or close to being closed. */
- unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
- while (__condvar_load_g1_start_relaxed (cond) == g1_start)
- {
- /* Try to add a signal. We don't need to acquire the lock
- because at worst we can cause a spurious wake-up. If the
- group is in the process of being closed (LSB is true), this
- has an effect similar to us adding a signal. */
- if (((s & 1) != 0)
- || atomic_compare_exchange_weak_relaxed
- (cond->__data.__g_signals + g, &s, s + 2))
- {
- /* If we added a signal, we also need to add a wake-up on
- the futex. We also need to do that if we skipped adding
- a signal because the group is being closed because
- while __condvar_quiesce_and_switch_g1 could have closed
- the group, it might stil be waiting for futex waiters to
- leave (and one of those waiters might be the one we stole
- the signal from, which cause it to block using the
- futex). */
- futex_wake (cond->__data.__g_signals + g, 1, private);
- break;
- }
- /* TODO Back off. */
- }
- }
- }
-
done:
/* Confirm that we have been woken. We do that before acquiring the mutex