Message ID | 1395789863-5026-2-git-send-email-andi@firstfloor.org |
---|---|
State | New |
Headers | show |
On Tue, 2014-03-25 at 16:24 -0700, Andi Kleen wrote: > From: Andi Kleen <ak@linux.intel.com> > > One difference of the C versions to the assembler wr/rdlock > is that the C compiler saves some registers which are unnecessary > for the fast path in the prologue of the functions. Split the > uncontended fast path out into a separate function. Only when contention is > detected is the full featured function called. This makes > the fast path code (nearly) identical to the assembler version, > and gives uncontended performance within a few cycles. > > nptl/: > 2014-03-25 Andi Kleen <ak@linux.intel.com> > > * pthread_rwlock_rdlock (__pthread_rwlock_rdlock): > Split into __do_pthread_rwlock_rdlock and __pthread_rwlock_rdlock. > * pthread_rwlock_wrlock (__pthread_rwlock_wrlock): > Split into __do_pthread_rwlock_wrlock and __pthread_wrlock_rdlock. > --- > nptl/pthread_rwlock_rdlock.c | 88 ++++++++++++++++++++++++++++++-------------- > nptl/pthread_rwlock_wrlock.c | 59 ++++++++++++++++++++--------- > 2 files changed, 103 insertions(+), 44 deletions(-) > > diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c > index 3773f7d..a4deed4 100644 > --- a/nptl/pthread_rwlock_rdlock.c > +++ b/nptl/pthread_rwlock_rdlock.c > @@ -24,39 +24,16 @@ > #include <stap-probe.h> > > > -/* Acquire read lock for RWLOCK. */ > -int > -__pthread_rwlock_rdlock (rwlock) > - pthread_rwlock_t *rwlock; > +/* Acquire read lock for RWLOCK. Slow path. */ Double space before end of comment. > +static int __attribute__((noinline)) > +__do_pthread_rwlock_rdlock (pthread_rwlock_t *rwlock) I'd prefer renaming that to __pthread_rwlock_rdlock_slow. Alternatively, we could use the "_full" suffix, as the mutex code is doing. > { > int result = 0; > > - LIBC_PROBE (rdlock_entry, 1, rwlock); > - > - /* Make sure we are alone. */ > - lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); > + /* Lock is taken in caller. */ Double space before end of comment. 
> > while (1) > { > - /* Get the rwlock if there is no writer... */ > - if (rwlock->__data.__writer == 0 > - /* ...and if either no writer is waiting or we prefer readers. */ > - && (!rwlock->__data.__nr_writers_queued > - || PTHREAD_RWLOCK_PREFER_READER_P (rwlock))) > - { > - /* Increment the reader counter. Avoid overflow. */ > - if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0)) > - { > - /* Overflow on number of readers. */ > - --rwlock->__data.__nr_readers; > - result = EAGAIN; > - } > - else > - LIBC_PROBE (rdlock_acquire_read, 1, rwlock); > - > - break; > - } > - > /* Make sure we are not holding the rwlock as a writer. This is > a deadlock situation we recognize and report. */ > if (__builtin_expect (rwlock->__data.__writer > @@ -88,6 +65,25 @@ __pthread_rwlock_rdlock (rwlock) > lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); > > --rwlock->__data.__nr_readers_queued; > + > + /* Get the rwlock if there is no writer... */ > + if (rwlock->__data.__writer == 0 > + /* ...and if either no writer is waiting or we prefer readers. */ > + && (!rwlock->__data.__nr_writers_queued > + || PTHREAD_RWLOCK_PREFER_READER_P (rwlock))) > + { > + /* Increment the reader counter. Avoid overflow. */ > + if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0)) > + { > + /* Overflow on number of readers. */ > + --rwlock->__data.__nr_readers; > + result = EAGAIN; > + } > + else > + LIBC_PROBE (rdlock_acquire_read, 1, rwlock); > + > + break; > + } > } > > /* We are done, free the lock. */ > @@ -96,5 +92,43 @@ __pthread_rwlock_rdlock (rwlock) > return result; > } > > + > +/* Fast path of acquiring read lock on RWLOCK. */ > + > +int > +__pthread_rwlock_rdlock (pthread_rwlock_t *rwlock) > +{ > + int result = 0; > + > + LIBC_PROBE (rdlock_entry, 1, rwlock); > + > + /* Make sure we are alone. */ > + lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); > + > + /* Get the rwlock if there is no writer... 
*/ > + if (rwlock->__data.__writer == 0 > + /* ...and if either no writer is waiting or we prefer readers. */ > + && (!rwlock->__data.__nr_writers_queued > + || PTHREAD_RWLOCK_PREFER_READER_P (rwlock))) > + { > + /* Increment the reader counter. Avoid overflow. */ > + if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0)) > + { > + /* Overflow on number of readers. */ > + --rwlock->__data.__nr_readers; > + result = EAGAIN; > + } > + else > + LIBC_PROBE (rdlock_acquire_read, 1, rwlock); > + > + /* We are done, free the lock. */ > + lll_unlock (rwlock->__data.__lock, rwlock->__data.__shared); > + > + return result; > + } > + > + return __do_pthread_rwlock_rdlock (rwlock); > +} > + > weak_alias (__pthread_rwlock_rdlock, pthread_rwlock_rdlock) > hidden_def (__pthread_rwlock_rdlock) > diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c > index 1613d45..2907681 100644 > --- a/nptl/pthread_rwlock_wrlock.c > +++ b/nptl/pthread_rwlock_wrlock.c > @@ -25,29 +25,15 @@ > > > /* Acquire write lock for RWLOCK. */ > -int > -__pthread_rwlock_wrlock (rwlock) > - pthread_rwlock_t *rwlock; > +static int __attribute__((noinline)) > +__do_pthread_rwlock_wrlock (pthread_rwlock_t *rwlock) See above.
diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c index 3773f7d..a4deed4 100644 --- a/nptl/pthread_rwlock_rdlock.c +++ b/nptl/pthread_rwlock_rdlock.c @@ -24,39 +24,16 @@ #include <stap-probe.h> -/* Acquire read lock for RWLOCK. */ -int -__pthread_rwlock_rdlock (rwlock) - pthread_rwlock_t *rwlock; +/* Acquire read lock for RWLOCK. Slow path. */ +static int __attribute__((noinline)) +__do_pthread_rwlock_rdlock (pthread_rwlock_t *rwlock) { int result = 0; - LIBC_PROBE (rdlock_entry, 1, rwlock); - - /* Make sure we are alone. */ - lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); + /* Lock is taken in caller. */ while (1) { - /* Get the rwlock if there is no writer... */ - if (rwlock->__data.__writer == 0 - /* ...and if either no writer is waiting or we prefer readers. */ - && (!rwlock->__data.__nr_writers_queued - || PTHREAD_RWLOCK_PREFER_READER_P (rwlock))) - { - /* Increment the reader counter. Avoid overflow. */ - if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0)) - { - /* Overflow on number of readers. */ - --rwlock->__data.__nr_readers; - result = EAGAIN; - } - else - LIBC_PROBE (rdlock_acquire_read, 1, rwlock); - - break; - } - /* Make sure we are not holding the rwlock as a writer. This is a deadlock situation we recognize and report. */ if (__builtin_expect (rwlock->__data.__writer @@ -88,6 +65,25 @@ __pthread_rwlock_rdlock (rwlock) lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); --rwlock->__data.__nr_readers_queued; + + /* Get the rwlock if there is no writer... */ + if (rwlock->__data.__writer == 0 + /* ...and if either no writer is waiting or we prefer readers. */ + && (!rwlock->__data.__nr_writers_queued + || PTHREAD_RWLOCK_PREFER_READER_P (rwlock))) + { + /* Increment the reader counter. Avoid overflow. */ + if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0)) + { + /* Overflow on number of readers. 
*/ + --rwlock->__data.__nr_readers; + result = EAGAIN; + } + else + LIBC_PROBE (rdlock_acquire_read, 1, rwlock); + + break; + } } /* We are done, free the lock. */ @@ -96,5 +92,43 @@ __pthread_rwlock_rdlock (rwlock) return result; } + +/* Fast path of acquiring read lock on RWLOCK. */ + +int +__pthread_rwlock_rdlock (pthread_rwlock_t *rwlock) +{ + int result = 0; + + LIBC_PROBE (rdlock_entry, 1, rwlock); + + /* Make sure we are alone. */ + lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); + + /* Get the rwlock if there is no writer... */ + if (rwlock->__data.__writer == 0 + /* ...and if either no writer is waiting or we prefer readers. */ + && (!rwlock->__data.__nr_writers_queued + || PTHREAD_RWLOCK_PREFER_READER_P (rwlock))) + { + /* Increment the reader counter. Avoid overflow. */ + if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0)) + { + /* Overflow on number of readers. */ + --rwlock->__data.__nr_readers; + result = EAGAIN; + } + else + LIBC_PROBE (rdlock_acquire_read, 1, rwlock); + + /* We are done, free the lock. */ + lll_unlock (rwlock->__data.__lock, rwlock->__data.__shared); + + return result; + } + + return __do_pthread_rwlock_rdlock (rwlock); +} + weak_alias (__pthread_rwlock_rdlock, pthread_rwlock_rdlock) hidden_def (__pthread_rwlock_rdlock) diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c index 1613d45..2907681 100644 --- a/nptl/pthread_rwlock_wrlock.c +++ b/nptl/pthread_rwlock_wrlock.c @@ -25,29 +25,15 @@ /* Acquire write lock for RWLOCK. */ -int -__pthread_rwlock_wrlock (rwlock) - pthread_rwlock_t *rwlock; +static int __attribute__((noinline)) +__do_pthread_rwlock_wrlock (pthread_rwlock_t *rwlock) { int result = 0; - LIBC_PROBE (wrlock_entry, 1, rwlock); - - /* Make sure we are alone. */ - lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); + /* Caller has taken the lock. */ while (1) { - /* Get the rwlock if there is no writer and no reader. 
*/ - if (rwlock->__data.__writer == 0 && rwlock->__data.__nr_readers == 0) - { - /* Mark self as writer. */ - rwlock->__data.__writer = THREAD_GETMEM (THREAD_SELF, tid); - - LIBC_PROBE (wrlock_acquire_write, 1, rwlock); - break; - } - /* Make sure we are not holding the rwlock as a writer. This is a deadlock situation we recognize and report. */ if (__builtin_expect (rwlock->__data.__writer @@ -80,6 +66,16 @@ __pthread_rwlock_wrlock (rwlock) /* To start over again, remove the thread from the writer list. */ --rwlock->__data.__nr_writers_queued; + + /* Get the rwlock if there is no writer and no reader. */ + if (rwlock->__data.__writer == 0 && rwlock->__data.__nr_readers == 0) + { + /* Mark self as writer. */ + rwlock->__data.__writer = THREAD_GETMEM (THREAD_SELF, tid); + + LIBC_PROBE (wrlock_acquire_write, 1, rwlock); + break; + } } /* We are done, free the lock. */ @@ -88,5 +84,34 @@ __pthread_rwlock_wrlock (rwlock) return result; } +/* Fast path of acquiring write lock for RWLOCK. */ + +int +__pthread_rwlock_wrlock (pthread_rwlock_t *rwlock) +{ + LIBC_PROBE (wrlock_entry, 1, rwlock); + + /* Make sure we are alone. */ + lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); + + /* Get the rwlock if there is no writer and no reader. */ + if (__glibc_likely((rwlock->__data.__writer | + rwlock->__data.__nr_readers) == 0)) + { + /* Mark self as writer. */ + rwlock->__data.__writer = THREAD_GETMEM (THREAD_SELF, tid); + + LIBC_PROBE (wrlock_acquire_write, 1, rwlock); + + /* We are done, free the lock. */ + lll_unlock (rwlock->__data.__lock, rwlock->__data.__shared); + + return 0; + } + + return __do_pthread_rwlock_wrlock (rwlock); +} + + weak_alias (__pthread_rwlock_wrlock, pthread_rwlock_wrlock) hidden_def (__pthread_rwlock_wrlock)
From: Andi Kleen <ak@linux.intel.com> One difference between the C versions and the assembler wr/rdlock is that the C compiler saves some registers in the prologue of the functions which are unnecessary for the fast path. Split the uncontended fast path out into a separate function. Only when contention is detected is the full-featured function called. This makes the fast path code (nearly) identical to the assembler version, and gives uncontended performance within a few cycles of it. nptl/: 2014-03-25 Andi Kleen <ak@linux.intel.com> * pthread_rwlock_rdlock.c (__pthread_rwlock_rdlock): Split into __do_pthread_rwlock_rdlock and __pthread_rwlock_rdlock. * pthread_rwlock_wrlock.c (__pthread_rwlock_wrlock): Split into __do_pthread_rwlock_wrlock and __pthread_rwlock_wrlock. --- nptl/pthread_rwlock_rdlock.c | 88 ++++++++++++++++++++++++++++++-------------- nptl/pthread_rwlock_wrlock.c | 59 ++++++++++++++++++++--------- 2 files changed, 103 insertions(+), 44 deletions(-)