diff mbox

[ovs-dev,v4] dpif-netdev: Remove PMD latency on seq_mutex

Message ID CAExiqTKwYo0OsdOUg_wpEp7iU9VbDx7rC9f8Eh61_BqEshLh5w@mail.gmail.com
State Not Applicable
Headers show

Commit Message

Daniele Di Proietto July 8, 2016, 11:19 p.m. UTC
I applied this to master with the below incremental.

We _usually_ use positive error numbers in int return value.

I think there was an extra COVERAGE_INC(seq_change)

Thanks for the patch!




2016-07-05 6:33 GMT-07:00 Flavio Leitner <fbl@redhat.com>:

> The PMD thread needs to keep processing RX queues in order
> to achieve maximum throughput. It also needs to sweep emc
> cache and quiesce which use seq_mutex. That mutex can
> eventually block the PMD thread causing latency spikes and
> affecting the throughput.
>
> Since there is no requirement for running those tasks at a
> specific time, this patch extends the seq API to allow tentative
> locking instead.
>
> Reported-by: Karl Rister <krister@redhat.com>
> Co-authored-by: Karl Rister <krister@redhat.com>
> Signed-off-by: Flavio Leitner <fbl@redhat.com>
> ---
>  lib/dpif-netdev.c |  5 +++--
>  lib/ovs-rcu.c     | 37 +++++++++++++++++++++++++++++++++++--
>  lib/ovs-rcu.h     |  1 +
>  lib/seq.c         | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
>  lib/seq.h         |  5 +++++
>  5 files changed, 90 insertions(+), 7 deletions(-)
>
> v4:
>    - return EBUSY if lock is busy.
>
> v3:
>    - addressed clang annotation feedback from Daniele
>    - tested over 4 days without spikes or other issues
>
> v2:
>    - expanded SEQ API instead of using recursive lock.
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 37c2631..dc5b02e 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2869,9 +2869,10 @@ reload:
>
>              lc = 0;
>
> -            emc_cache_slow_sweep(&pmd->flow_cache);
>              coverage_try_clear();
> -            ovsrcu_quiesce();
> +            if (!ovsrcu_try_quiesce()) {
> +                emc_cache_slow_sweep(&pmd->flow_cache);
> +            }
>
>              atomic_read_relaxed(&pmd->change_seq, &seq);
>              if (seq != port_seq) {
> diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
> index 0ff508e..7462579 100644
> --- a/lib/ovs-rcu.c
> +++ b/lib/ovs-rcu.c
> @@ -15,6 +15,7 @@
>   */
>
>  #include <config.h>
> +#include <errno.h>
>  #include "ovs-rcu.h"
>  #include "fatal-signal.h"
>  #include "guarded-list.h"
> @@ -57,6 +58,7 @@ static struct guarded_list flushed_cbsets;
>  static struct seq *flushed_cbsets_seq;
>
>  static void ovsrcu_init_module(void);
> +static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool);
>  static void ovsrcu_flush_cbset(struct ovsrcu_perthread *);
>  static void ovsrcu_unregister__(struct ovsrcu_perthread *);
>  static bool ovsrcu_call_postponed(void);
> @@ -151,6 +153,27 @@ ovsrcu_quiesce(void)
>      ovsrcu_quiesced();
>  }
>
> +int
> +ovsrcu_try_quiesce(void)
> +{
> +    struct ovsrcu_perthread *perthread;
> +    int ret = -EBUSY;
> +
> +    ovs_assert(!single_threaded());
> +    perthread = ovsrcu_perthread_get();
> +    if (!seq_try_lock()) {
> +        perthread->seqno = seq_read_protected(global_seqno);
> +        if (perthread->cbset) {
> +            ovsrcu_flush_cbset__(perthread, true);
> +        }
> +        seq_change_protected(global_seqno);
> +        seq_unlock();
> +        ovsrcu_quiesced();
> +        ret = 0;
> +    }
> +    return ret;
> +}
> +
>  bool
>  ovsrcu_is_quiescent(void)
>  {
> @@ -292,7 +315,7 @@ ovsrcu_postpone_thread(void *arg OVS_UNUSED)
>  }
>
>  static void
> -ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
> +ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool protected)
>  {
>      struct ovsrcu_cbset *cbset = perthread->cbset;
>
> @@ -300,11 +323,21 @@ ovsrcu_flush_cbset(struct ovsrcu_perthread
> *perthread)
>          guarded_list_push_back(&flushed_cbsets, &cbset->list_node,
> SIZE_MAX);
>          perthread->cbset = NULL;
>
> -        seq_change(flushed_cbsets_seq);
> +        if (protected) {
> +            seq_change_protected(flushed_cbsets_seq);
> +        } else {
> +            seq_change(flushed_cbsets_seq);
> +        }
>      }
>  }
>
>  static void
> +ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
> +{
> +    ovsrcu_flush_cbset__(perthread, false);
> +}
> +
> +static void
>  ovsrcu_unregister__(struct ovsrcu_perthread *perthread)
>  {
>      if (perthread->cbset) {
> diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h
> index 8750ead..dc75749 100644
> --- a/lib/ovs-rcu.h
> +++ b/lib/ovs-rcu.h
> @@ -217,6 +217,7 @@ void ovsrcu_postpone__(void (*function)(void *aux),
> void *aux);
>  void ovsrcu_quiesce_start(void);
>  void ovsrcu_quiesce_end(void);
>  void ovsrcu_quiesce(void);
> +int ovsrcu_try_quiesce(void);
>  bool ovsrcu_is_quiescent(void);
>
>  /* Synchronization.  Waits for all non-quiescent threads to quiesce at
> least
> diff --git a/lib/seq.c b/lib/seq.c
> index 57e0775..4e99c6c 100644
> --- a/lib/seq.c
> +++ b/lib/seq.c
> @@ -100,6 +100,38 @@ seq_destroy(struct seq *seq)
>      ovs_mutex_unlock(&seq_mutex);
>  }
>
> +int
> +seq_try_lock(void)
> +{
> +    return ovs_mutex_trylock(&seq_mutex);
> +}
> +
> +void
> +seq_lock(void)
> +    OVS_ACQUIRES(seq_mutex)
> +{
> +    ovs_mutex_lock(&seq_mutex);
> +}
> +
> +void
> +seq_unlock(void)
> +    OVS_RELEASES(seq_mutex)
> +{
> +    ovs_mutex_unlock(&seq_mutex);
> +}
> +
> +/* Increments 'seq''s sequence number, waking up any threads that are
> waiting
> + * on 'seq'. */
> +void
> +seq_change_protected(struct seq *seq)
> +    OVS_REQUIRES(seq_mutex)
> +{
> +    COVERAGE_INC(seq_change);
> +
> +    seq->value = seq_next++;
> +    seq_wake_waiters(seq);
> +}
> +
>  /* Increments 'seq''s sequence number, waking up any threads that are
> waiting
>   * on 'seq'. */
>  void
> @@ -109,8 +141,7 @@ seq_change(struct seq *seq)
>      COVERAGE_INC(seq_change);
>
>      ovs_mutex_lock(&seq_mutex);
> -    seq->value = seq_next++;
> -    seq_wake_waiters(seq);
> +    seq_change_protected(seq);
>      ovs_mutex_unlock(&seq_mutex);
>  }
>
> @@ -120,13 +151,25 @@ seq_change(struct seq *seq)
>   * when an object changes, even without an ability to lock the object.
> See
>   * Usage in seq.h for details. */
>  uint64_t
> +seq_read_protected(const struct seq *seq)
> +    OVS_REQUIRES(seq_mutex)
> +{
> +    return seq->value;
> +}
> +
> +/* Returns 'seq''s current sequence number (which could change
> immediately).
> + *
> + * seq_read() and seq_wait() can be used together to yield a race-free
> wakeup
> + * when an object changes, even without an ability to lock the object.
> See
> + * Usage in seq.h for details. */
> +uint64_t
>  seq_read(const struct seq *seq)
>      OVS_EXCLUDED(seq_mutex)
>  {
>      uint64_t value;
>
>      ovs_mutex_lock(&seq_mutex);
> -    value = seq->value;
> +    value = seq_read_protected(seq);
>      ovs_mutex_unlock(&seq_mutex);
>
>      return value;
> diff --git a/lib/seq.h b/lib/seq.h
> index f3f4b53..221ab9a 100644
> --- a/lib/seq.h
> +++ b/lib/seq.h
> @@ -121,9 +121,14 @@
>  struct seq *seq_create(void);
>  void seq_destroy(struct seq *);
>  void seq_change(struct seq *);
> +void seq_change_protected(struct seq *);
> +void seq_lock(void);
> +int seq_try_lock(void);
> +void seq_unlock(void);
>
>  /* For observers. */
>  uint64_t seq_read(const struct seq *);
> +uint64_t seq_read_protected(const struct seq *);
>
>  void seq_wait_at(const struct seq *, uint64_t value, const char *where);
>  #define seq_wait(seq, value) seq_wait_at(seq, value, OVS_SOURCE_LOCATOR)
> --
> 2.5.5
>
>

Comments

Flavio Leitner July 11, 2016, 8:21 p.m. UTC | #1
On Fri, Jul 08, 2016 at 04:19:14PM -0700, Daniele Di Proietto wrote:
> I applied this to master with the below incremental.
> 
> We _usually_ use positive error numbers in int return value.
> 
> I think there was an extra COVERAGE_INC(seq_change)
> 
> Thanks for the patch!

Your changes are good, thanks for fixing it.
Ciara Loftus July 15, 2016, 9:10 a.m. UTC | #2
> 

> I applied this to master with the below incremental.

> 

> We _usually_ use positive error numbers in int return value.

> 

> I think there was an extra COVERAGE_INC(seq_change)

> 

> Thanks for the patch!


Hi,

Could this be backported to the 2.5 branch?

Thanks,
Ciara

> 

> diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c

> index 7462579..8aef1f1 100644

> --- a/lib/ovs-rcu.c

> +++ b/lib/ovs-rcu.c

> @@ -157,7 +157,7 @@ int

>  ovsrcu_try_quiesce(void)

>  {

>      struct ovsrcu_perthread *perthread;

> -    int ret = -EBUSY;

> +    int ret = EBUSY;

> 

>      ovs_assert(!single_threaded());

>      perthread = ovsrcu_perthread_get();

> diff --git a/lib/seq.c b/lib/seq.c

> index 4e99c6c..b8b5b65 100644

> --- a/lib/seq.c

> +++ b/lib/seq.c

> @@ -138,8 +138,6 @@ void

>  seq_change(struct seq *seq)

>      OVS_EXCLUDED(seq_mutex)

>  {

> -    COVERAGE_INC(seq_change);

> -

>      ovs_mutex_lock(&seq_mutex);

>      seq_change_protected(seq);

>      ovs_mutex_unlock(&seq_mutex);

> 

> 

> 

> 2016-07-05 6:33 GMT-07:00 Flavio Leitner <fbl@redhat.com>:

> 

> > The PMD thread needs to keep processing RX queues in order

> > to achieve maximum throughput. It also needs to sweep emc

> > cache and quiesce which use seq_mutex. That mutex can

> > eventually block the PMD thread causing latency spikes and

> > affecting the throughput.

> >

> > Since there is no requirement for running those tasks at a

> > specific time, this patch extends the seq API to allow tentative

> > locking instead.

> >

> > Reported-by: Karl Rister <krister@redhat.com>

> > Co-authored-by: Karl Rister <krister@redhat.com>

> > Signed-off-by: Flavio Leitner <fbl@redhat.com>

> > ---

> >  lib/dpif-netdev.c |  5 +++--

> >  lib/ovs-rcu.c     | 37 +++++++++++++++++++++++++++++++++++--

> >  lib/ovs-rcu.h     |  1 +

> >  lib/seq.c         | 49

> ++++++++++++++++++++++++++++++++++++++++++++++---

> >  lib/seq.h         |  5 +++++

> >  5 files changed, 90 insertions(+), 7 deletions(-)

> >

> > v4:

> >    - return EBUSY if lock is busy.

> >

> > v3:

> >    - addressed clang annotation feedback from Daniele

> >    - tested over 4 days without spikes or other issues

> >

> > v2:

> >    - expanded SEQ API instead of using recursive lock.

> >

> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c

> > index 37c2631..dc5b02e 100644

> > --- a/lib/dpif-netdev.c

> > +++ b/lib/dpif-netdev.c

> > @@ -2869,9 +2869,10 @@ reload:

> >

> >              lc = 0;

> >

> > -            emc_cache_slow_sweep(&pmd->flow_cache);

> >              coverage_try_clear();

> > -            ovsrcu_quiesce();

> > +            if (!ovsrcu_try_quiesce()) {

> > +                emc_cache_slow_sweep(&pmd->flow_cache);

> > +            }

> >

> >              atomic_read_relaxed(&pmd->change_seq, &seq);

> >              if (seq != port_seq) {

> > diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c

> > index 0ff508e..7462579 100644

> > --- a/lib/ovs-rcu.c

> > +++ b/lib/ovs-rcu.c

> > @@ -15,6 +15,7 @@

> >   */

> >

> >  #include <config.h>

> > +#include <errno.h>

> >  #include "ovs-rcu.h"

> >  #include "fatal-signal.h"

> >  #include "guarded-list.h"

> > @@ -57,6 +58,7 @@ static struct guarded_list flushed_cbsets;

> >  static struct seq *flushed_cbsets_seq;

> >

> >  static void ovsrcu_init_module(void);

> > +static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool);

> >  static void ovsrcu_flush_cbset(struct ovsrcu_perthread *);

> >  static void ovsrcu_unregister__(struct ovsrcu_perthread *);

> >  static bool ovsrcu_call_postponed(void);

> > @@ -151,6 +153,27 @@ ovsrcu_quiesce(void)

> >      ovsrcu_quiesced();

> >  }

> >

> > +int

> > +ovsrcu_try_quiesce(void)

> > +{

> > +    struct ovsrcu_perthread *perthread;

> > +    int ret = -EBUSY;

> > +

> > +    ovs_assert(!single_threaded());

> > +    perthread = ovsrcu_perthread_get();

> > +    if (!seq_try_lock()) {

> > +        perthread->seqno = seq_read_protected(global_seqno);

> > +        if (perthread->cbset) {

> > +            ovsrcu_flush_cbset__(perthread, true);

> > +        }

> > +        seq_change_protected(global_seqno);

> > +        seq_unlock();

> > +        ovsrcu_quiesced();

> > +        ret = 0;

> > +    }

> > +    return ret;

> > +}

> > +

> >  bool

> >  ovsrcu_is_quiescent(void)

> >  {

> > @@ -292,7 +315,7 @@ ovsrcu_postpone_thread(void *arg OVS_UNUSED)

> >  }

> >

> >  static void

> > -ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)

> > +ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool

> protected)

> >  {

> >      struct ovsrcu_cbset *cbset = perthread->cbset;

> >

> > @@ -300,11 +323,21 @@ ovsrcu_flush_cbset(struct ovsrcu_perthread

> > *perthread)

> >          guarded_list_push_back(&flushed_cbsets, &cbset->list_node,

> > SIZE_MAX);

> >          perthread->cbset = NULL;

> >

> > -        seq_change(flushed_cbsets_seq);

> > +        if (protected) {

> > +            seq_change_protected(flushed_cbsets_seq);

> > +        } else {

> > +            seq_change(flushed_cbsets_seq);

> > +        }

> >      }

> >  }

> >

> >  static void

> > +ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)

> > +{

> > +    ovsrcu_flush_cbset__(perthread, false);

> > +}

> > +

> > +static void

> >  ovsrcu_unregister__(struct ovsrcu_perthread *perthread)

> >  {

> >      if (perthread->cbset) {

> > diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h

> > index 8750ead..dc75749 100644

> > --- a/lib/ovs-rcu.h

> > +++ b/lib/ovs-rcu.h

> > @@ -217,6 +217,7 @@ void ovsrcu_postpone__(void (*function)(void

> *aux),

> > void *aux);

> >  void ovsrcu_quiesce_start(void);

> >  void ovsrcu_quiesce_end(void);

> >  void ovsrcu_quiesce(void);

> > +int ovsrcu_try_quiesce(void);

> >  bool ovsrcu_is_quiescent(void);

> >

> >  /* Synchronization.  Waits for all non-quiescent threads to quiesce at

> > least

> > diff --git a/lib/seq.c b/lib/seq.c

> > index 57e0775..4e99c6c 100644

> > --- a/lib/seq.c

> > +++ b/lib/seq.c

> > @@ -100,6 +100,38 @@ seq_destroy(struct seq *seq)

> >      ovs_mutex_unlock(&seq_mutex);

> >  }

> >

> > +int

> > +seq_try_lock(void)

> > +{

> > +    return ovs_mutex_trylock(&seq_mutex);

> > +}

> > +

> > +void

> > +seq_lock(void)

> > +    OVS_ACQUIRES(seq_mutex)

> > +{

> > +    ovs_mutex_lock(&seq_mutex);

> > +}

> > +

> > +void

> > +seq_unlock(void)

> > +    OVS_RELEASES(seq_mutex)

> > +{

> > +    ovs_mutex_unlock(&seq_mutex);

> > +}

> > +

> > +/* Increments 'seq''s sequence number, waking up any threads that are

> > waiting

> > + * on 'seq'. */

> > +void

> > +seq_change_protected(struct seq *seq)

> > +    OVS_REQUIRES(seq_mutex)

> > +{

> > +    COVERAGE_INC(seq_change);

> > +

> > +    seq->value = seq_next++;

> > +    seq_wake_waiters(seq);

> > +}

> > +

> >  /* Increments 'seq''s sequence number, waking up any threads that are

> > waiting

> >   * on 'seq'. */

> >  void

> > @@ -109,8 +141,7 @@ seq_change(struct seq *seq)

> >      COVERAGE_INC(seq_change);

> >

> >      ovs_mutex_lock(&seq_mutex);

> > -    seq->value = seq_next++;

> > -    seq_wake_waiters(seq);

> > +    seq_change_protected(seq);

> >      ovs_mutex_unlock(&seq_mutex);

> >  }

> >

> > @@ -120,13 +151,25 @@ seq_change(struct seq *seq)

> >   * when an object changes, even without an ability to lock the object.

> > See

> >   * Usage in seq.h for details. */

> >  uint64_t

> > +seq_read_protected(const struct seq *seq)

> > +    OVS_REQUIRES(seq_mutex)

> > +{

> > +    return seq->value;

> > +}

> > +

> > +/* Returns 'seq''s current sequence number (which could change

> > immediately).

> > + *

> > + * seq_read() and seq_wait() can be used together to yield a race-free

> > wakeup

> > + * when an object changes, even without an ability to lock the object.

> > See

> > + * Usage in seq.h for details. */

> > +uint64_t

> >  seq_read(const struct seq *seq)

> >      OVS_EXCLUDED(seq_mutex)

> >  {

> >      uint64_t value;

> >

> >      ovs_mutex_lock(&seq_mutex);

> > -    value = seq->value;

> > +    value = seq_read_protected(seq);

> >      ovs_mutex_unlock(&seq_mutex);

> >

> >      return value;

> > diff --git a/lib/seq.h b/lib/seq.h

> > index f3f4b53..221ab9a 100644

> > --- a/lib/seq.h

> > +++ b/lib/seq.h

> > @@ -121,9 +121,14 @@

> >  struct seq *seq_create(void);

> >  void seq_destroy(struct seq *);

> >  void seq_change(struct seq *);

> > +void seq_change_protected(struct seq *);

> > +void seq_lock(void);

> > +int seq_try_lock(void);

> > +void seq_unlock(void);

> >

> >  /* For observers. */

> >  uint64_t seq_read(const struct seq *);

> > +uint64_t seq_read_protected(const struct seq *);

> >

> >  void seq_wait_at(const struct seq *, uint64_t value, const char *where);

> >  #define seq_wait(seq, value) seq_wait_at(seq, value,

> OVS_SOURCE_LOCATOR)

> > --

> > 2.5.5

> >

> >

> _______________________________________________

> dev mailing list

> dev@openvswitch.org

> http://openvswitch.org/mailman/listinfo/dev
diff mbox

Patch

diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
index 7462579..8aef1f1 100644
--- a/lib/ovs-rcu.c
+++ b/lib/ovs-rcu.c
@@ -157,7 +157,7 @@  int
 ovsrcu_try_quiesce(void)
 {
     struct ovsrcu_perthread *perthread;
-    int ret = -EBUSY;
+    int ret = EBUSY;

     ovs_assert(!single_threaded());
     perthread = ovsrcu_perthread_get();
diff --git a/lib/seq.c b/lib/seq.c
index 4e99c6c..b8b5b65 100644
--- a/lib/seq.c
+++ b/lib/seq.c
@@ -138,8 +138,6 @@  void
 seq_change(struct seq *seq)
     OVS_EXCLUDED(seq_mutex)
 {
-    COVERAGE_INC(seq_change);
-
     ovs_mutex_lock(&seq_mutex);
     seq_change_protected(seq);
     ovs_mutex_unlock(&seq_mutex);