diff mbox

[ovs-dev,2/2] dpif-netdev: Per numa node barriers for pmd threads.

Message ID 1450794408-17970-3-git-send-email-i.maximets@samsung.com
State Superseded
Headers show

Commit Message

Ilya Maximets Dec. 22, 2015, 2:26 p.m. UTC
While reloading, two PMD threads, one already reloaded and
one not yet reloaded, can poll the same queue of the same port.

Fix that by introducing per-NUMA-node barriers prior to polling.

Also, since we now have such barriers, we can replace the
separate configuration and starting of threads in
dp_netdev_set_pmds_on_numa() with a barrier right after the
first-time configuration of PMD threads.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---
 lib/dpif-netdev.c | 53 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 13 deletions(-)

Comments

Ilya Maximets Dec. 31, 2015, 11:30 a.m. UTC | #1
Superseded by "[PATCH RFC] dpif-netdev: Rework of rx queue management."
http://openvswitch.org/pipermail/dev/2015-December/063920.html

On 22.12.2015 17:26, Ilya Maximets wrote:
> While reloading, two PMD threads, one already reloaded and
> one not yet reloaded, can poll same queue of the same port.
> 
> Fix that by introducing per numa node barriers prior to polling.
> 
> Also, as soon as we have such barriers, we may replace the
> separated configuration and starting of threads in
> dp_netdev_set_pmds_on_numa() with barrier right after
> first-time configuration of PMD threads.
> 
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> ---
>  lib/dpif-netdev.c | 53 ++++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 40 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 3efbed0..ad4a665 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -212,6 +212,8 @@ struct dp_netdev {
>  
>      /* Stores all 'struct dp_netdev_pmd_thread's. */
>      struct cmap poll_threads;
> +    /* Per numa node reconfiguration barriers for pmd threads. */
> +    struct ovs_barrier *numa_barriers;
>  
>      /* Protects the access of the 'struct dp_netdev_pmd_thread'
>       * instance for non-pmd thread. */
> @@ -822,7 +824,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
>      OVS_REQUIRES(dp_netdev_mutex)
>  {
>      struct dp_netdev *dp;
> -    int error;
> +    int n_numas, i, error;
>  
>      dp = xzalloc(sizeof *dp);
>      shash_add(&dp_netdevs, name, dp);
> @@ -834,6 +836,13 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
>  
>      ovs_mutex_init(&dp->port_mutex);
>      cmap_init(&dp->ports);
> +
> +    n_numas = ovs_numa_get_n_numas();
> +    dp->numa_barriers = xmalloc(n_numas * sizeof *dp->numa_barriers);
> +    for (i = 0; i < n_numas; i++) {
> +        ovs_barrier_init(&dp->numa_barriers[i], 0);
> +    }
> +
>      dp->port_seq = seq_create();
>      fat_rwlock_init(&dp->upcall_rwlock);
>  
> @@ -906,11 +915,19 @@ dp_netdev_free(struct dp_netdev *dp)
>      OVS_REQUIRES(dp_netdev_mutex)
>  {
>      struct dp_netdev_port *port;
> +    int i, n_numas;
>  
>      shash_find_and_delete(&dp_netdevs, dp->name);
>  
>      dp_netdev_destroy_all_pmds(dp);
>      cmap_destroy(&dp->poll_threads);
> +
> +    n_numas = ovs_numa_get_n_numas();
> +    for (i = 0; i < n_numas; i++) {
> +        ovs_barrier_destroy(&dp->numa_barriers[i]);
> +    }
> +    free(dp->numa_barriers);
> +
>      ovs_mutex_destroy(&dp->non_pmd_mutex);
>      ovsthread_key_delete(dp->per_pmd_key);
>  
> @@ -2651,6 +2668,9 @@ pmd_thread_main(void *f_)
>      /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
>      ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
>      pmd_thread_setaffinity_cpu(pmd->core_id);
> +    /* Synchronize to be sure that all PMD threads of this numa
> +     * node already configured before pmd_load_queues(). */
> +    ovs_barrier_block(&pmd->dp->numa_barriers[pmd->numa_id]);
>  reload:
>      emc_cache_init(&pmd->flow_cache);
>      poll_cnt = pmd_load_queues(pmd, &poll_list, poll_cnt);
> @@ -2663,6 +2683,10 @@ reload:
>      /* Signal here to make sure the pmd finishes
>       * reloading the updated configuration. */
>      dp_netdev_pmd_reload_done(pmd);
> +    /* Synchronize all threads of this numa node to avoid situation
> +     * when two threads (one already reloaded and one not yet
> +     * reloaded) trying to receive on same queue of the same port. */
> +    ovs_barrier_block(&pmd->dp->numa_barriers[pmd->numa_id]);
>  
>      for (;;) {
>          int i;
> @@ -2906,6 +2930,14 @@ dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id)
>      }
>  }
>  
> +/* Changes size of given numa barrier. *barrier must be initialized. */
> +static void
> +dp_netdev_reset_numa_barrier(struct ovs_barrier *barrier, uint32_t size)
> +{
> +    ovs_barrier_destroy(barrier);
> +    ovs_barrier_init(barrier, size);
> +}
> +
>  /* Checks the numa node id of 'netdev' and starts pmd threads for
>   * the numa node. */
>  static void
> @@ -2926,7 +2958,6 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
>       * pmd threads for the numa node. */
>      if (!n_pmds) {
>          int can_have, n_unpinned, i;
> -        struct dp_netdev_pmd_thread **pmds;
>  
>          n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
>          if (!n_unpinned) {
> @@ -2938,22 +2969,18 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
>          /* If cpu mask is specified, uses all unpinned cores, otherwise
>           * tries creating NR_PMD_THREADS pmd threads. */
>          can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS);
> -        pmds = xzalloc(can_have * sizeof *pmds);
> +
> +        dp_netdev_reset_numa_barrier(&dp->numa_barriers[numa_id], can_have);
> +
>          for (i = 0; i < can_have; i++) {
> +            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
>              unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
> -            pmds[i] = xzalloc(sizeof **pmds);
> -            dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id);
> -        }
> -        /* The pmd thread code needs to see all the others configured pmd
> -         * threads on the same numa node.  That's why we call
> -         * 'dp_netdev_configure_pmd()' on all the threads and then we actually
> -         * start them. */
> -        for (i = 0; i < can_have; i++) {
> +
> +            dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
>              /* Each thread will distribute all devices rx-queues among
>               * themselves. */
> -            pmds[i]->thread = ovs_thread_create("pmd", pmd_thread_main, pmds[i]);
> +            pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
>          }
> -        free(pmds);
>          VLOG_INFO("Created %d pmd threads on numa node %d", can_have, numa_id);
>      }
>  }
>
diff mbox

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 3efbed0..ad4a665 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -212,6 +212,8 @@  struct dp_netdev {
 
     /* Stores all 'struct dp_netdev_pmd_thread's. */
     struct cmap poll_threads;
+    /* Per numa node reconfiguration barriers for pmd threads. */
+    struct ovs_barrier *numa_barriers;
 
     /* Protects the access of the 'struct dp_netdev_pmd_thread'
      * instance for non-pmd thread. */
@@ -822,7 +824,7 @@  create_dp_netdev(const char *name, const struct dpif_class *class,
     OVS_REQUIRES(dp_netdev_mutex)
 {
     struct dp_netdev *dp;
-    int error;
+    int n_numas, i, error;
 
     dp = xzalloc(sizeof *dp);
     shash_add(&dp_netdevs, name, dp);
@@ -834,6 +836,13 @@  create_dp_netdev(const char *name, const struct dpif_class *class,
 
     ovs_mutex_init(&dp->port_mutex);
     cmap_init(&dp->ports);
+
+    n_numas = ovs_numa_get_n_numas();
+    dp->numa_barriers = xmalloc(n_numas * sizeof *dp->numa_barriers);
+    for (i = 0; i < n_numas; i++) {
+        ovs_barrier_init(&dp->numa_barriers[i], 0);
+    }
+
     dp->port_seq = seq_create();
     fat_rwlock_init(&dp->upcall_rwlock);
 
@@ -906,11 +915,19 @@  dp_netdev_free(struct dp_netdev *dp)
     OVS_REQUIRES(dp_netdev_mutex)
 {
     struct dp_netdev_port *port;
+    int i, n_numas;
 
     shash_find_and_delete(&dp_netdevs, dp->name);
 
     dp_netdev_destroy_all_pmds(dp);
     cmap_destroy(&dp->poll_threads);
+
+    n_numas = ovs_numa_get_n_numas();
+    for (i = 0; i < n_numas; i++) {
+        ovs_barrier_destroy(&dp->numa_barriers[i]);
+    }
+    free(dp->numa_barriers);
+
     ovs_mutex_destroy(&dp->non_pmd_mutex);
     ovsthread_key_delete(dp->per_pmd_key);
 
@@ -2651,6 +2668,9 @@  pmd_thread_main(void *f_)
     /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
     ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
     pmd_thread_setaffinity_cpu(pmd->core_id);
+    /* Synchronize to be sure that all PMD threads of this numa
+     * node already configured before pmd_load_queues(). */
+    ovs_barrier_block(&pmd->dp->numa_barriers[pmd->numa_id]);
 reload:
     emc_cache_init(&pmd->flow_cache);
     poll_cnt = pmd_load_queues(pmd, &poll_list, poll_cnt);
@@ -2663,6 +2683,10 @@  reload:
     /* Signal here to make sure the pmd finishes
      * reloading the updated configuration. */
     dp_netdev_pmd_reload_done(pmd);
+    /* Synchronize all threads of this numa node to avoid situation
+     * when two threads (one already reloaded and one not yet
+     * reloaded) trying to receive on same queue of the same port. */
+    ovs_barrier_block(&pmd->dp->numa_barriers[pmd->numa_id]);
 
     for (;;) {
         int i;
@@ -2906,6 +2930,14 @@  dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id)
     }
 }
 
+/* Changes size of given numa barrier. *barrier must be initialized. */
+static void
+dp_netdev_reset_numa_barrier(struct ovs_barrier *barrier, uint32_t size)
+{
+    ovs_barrier_destroy(barrier);
+    ovs_barrier_init(barrier, size);
+}
+
 /* Checks the numa node id of 'netdev' and starts pmd threads for
  * the numa node. */
 static void
@@ -2926,7 +2958,6 @@  dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
      * pmd threads for the numa node. */
     if (!n_pmds) {
         int can_have, n_unpinned, i;
-        struct dp_netdev_pmd_thread **pmds;
 
         n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
         if (!n_unpinned) {
@@ -2938,22 +2969,18 @@  dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
         /* If cpu mask is specified, uses all unpinned cores, otherwise
          * tries creating NR_PMD_THREADS pmd threads. */
         can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS);
-        pmds = xzalloc(can_have * sizeof *pmds);
+
+        dp_netdev_reset_numa_barrier(&dp->numa_barriers[numa_id], can_have);
+
         for (i = 0; i < can_have; i++) {
+            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
             unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
-            pmds[i] = xzalloc(sizeof **pmds);
-            dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id);
-        }
-        /* The pmd thread code needs to see all the others configured pmd
-         * threads on the same numa node.  That's why we call
-         * 'dp_netdev_configure_pmd()' on all the threads and then we actually
-         * start them. */
-        for (i = 0; i < can_have; i++) {
+
+            dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
             /* Each thread will distribute all devices rx-queues among
              * themselves. */
-            pmds[i]->thread = ovs_thread_create("pmd", pmd_thread_main, pmds[i]);
+            pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
         }
-        free(pmds);
         VLOG_INFO("Created %d pmd threads on numa node %d", can_have, numa_id);
     }
 }