@@ -48,6 +48,7 @@
#include "fat-rwlock.h"
#include "flow.h"
#include "hmapx.h"
+#include "id-pool.h"
#include "latch.h"
#include "netdev.h"
#include "netdev-vport.h"
@@ -281,6 +282,9 @@ struct dp_netdev {
/* Stores all 'struct dp_netdev_pmd_thread's. */
struct cmap poll_threads;
+ /* Id pool for per-thread static_tx_qid. */
+ struct id_pool *tx_qid_pool;
+ struct ovs_mutex tx_qid_pool_mutex;
/* Protects the access of the 'struct dp_netdev_pmd_thread'
* instance for non-pmd thread. */
@@ -567,7 +571,7 @@ struct dp_netdev_pmd_thread {
/* Queue id used by this pmd thread to send packets on all netdevs if
* XPS disabled for this netdev. All static_tx_qid's are unique and less
* than 'cmap_count(dp->poll_threads)'. */
- const int static_tx_qid;
+ uint32_t static_tx_qid;
struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and 'tx_ports'. */
/* List of rx queues to poll. */
@@ -647,6 +651,8 @@ static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp,
unsigned core_id);
static struct dp_netdev_pmd_thread *
dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
+static void dp_netdev_del_pmd(struct dp_netdev *dp,
+ struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp, bool non_pmd);
static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
@@ -1185,10 +1191,17 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
cmap_init(&dp->poll_threads);
+
+ ovs_mutex_init(&dp->tx_qid_pool_mutex);
+ /* We need 1 Tx queue for each possible core + 1 for non-PMD threads. */
+ dp->tx_qid_pool = id_pool_create(0, ovs_numa_get_n_cores() + 1);
+
ovs_mutex_init_recursive(&dp->non_pmd_mutex);
ovsthread_key_create(&dp->per_pmd_key, NULL);
ovs_mutex_lock(&dp->port_mutex);
+ /* The non-PMD thread will be created before all other threads and will
+ * allocate static_tx_qid = 0. */
dp_netdev_set_nonpmd(dp);
error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class,
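Sizing note for the pool created above: the pmd-cpu-mask can name at most every core that ovs_numa_get_n_cores() reports, and all non-PMD threads share one additional id, so n_cores + 1 ids always suffice and id_pool_alloc_id() cannot run dry for a valid configuration. A toy check of that arithmetic, assuming a hypothetical 4-core box:

    #include <assert.h>
    #include <stdio.h>

    int
    main(void)
    {
        unsigned n_cores = 4;              /* ovs_numa_get_n_cores(), assumed. */
        unsigned pool_size = n_cores + 1;  /* Size chosen by the patch. */

        /* 0..n_cores PMD threads, plus one id for the non-PMD side. */
        for (unsigned n_pmds = 0; n_pmds <= n_cores; n_pmds++) {
            assert(n_pmds + 1 <= pool_size);
        }
        printf("a pool of %u ids covers every valid pmd-cpu-mask\n", pool_size);
        return 0;
    }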
@@ -1283,6 +1296,9 @@ dp_netdev_free(struct dp_netdev *dp)
dp_netdev_destroy_all_pmds(dp, true);
cmap_destroy(&dp->poll_threads);
+ ovs_mutex_destroy(&dp->tx_qid_pool_mutex);
+ id_pool_destroy(dp->tx_qid_pool);
+
ovs_mutex_destroy(&dp->non_pmd_mutex);
ovsthread_key_delete(dp->per_pmd_key);
@@ -3303,12 +3319,29 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
}
static void
+reload_affected_pmds(struct dp_netdev *dp)
+{
+ struct dp_netdev_pmd_thread *pmd;
+
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ if (pmd->need_reload) {
+ dp_netdev_reload_pmd__(pmd);
+ pmd->need_reload = false;
+ }
+ }
+}
+
+static void
reconfigure_pmd_threads(struct dp_netdev *dp)
OVS_REQUIRES(dp->port_mutex)
{
struct dp_netdev_pmd_thread *pmd;
struct ovs_numa_dump *pmd_cores;
+ struct ovs_numa_info_core *core;
+ struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete);
+ struct hmapx_node *node;
bool changed = false;
+ bool need_to_adjust_static_tx_qids = false;
/* The pmd threads should be started only if there's a pmd port in the
* datapath. If the user didn't provide any "pmd-cpu-mask", we start
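reload_affected_pmds(), added above, is the sweep half of a mark-then-sweep pattern: the reconfiguration code in the next hunk sets need_reload on individual threads, and this single pass then kicks exactly the marked ones. A standalone sketch of the pattern (a plain array stands in for the cmap of threads, and reload_pmd() for dp_netdev_reload_pmd__()):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct pmd {
        int core_id;
        bool need_reload;
    };

    static void
    reload_pmd(struct pmd *pmd)
    {
        printf("reloading pmd on core %d\n", pmd->core_id);
    }

    static void
    reload_affected_pmds(struct pmd *pmds, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            if (pmds[i].need_reload) {
                reload_pmd(&pmds[i]);
                pmds[i].need_reload = false;   /* One reload per mark. */
            }
        }
    }

    int
    main(void)
    {
        struct pmd pmds[] = { {1, false}, {2, true}, {3, true} };

        reload_affected_pmds(pmds, 3);   /* Kicks cores 2 and 3 only. */
        return 0;
    }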
@@ -3321,40 +3354,64 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS);
}
- /* Check for changed configuration */
- if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) {
- changed = true;
- } else {
- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
- if (pmd->core_id != NON_PMD_CORE_ID
- && !ovs_numa_dump_contains_core(pmd_cores,
- pmd->numa_id,
- pmd->core_id)) {
- changed = true;
- break;
- }
+ /* We need to adjust 'static_tx_qid's only if we're reducing the number of
+ * PMD threads. Otherwise, new threads will allocate all the freed ids. */
+ if (ovs_numa_dump_count(pmd_cores) < cmap_count(&dp->poll_threads) - 1) {
+ /* Adjustment is required to keep 'static_tx_qid's sequential and to
+ * avoid possible issues such as imbalanced tx queue usage and
+ * unnecessary locking caused by remapping at the netdev level. */
+ need_to_adjust_static_tx_qids = true;
+ }
+
+ /* Check for unwanted pmd threads */
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ if (pmd->core_id == NON_PMD_CORE_ID) {
+ continue;
+ }
+ if (!ovs_numa_dump_contains_core(pmd_cores, pmd->numa_id,
+ pmd->core_id)) {
+ hmapx_add(&to_delete, pmd);
+ } else if (need_to_adjust_static_tx_qids) {
+ pmd->need_reload = true;
}
}
- /* Destroy the old and recreate the new pmd threads. We don't perform an
- * incremental update because we would have to adjust 'static_tx_qid'. */
- if (changed) {
- struct ovs_numa_info_core *core;
- struct ovs_numa_info_numa *numa;
+ HMAPX_FOR_EACH (node, &to_delete) {
+ pmd = (struct dp_netdev_pmd_thread *) node->data;
+ VLOG_INFO("PMD thread on numa_id: %d, core id: %2d destroyed.",
+ pmd->numa_id, pmd->core_id);
+ dp_netdev_del_pmd(dp, pmd);
+ }
+ changed = !hmapx_is_empty(&to_delete);
+ hmapx_destroy(&to_delete);
- /* Do not destroy the non pmd thread. */
- dp_netdev_destroy_all_pmds(dp, false);
- FOR_EACH_CORE_ON_DUMP (core, pmd_cores) {
- struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
+ if (need_to_adjust_static_tx_qids) {
+ /* 'static_tx_qid's are not sequential now.
+ * Reload remaining threads to fix this. */
+ reload_affected_pmds(dp);
+ }
+ /* Check for required new pmd threads */
+ FOR_EACH_CORE_ON_DUMP (core, pmd_cores) {
+ pmd = dp_netdev_get_pmd(dp, core->core_id);
+ if (!pmd) {
+ pmd = xzalloc(sizeof *pmd);
dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
-
pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
+ VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.",
+ pmd->numa_id, pmd->core_id);
+ changed = true;
+ } else {
+ dp_netdev_pmd_unref(pmd);
}
+ }
+
+ if (changed) {
+ struct ovs_numa_info_numa *numa;
/* Log the number of pmd threads per numa node. */
FOR_EACH_NUMA_ON_DUMP (numa, pmd_cores) {
- VLOG_INFO("Created %"PRIuSIZE" pmd threads on numa node %d",
+ VLOG_INFO("There are %"PRIuSIZE" pmd threads on numa node %d",
numa->n_cores, numa->numa_id);
}
}
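Taken together, the hunks above replace the destroy-everything-and-recreate step with an incremental one: delete threads whose cores left the mask, reload the survivors only when the set shrank (so the freed tx qids are re-allocated and the id range stays dense), then create threads for newly added cores. A standalone model of that control flow (plain int arrays stand in for the cmap of threads and the numa dump; only the ordering of the three passes follows the patch):

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX 8

    static bool
    contains(const int *set, int n, int v)
    {
        for (int i = 0; i < n; i++) {
            if (set[i] == v) {
                return true;
            }
        }
        return false;
    }

    int
    main(void)
    {
        int running[MAX] = {1, 2, 3};   /* Cores with a PMD thread now.   */
        int n_running = 3;
        int wanted[MAX] = {1, 3};       /* Cores in the new pmd-cpu-mask. */
        int n_wanted = 2;

        /* Shrinking?  Survivors must then reload so that freed tx qids
         * are re-allocated and the id range stays dense. */
        bool adjust_qids = n_wanted < n_running;

        /* Pass 1: drop threads whose core left the mask. */
        int kept[MAX];
        int n_kept = 0;
        for (int i = 0; i < n_running; i++) {
            if (contains(wanted, n_wanted, running[i])) {
                kept[n_kept++] = running[i];
            } else {
                printf("destroy pmd on core %d\n", running[i]);
            }
        }

        /* Pass 2: reload survivors only when ids must be compacted. */
        if (adjust_qids) {
            for (int i = 0; i < n_kept; i++) {
                printf("reload pmd on core %d\n", kept[i]);
            }
        }

        /* Pass 3: start threads for cores newly added to the mask. */
        for (int i = 0; i < n_wanted; i++) {
            if (!contains(kept, n_kept, wanted[i])) {
                printf("create pmd on core %d\n", wanted[i]);
            }
        }
        return 0;
    }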
@@ -3363,19 +3420,6 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
}
static void
-reload_affected_pmds(struct dp_netdev *dp)
-{
- struct dp_netdev_pmd_thread *pmd;
-
- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
- if (pmd->need_reload) {
- dp_netdev_reload_pmd__(pmd);
- pmd->need_reload = false;
- }
- }
-}
-
-static void
pmd_remove_stale_ports(struct dp_netdev *dp,
struct dp_netdev_pmd_thread *pmd)
OVS_EXCLUDED(pmd->port_mutex)
@@ -3683,6 +3727,28 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
}
}
+static void
+pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
+{
+ ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
+ if (!id_pool_alloc_id(pmd->dp->tx_qid_pool, &pmd->static_tx_qid)) {
+ VLOG_ABORT("static_tx_qid allocation failed for PMD on core %2d"
+ ", numa_id %d.", pmd->core_id, pmd->numa_id);
+ }
+ ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
+
+ VLOG_DBG("static_tx_qid = %d allocated for PMD thread on core %2d"
+ ", numa_id %d.", pmd->static_tx_qid, pmd->core_id, pmd->numa_id);
+}
+
+static void
+pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
+{
+ ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
+ id_pool_free_id(pmd->dp->tx_qid_pool, pmd->static_tx_qid);
+ ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
+}
+
static int
pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
struct polled_queue **ppoll_list)
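Both helpers above serialize on tx_qid_pool_mutex because PMD threads allocate and free ids concurrently from their reload paths while the main thread creates and destroys threads. A standalone sketch of the same discipline (pthread_mutex stands in for ovs_mutex, an inline toy bitmap for OVS's id-pool, and abort() for the VLOG_ABORT call):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define POOL_SIZE 8
    static bool in_use[POOL_SIZE];
    static pthread_mutex_t tx_qid_pool_mutex = PTHREAD_MUTEX_INITIALIZER;

    static uint32_t
    alloc_static_tx_qid(void)
    {
        uint32_t qid = UINT32_MAX;

        pthread_mutex_lock(&tx_qid_pool_mutex);
        for (uint32_t i = 0; i < POOL_SIZE; i++) {
            if (!in_use[i]) {
                in_use[i] = true;
                qid = i;
                break;
            }
        }
        pthread_mutex_unlock(&tx_qid_pool_mutex);

        if (qid == UINT32_MAX) {
            abort();    /* Exhausted: the patch logs and aborts here. */
        }
        return qid;
    }

    static void
    free_static_tx_qid(uint32_t qid)
    {
        pthread_mutex_lock(&tx_qid_pool_mutex);
        in_use[qid] = false;
        pthread_mutex_unlock(&tx_qid_pool_mutex);
    }

    int
    main(void)
    {
        uint32_t qid = alloc_static_tx_qid();

        free_static_tx_qid(qid);
        return 0;
    }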
@@ -3729,6 +3795,7 @@ pmd_thread_main(void *f_)
dpdk_set_lcore_id(pmd->core_id);
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
reload:
+ pmd_alloc_static_tx_qid(pmd);
emc_cache_init(&pmd->flow_cache);
/* List port/core affinity */
@@ -3784,6 +3851,7 @@ reload:
dp_netdev_pmd_reload_done(pmd);
emc_cache_uninit(&pmd->flow_cache);
+ pmd_free_static_tx_qid(pmd);
if (!exiting) {
goto reload;
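With the allocation placed right after the reload: label and the free right before looping back, a PMD thread returns its id on every reload and re-acquires one afterwards. After the main thread shrinks the thread set, a reloading survivor therefore picks up one of the freed lower ids, which is how the qid range gets re-compacted. A standalone trace of that effect, reusing the toy pool contract from the first sketch:

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define POOL_SIZE 8
    static bool in_use[POOL_SIZE];

    static uint32_t
    pool_alloc(void)
    {
        for (uint32_t i = 0; i < POOL_SIZE; i++) {
            if (!in_use[i]) {
                in_use[i] = true;
                return i;
            }
        }
        return UINT32_MAX;    /* Exhausted; callers would abort. */
    }

    static void
    pool_free(uint32_t id)
    {
        in_use[id] = false;
    }

    int
    main(void)
    {
        uint32_t non_pmd = pool_alloc();   /* 0: allocated first.    */
        uint32_t pmd_a = pool_alloc();     /* 1 */
        uint32_t pmd_b = pool_alloc();     /* 2 */

        pool_free(pmd_a);                  /* Core A leaves the mask. */

        /* pmd B reloads: free at the bottom of its loop, re-allocate
         * at the reload: label, and the range {0, 1} is dense again. */
        pool_free(pmd_b);
        pmd_b = pool_alloc();

        printf("non_pmd=%"PRIu32" pmd_b=%"PRIu32"\n", non_pmd, pmd_b);
        return 0;
    }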
@@ -4194,8 +4262,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
pmd->numa_id = numa_id;
pmd->need_reload = false;
- *CONST_CAST(int *, &pmd->static_tx_qid) = cmap_count(&dp->poll_threads);
-
ovs_refcount_init(&pmd->ref_cnt);
latch_init(&pmd->exit_latch);
pmd->reload_seq = seq_create();
@@ -4216,6 +4282,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
* actual thread created for NON_PMD_CORE_ID. */
if (core_id == NON_PMD_CORE_ID) {
emc_cache_init(&pmd->flow_cache);
+ pmd_alloc_static_tx_qid(pmd);
}
cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
hash_int(core_id, 0));
@@ -4258,6 +4325,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
ovs_mutex_lock(&dp->non_pmd_mutex);
emc_cache_uninit(&pmd->flow_cache);
pmd_free_cached_ports(pmd);
+ pmd_free_static_tx_qid(pmd);
ovs_mutex_unlock(&dp->non_pmd_mutex);
} else {
latch_set(&pmd->exit_latch);
@@ -38,7 +38,7 @@ dnl
dnl Waits for creation of 'n_threads' or at least 1 thread if $1 is not
dnl passed. Checking starts from line number 'line' in ovs-vswitchd.log.
m4_define([CHECK_PMD_THREADS_CREATED], [
- PATTERN="Created [[0-9]]* pmd threads on numa node $2"
+ PATTERN="There are [[0-9]]* pmd threads on numa node $2"
line_st=$3
if [[ -z "$line_st" ]]
then