From patchwork Tue Sep 27 12:46:03 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 675566 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from archives.nicira.com (archives.nicira.com [96.126.127.54]) by ozlabs.org (Postfix) with ESMTP id 3sk0wN3nfcz9s4x for ; Tue, 27 Sep 2016 22:47:16 +1000 (AEST) Received: from archives.nicira.com (localhost [127.0.0.1]) by archives.nicira.com (Postfix) with ESMTP id 79C73102BE; Tue, 27 Sep 2016 05:46:18 -0700 (PDT) X-Original-To: dev@openvswitch.org Delivered-To: dev@openvswitch.org Received: from mx3v3.cudamail.com (mx3.cudamail.com [64.34.241.5]) by archives.nicira.com (Postfix) with ESMTPS id 2C5521023A for ; Tue, 27 Sep 2016 05:46:14 -0700 (PDT) Received: from bar6.cudamail.com (localhost [127.0.0.1]) by mx3v3.cudamail.com (Postfix) with ESMTPS id B3A9E1622AE for ; Tue, 27 Sep 2016 06:46:13 -0600 (MDT) X-ASG-Debug-ID: 1474980371-0b32373c7e105830001-byXFYA Received: from mx1-pf1.cudamail.com ([192.168.24.1]) by bar6.cudamail.com with ESMTP id RdfyYssb7iPCWtRN (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=NO) for ; Tue, 27 Sep 2016 06:46:12 -0600 (MDT) X-Barracuda-Envelope-From: paulb@mellanox.com X-Barracuda-RBL-Trusted-Forwarder: 192.168.24.1 Received: from unknown (HELO mellanox.co.il) (193.47.165.129) by mx1-pf1.cudamail.com with SMTP; 27 Sep 2016 12:46:11 -0000 Received-SPF: pass (mx1-pf1.cudamail.com: SPF record at _mtablock1.salesforce.com designates 193.47.165.129 as permitted sender) X-Barracuda-Apparent-Source-IP: 193.47.165.129 X-Barracuda-RBL-IP: 193.47.165.129 Received: from Internal Mail-Server by MTLPINE1 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 27 Sep 2016 15:46:07 +0300 Received: from r-vnc04.mtr.labs.mlnx (r-vnc04.mtr.labs.mlnx [10.208.0.116]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 
u8RCk6J1028432; Tue, 27 Sep 2016 15:46:06 +0300 X-CudaMail-Envelope-Sender: paulb@mellanox.com From: Paul Blakey To: dev@openvswitch.org X-CudaMail-MID: CM-E1-926012539 X-CudaMail-DTE: 092716 X-CudaMail-Originating-IP: 193.47.165.129 Date: Tue, 27 Sep 2016 15:46:03 +0300 X-ASG-Orig-Subj: [##CM-E1-926012539##][PATCH ovs RFC 8/9] dpif-hw-netlink: support for flow dump from tc Message-Id: <1474980364-9291-9-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.7.8.2 In-Reply-To: <1474980364-9291-1-git-send-email-paulb@mellanox.com> References: <1474980364-9291-1-git-send-email-paulb@mellanox.com> X-Barracuda-Connect: UNKNOWN[192.168.24.1] X-Barracuda-Start-Time: 1474980371 X-Barracuda-Encrypted: ECDHE-RSA-AES256-GCM-SHA384 X-Barracuda-URL: https://web.cudamail.com:443/cgi-mod/mark.cgi X-Virus-Scanned: by bsmtpd at cudamail.com X-Barracuda-BRTS-Status: 1 X-Barracuda-Spam-Score: 0.60 X-Barracuda-Spam-Status: No, SCORE=0.60 using global scores of TAG_LEVEL=3.5 QUARANTINE_LEVEL=1000.0 KILL_LEVEL=4.0 tests=BSF_SC5_MJ1963, RDNS_NONE, UNPARSEABLE_RELAY X-Barracuda-Spam-Report: Code version 3.2, rules version 3.2.3.33260 Rule breakdown below pts rule name description ---- ---------------------- -------------------------------------------------- 0.00 UNPARSEABLE_RELAY Informational: message has unparseable relay lines 0.10 RDNS_NONE Delivered to trusted network by a host with no rDNS 0.50 BSF_SC5_MJ1963 Custom Rule MJ1963 Cc: Shahar Klein , Andy Gospodarek , Rony Efraim , Paul Blakey , Simon Horman , Or Gerlitz Subject: [ovs-dev] [PATCH ovs RFC 8/9] dpif-hw-netlink: support for flow dump from tc X-BeenThere: dev@openvswitch.org X-Mailman-Version: 2.1.16 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: dev-bounces@openvswitch.org Sender: "dev" added support dump flows from tc. 
Signed-off-by: Paul Blakey Signed-off-by: Shahar Klein --- lib/dpif-hw-netlink.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 199 insertions(+), 16 deletions(-) diff --git a/lib/dpif-hw-netlink.c b/lib/dpif-hw-netlink.c index 9473832..663b15b 100644 --- a/lib/dpif-hw-netlink.c +++ b/lib/dpif-hw-netlink.c @@ -852,63 +852,246 @@ static int dpif_hw_netlink_flow_flush(struct dpif *dpif_) { struct dpif_hw_netlink *dpif = dpif_hw_netlink_cast(dpif_); + struct port_netdev_hash_data *data; + VLOG_DBG("%s %d %s, (%p) flush start\n", __FILE__, __LINE__, __func__, + dpif); + HMAP_FOR_EACH(data, node, &dpif->port_to_netdev) { + if (data->netdev) { + VLOG_DBG("%s %d %s, (%p) flusing port: %d, netdev: %p\n", __FILE__, + __LINE__, __func__, dpif, data->port, data->netdev); + tc_flush_flower(netdev_get_ifindex(data->netdev)); + } + } + + VLOG_DBG("%s %d %s, (%p) flush end\n", __FILE__, __LINE__, __func__, dpif); return dpif->lp_dpif_netlink->dpif_class-> flow_flush(dpif->lp_dpif_netlink); } +struct dpif_hw_netlink_flow_dump { + struct dpif_flow_dump up; + struct dpif_flow_dump *netlink_dump; + struct nl_dump *flow_dumps; + int num_dumps; + int given; + struct ovs_mutex lock; + odp_port_t ports[10]; + struct netdev *netdevs[10]; +}; + +static struct dpif_hw_netlink_flow_dump * +dpif_hw_netlink_flow_dump_cast(struct dpif_flow_dump *dump) +{ + return CONTAINER_OF(dump, struct dpif_hw_netlink_flow_dump, up); +} + static struct dpif_flow_dump * dpif_hw_netlink_flow_dump_create(const struct dpif *dpif_, bool terse) { - struct dpif_flow_dump *dump; + struct dpif_hw_netlink_flow_dump *dump; struct dpif_hw_netlink *dpif = dpif_hw_netlink_cast(dpif_); + struct nl_dump *flow_dumps = 0; + int count = 0; - dump = - dpif->lp_dpif_netlink->dpif_class-> - flow_dump_create(dpif->lp_dpif_netlink, terse); - dump->dpif = CONST_CAST(struct dpif *, dpif_); + int num_ports = hmap_count(&dpif->port_to_netdev); + + dump = xmalloc(sizeof *dump); + dpif_flow_dump_init(&dump->up, 
dpif_); + dump->up.terse = terse; - return dump; + if (num_ports) { + flow_dumps = xmalloc(sizeof (struct nl_dump) * num_ports); + struct port_netdev_hash_data *data; + + HMAP_FOR_EACH(data, node, &dpif->port_to_netdev) { + if (data->netdev) { + dump->ports[count] = data->port; + dump->netdevs[count] = data->netdev; + + tc_dump_flower_start(netdev_get_ifindex(data->netdev), + &flow_dumps[count]); + count++; + } + } + } + + dump->netlink_dump = + dpif->lp_dpif_netlink->dpif_class-> + flow_dump_create(dpif->lp_dpif_netlink, terse); + dump->flow_dumps = flow_dumps; + dump->num_dumps = count; + dump->given = 0; + ovs_mutex_init(&dump->lock); + return &dump->up; } static int dpif_hw_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_) { + int error; + struct dpif_hw_netlink_flow_dump *dump = + dpif_hw_netlink_flow_dump_cast(dump_); struct dpif_hw_netlink *dpif = dpif_hw_netlink_cast(dump_->dpif); - dump_->dpif = dpif->lp_dpif_netlink; - return dpif->lp_dpif_netlink->dpif_class->flow_dump_destroy(dump_); + int cur = 0; + + for (cur = 0; cur < dump->num_dumps; cur++) { + struct nl_dump *nl_dump = &dump->flow_dumps[cur]; + + int ret = nl_dump_done(nl_dump); + + if (ret != 0) + VLOG_ERR("nl_dump_done error ret[%d]: %d\n", cur, ret); + } + + error = + dpif->lp_dpif_netlink->dpif_class-> + flow_dump_destroy(dump->netlink_dump); + + if (dump->flow_dumps) + free(dump->flow_dumps); + free(dump); + + return error; +} + +struct dpif_hw_netlink_flow_dump_thread { + struct dpif_flow_dump_thread up; + struct dpif_flow_dump_thread *netlink_thread; + struct dpif_hw_netlink_flow_dump *dump; + struct ofpbuf nl_flows; + struct ofpbuf temp_buf; + int current_dump; + int flower_done; +}; + +static void +dpif_hw_netlink_get_next_dump(struct dpif_hw_netlink_flow_dump_thread *thread) +{ + /* TODO:Consider changing to a atomc dump->given... 
*/ + + struct dpif_hw_netlink_flow_dump *dump = thread->dump; + + ovs_mutex_lock(&dump->lock); + /* if we haven't finished (dumped everything) */ + if (dump->given < dump->num_dumps) { + /* if we are the first to find that given dump is finished (for race + * condition, e.g 3 finish dump 0 at the same time) */ + if (thread->current_dump == dump->given) { + thread->current_dump = ++dump->given; + /* did we just finish the last dump? done. */ + if (dump->given == dump->num_dumps) { + thread->flower_done = 1; + } + } else + /* otherwise, we are behind, catch up */ + thread->current_dump = dump->given; + } else { + /* some other thread finished */ + thread->flower_done = 1; + } + ovs_mutex_unlock(&dump->lock); } static struct dpif_flow_dump_thread * dpif_hw_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_) { + struct dpif_hw_netlink_flow_dump *dump = + dpif_hw_netlink_flow_dump_cast(dump_); struct dpif_hw_netlink *dpif = dpif_hw_netlink_cast(dump_->dpif); + struct dpif_hw_netlink_flow_dump_thread *thread; - return dpif->lp_dpif_netlink->dpif_class->flow_dump_thread_create(dump_); + thread = xmalloc(sizeof *thread); + dpif_flow_dump_thread_init(&thread->up, &dump->up); + thread->netlink_thread = + dpif->lp_dpif_netlink->dpif_class-> + flow_dump_thread_create(dump->netlink_dump); + thread->dump = dump; + + /* + * A thread can be created at any time, + * so another thread might finish the dump already (and advance dump->given), + * so we might be done before we even started. + */ + + ovs_mutex_lock(&dump->lock); + thread->current_dump = dump->given; + thread->flower_done = dump->given < dump->num_dumps ? 0 : 1; + ovs_mutex_unlock(&dump->lock); + + if (!thread->flower_done) { + ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE); /* TODO: + * uninit + * where? 
*/ + ofpbuf_init(&thread->temp_buf, NL_DUMP_BUFSIZE); + } + /* another option is setting current to -1, and calling get_next_dump, but + * its kinda ugly */ + return &thread->up; } +static struct dpif_hw_netlink_flow_dump_thread * +dpif_hw_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread) +{ + return CONTAINER_OF(thread, struct dpif_hw_netlink_flow_dump_thread, up); +} + static void dpif_hw_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_) { struct dpif_hw_netlink *dpif = dpif_hw_netlink_cast(thread_->dpif); + struct dpif_hw_netlink_flow_dump_thread *thread + = dpif_hw_netlink_flow_dump_thread_cast(thread_); + + dpif->lp_dpif_netlink->dpif_class-> + flow_dump_thread_destroy(thread->netlink_thread); - thread_->dpif = dpif->lp_dpif_netlink; - return dpif->lp_dpif_netlink-> - dpif_class->flow_dump_thread_destroy(thread_); + free(thread); } static int dpif_hw_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_, struct dpif_flow *flows, int max_flows) { - struct dpif_hw_netlink *dpif = dpif_hw_netlink_cast(thread_->dpif); + struct dpif_hw_netlink_flow_dump_thread *thread + = dpif_hw_netlink_flow_dump_thread_cast(thread_); + struct dpif_hw_netlink_flow_dump *dump = thread->dump; + struct dpif_hw_netlink *dpif = dpif_hw_netlink_cast(thread->up.dpif); + int n_flows = 0; + + while (!thread->flower_done && n_flows < max_flows) { + int cur = thread->current_dump; + odp_port_t inport = dump->ports[cur]; + struct netdev *indev = dump->netdevs[cur]; + struct ofpbuf nl_flow; + struct nl_dump *nl_dump = &dump->flow_dumps[cur]; + + if (nl_dump_next(nl_dump, &nl_flow, &thread->nl_flows)) { + struct tc_flow tc_flow; + + if (parse_tc_flow(&nl_flow, &tc_flow)) + continue; + + /* if we got handle, convert netlink flow to dpif_flow */ + if (tc_flow.handle) + dpif_hw_tc_flow_to_dpif_flow(dpif, &tc_flow, &flows[n_flows++], + inport, &thread->temp_buf, indev); + } else + dpif_hw_netlink_get_next_dump(thread); + } - thread_->dpif = 
dpif->lp_dpif_netlink; - return dpif->lp_dpif_netlink->dpif_class->flow_dump_next(thread_, flows, - max_flows); + /* Flower dump done or max_flows reached; if done and there is room left, + * call the kernel datapath to dump the remaining flows. */ + if (thread->flower_done && n_flows < max_flows) { + return n_flows + + dpif->lp_dpif_netlink->dpif_class-> + flow_dump_next(thread->netlink_thread, flows + n_flows, + max_flows - n_flows); + } + return n_flows; } static bool