From patchwork Sun Feb 23 11:45:02 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1242646 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48QNgJ6G0Rz9sP7 for ; Sun, 23 Feb 2020 22:45:48 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727274AbgBWLpn (ORCPT ); Sun, 23 Feb 2020 06:45:43 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:49076 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727237AbgBWLpn (ORCPT ); Sun, 23 Feb 2020 06:45:43 -0500 Received: from Internal Mail-Server by MTLPINE2 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 23 Feb 2020 13:45:36 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01NBjZEU006598; Sun, 23 Feb 2020 13:45:35 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Cc: Pablo Neira Ayuso Subject: [PATCH net-next 1/6] netfilter: flowtable: pass flowtable to nf_flow_table_iterate() Date: Sun, 23 Feb 2020 13:45:02 +0200 Message-Id: <1582458307-17067-2-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1582458307-17067-1-git-send-email-paulb@mellanox.com> References: <1582458307-17067-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Pablo Neira Ayuso Pass flowtable object to the iteration callback that is called from nf_flow_table_iterate(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Paul Blakey --- net/netfilter/nf_flow_table_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 8af28e1..face98b 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -312,7 +312,8 @@ struct flow_offload_tuple_rhash * static int nf_flow_table_iterate(struct nf_flowtable *flow_table, - void (*iter)(struct flow_offload *flow, void *data), + void (*iter)(struct nf_flowtable *flow_table, + struct flow_offload *flow, void *data), void *data) { struct flow_offload_tuple_rhash *tuplehash; @@ -336,7 +337,7 @@ struct flow_offload_tuple_rhash * flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); - iter(flow, data); + iter(flow_table, flow, data); } rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); @@ -344,10 +345,9 @@ struct flow_offload_tuple_rhash * return err; } -static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) +static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, + struct flow_offload *flow, void *data) { - struct nf_flowtable *flow_table = data; - if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { if (test_bit(NF_FLOW_HW, &flow->flags)) { @@ -368,7 +368,7 @@ static void nf_flow_offload_work_gc(struct work_struct *work) struct nf_flowtable *flow_table; flow_table = container_of(work, struct nf_flowtable, gc_work.work); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } @@ -511,7 +511,8 @@ int nf_flow_table_init(struct nf_flowtable *flowtable) } EXPORT_SYMBOL_GPL(nf_flow_table_init); -static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) +static void nf_flow_table_do_cleanup(struct nf_flowtable *flowtable, + struct flow_offload *flow, void *data) { struct net_device *dev = data; @@ -552,7 +553,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) mutex_unlock(&flowtable_lock); cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); nf_flow_table_offload_flush(flow_table); rhashtable_destroy(&flow_table->rhashtable); } From patchwork Sun Feb 23 11:45:03 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1242642 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48QNgG5h6Bz9sRL for ; Sun, 23 Feb 2020 22:45:46 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727226AbgBWLpj (ORCPT ); Sun, 23 Feb 2020 06:45:39 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:47891 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726236AbgBWLpi (ORCPT ); Sun, 23 Feb 2020 06:45:38 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 23 Feb 2020 13:45:36 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01NBjZEV006598; Sun, 23 Feb 2020 13:45:36 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Cc: Pablo Neira Ayuso Subject: [PATCH net-next 2/6] netfilter: flowtable: nf_flow_table_iterate() returns the number of entries Date: Sun, 23 Feb 2020 13:45:03 +0200 Message-Id: <1582458307-17067-3-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1582458307-17067-1-git-send-email-paulb@mellanox.com> References: <1582458307-17067-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Pablo Neira Ayuso Update this iterator to return the number of entries in this flowtable. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Paul Blakey --- net/netfilter/nf_flow_table_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index face98b..83bc456 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -319,7 +319,7 @@ struct flow_offload_tuple_rhash * struct flow_offload_tuple_rhash *tuplehash; struct rhashtable_iter hti; struct flow_offload *flow; - int err = 0; + int err = 0, cnt = 0; rhashtable_walk_enter(&flow_table->rhashtable, &hti); rhashtable_walk_start(&hti); @@ -338,11 +338,12 @@ struct flow_offload_tuple_rhash * flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); iter(flow_table, flow, data); + cnt++; } rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); - return err; + return err < 0 ? err : cnt; } static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, From patchwork Sun Feb 23 11:45:05 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1242644 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48QNgH6dQxz9sRp for ; Sun, 23 Feb 2020 22:45:47 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727337AbgBWLpo (ORCPT ); Sun, 23 Feb 2020 06:45:44 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:47925 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726236AbgBWLpo (ORCPT ); Sun, 23 Feb 2020 06:45:44 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 23 Feb 2020 13:45:36 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01NBjZEX006598; Sun, 23 Feb 2020 13:45:36 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Subject: [PATCH net-next 4/6] net/sched: act_ct: Create nf flow table per zone Date: Sun, 23 Feb 2020 13:45:05 +0200 Message-Id: <1582458307-17067-5-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1582458307-17067-1-git-send-email-paulb@mellanox.com> References: <1582458307-17067-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Use the NF flow tables infrastructure for CT offload. Create a nf flow table per zone. Next patches will add FT entries to this table, and do the software offload. Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + include/net/tc_act/tc_ct.h | 2 + net/sched/Kconfig | 2 +- net/sched/act_ct.c | 159 +++++++++++++++++++++++- 4 files changed, 162 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 70b5fe2..eb16136 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include "en.h" diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index a8b1564..cf3492e 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -25,6 +25,8 @@ struct tcf_ct_params { u16 ct_action; struct rcu_head rcu; + + struct tcf_ct_flow_table *ct_ft; }; struct tcf_ct { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index edde0e5..bfbefb7 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE help Say Y here to allow sending the packets to conntrack module. diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f685c0d..4267d7d 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include #include +#include #include #include #include @@ -31,6 +33,133 @@ #include #include +static struct workqueue_struct *act_ct_wq; + +struct tcf_ct_flow_table { + struct rhash_head node; /* In zones tables */ + + struct rcu_work rwork; + struct nf_flowtable nf_ft; + u16 zone; + u32 ref; + + bool dying; +}; + +static const struct rhashtable_params zones_params = { + .head_offset = offsetof(struct tcf_ct_flow_table, node), + .key_offset = offsetof(struct tcf_ct_flow_table, zone), + .key_len = sizeof_field(struct tcf_ct_flow_table, zone), + .automatic_shrinking = true, +}; + +static struct rhashtable zones_ht; +static DEFINE_SPINLOCK(zones_lock); + +static void tcf_ct_flow_table_put(struct tcf_ct_params *params) +{ + spin_lock(&zones_lock); + --params->ct_ft->ref; + spin_unlock(&zones_lock); +} + +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) +{ + struct tcf_ct_flow_table *ct_ft; + + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table, + rwork); + nf_flow_table_free(&ct_ft->nf_ft); + kfree(ct_ft); + + module_put(THIS_MODULE); +} + +static int tcf_ct_flow_table_cleanup(struct nf_flowtable *nf_ft) +{ + struct tcf_ct_flow_table *ct_ft; + int err = -EBUSY; + + spin_lock(&zones_lock); + + /* Delete the FT if there is no rules with CT action for this zone. + * + * This method will be called by the FT GC until the FT table will + * be freed. Set the dying flags to avoid multiple cleanups + * till it's actually freed in work cb. + */ + ct_ft = container_of(nf_ft, struct tcf_ct_flow_table, nf_ft); + if (ct_ft->ref == 0 && !ct_ft->dying) { + ct_ft->dying = true; + + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); + queue_rcu_work(act_ct_wq, &ct_ft->rwork); + + err = 0; + } + + spin_unlock(&zones_lock); + + return err; +} + +static struct nf_flowtable_type flowtable_ct = { + .cleanup = tcf_ct_flow_table_cleanup, + .owner = THIS_MODULE, +}; + +static int tcf_ct_flow_table_get(struct tcf_ct_params *params) +{ + struct tcf_ct_flow_table *ct_ft; + int err = -ENOMEM; + + spin_lock(&zones_lock); + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); + if (ct_ft) + goto take_ref; + + ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL); + if (!ct_ft) + goto alloc_err; + + ct_ft->zone = params->zone; + err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params); + if (err) + goto insert_err; + + ct_ft->nf_ft.type = &flowtable_ct; + err = nf_flow_table_init(&ct_ft->nf_ft); + if (err) + goto init_err; + + __module_get(THIS_MODULE); +take_ref: + params->ct_ft = ct_ft; + ct_ft->ref++; + spin_unlock(&zones_lock); + + return 0; + +init_err: + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); +insert_err: + kfree(ct_ft); +alloc_err: + spin_unlock(&zones_lock); + return err; +} + +static int tcf_ct_flow_tables_init(void) +{ + return rhashtable_init(&zones_ht, &zones_params); +} + +static void tcf_ct_flow_tables_uninit(void) +{ + rhashtable_destroy(&zones_ht); +} + static struct tc_action_ops act_ct_ops; static unsigned int ct_net_id; @@ -207,6 +336,8 @@ static void tcf_ct_params_free(struct rcu_head *head) struct tcf_ct_params *params = container_of(head, struct tcf_ct_params, rcu); + tcf_ct_flow_table_put(params); + if (params->tmpl) nf_conntrack_put(¶ms->tmpl->ct_general); kfree(params); @@ -730,6 +861,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (err) goto cleanup; + err = tcf_ct_flow_table_get(params); + if (err) + goto cleanup; + spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params = rcu_replace_pointer(c->params, params, @@ -974,12 +1109,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list) static int __init ct_init_module(void) { - return tcf_register_action(&act_ct_ops, &ct_net_ops); + int err; + + act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0); + if (!act_ct_wq) + return -ENOMEM; + + err = tcf_ct_flow_tables_init(); + if (err) + goto err_tbl_init; + + err = tcf_register_action(&act_ct_ops, &ct_net_ops); + if (err) + goto err_register; + + return 0; + +err_tbl_init: + destroy_workqueue(act_ct_wq); +err_register: + tcf_ct_flow_tables_uninit(); + return err; } static void __exit ct_cleanup_module(void) { tcf_unregister_action(&act_ct_ops, &ct_net_ops); + tcf_ct_flow_tables_uninit(); + destroy_workqueue(act_ct_wq); } module_init(ct_init_module); From patchwork Sun Feb 23 11:45:06 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1242643 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48QNgH1hdKz9sRN for ; Sun, 23 Feb 2020 22:45:47 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727312AbgBWLpn (ORCPT ); Sun, 23 Feb 2020 06:45:43 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:49078 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727239AbgBWLpn (ORCPT ); Sun, 23 Feb 2020 06:45:43 -0500 Received: from Internal Mail-Server by MTLPINE2 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 23 Feb 2020 13:45:36 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01NBjZEY006598; Sun, 23 Feb 2020 13:45:36 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Subject: [PATCH net-next 5/6] net/sched: act_ct: Offload established connections to flow table Date: Sun, 23 Feb 2020 13:45:06 +0200 Message-Id: <1582458307-17067-6-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1582458307-17067-1-git-send-email-paulb@mellanox.com> References: <1582458307-17067-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Add a ft entry when connections enter an established state and delete the connections when they leave the established state. The flow table assumes ownership of the connection. In the following patch act_ct will lookup the ct state from the FT. In future patches, drivers will register for callbacks for ft add/del events and will be able to use the information to offload the connections. Note that connection aging is managed by the FT. Signed-off-by: Paul Blakey Acked-by: Jiri Pirko --- net/sched/act_ct.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 4267d7d..b2bc885 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -150,6 +150,67 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) return err; } +static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, + struct nf_conn *ct, + bool tcp) +{ + struct flow_offload *entry; + int err; + + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) + return; + + entry = flow_offload_alloc(ct); + if (!entry) { + WARN_ON_ONCE(1); + goto err_alloc; + } + + if (tcp) { + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + } + + err = flow_offload_add(&ct_ft->nf_ft, entry); + if (err) + goto err_add; + + return; + +err_add: + flow_offload_free(entry); +err_alloc: + clear_bit(IPS_OFFLOAD_BIT, &ct->status); +} + +static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + bool tcp = false; + + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) + return; + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + tcp = true; + if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) + return; + break; + case IPPROTO_UDP: + break; + default: + return; + } + + if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || + ct->status & IPS_SEQ_ADJUST) + return; + + tcf_ct_flow_table_add(ct_ft, ct, tcp); +} + static int tcf_ct_flow_tables_init(void) { return rhashtable_init(&zones_ht, &zones_params); @@ -603,6 +664,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, nf_conntrack_confirm(skb); } + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); + out_push: skb_push_rcsum(skb, nh_ofs); From patchwork Sun Feb 23 11:45:07 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1242645 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48QNgJ39bLz9sRs for ; Sun, 23 Feb 2020 22:45:48 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727365AbgBWLpp (ORCPT ); Sun, 23 Feb 2020 06:45:45 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:47926 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727227AbgBWLpn (ORCPT ); Sun, 23 Feb 2020 06:45:43 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 23 Feb 2020 13:45:36 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01NBjZEZ006598; Sun, 23 Feb 2020 13:45:36 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Subject: [PATCH net-next 6/6] net/sched: act_ct: Software offload of established flows Date: Sun, 23 Feb 2020 13:45:07 +0200 Message-Id: <1582458307-17067-7-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1582458307-17067-1-git-send-email-paulb@mellanox.com> References: <1582458307-17067-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Offload nf conntrack processing by looking up the 5-tuple in the zone's flow table. The nf conntrack module will process the packets until a connection is in established state. Once in established state, the ct state pointer (nf_conn) will be restored on the skb from a successful ft lookup. Signed-off-by: Paul Blakey Acked-by: Jiri Pirko --- net/sched/act_ct.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 3 deletions(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b2bc885..3592e24 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -211,6 +211,157 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, tcf_ct_flow_table_add(ct_ft, ct, tcp); } +static bool +tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + unsigned int thoff; + struct iphdr *iph; + + if (!pskb_may_pull(skb, sizeof(*iph))) + return false; + + iph = ip_hdr(skb); + thoff = iph->ihl * 4; + + if (ip_is_fragment(iph) || + unlikely(thoff != sizeof(struct iphdr))) + return false; + + if (iph->protocol != IPPROTO_TCP && + iph->protocol != IPPROTO_UDP) + return false; + + if (iph->ttl <= 1) + return false; + + thoff = iph->ihl * 4; + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return false; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v4.s_addr = iph->saddr; + tuple->dst_v4.s_addr = iph->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET; + tuple->l4proto = iph->protocol; + + return true; +} + +static bool +tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + struct ipv6hdr *ip6h; + unsigned int thoff; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return false; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_TCP && + ip6h->nexthdr != IPPROTO_UDP) + return false; + + if (ip6h->hop_limit <= 1) + return false; + + thoff = sizeof(*ip6h); + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return false; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v6 = ip6h->saddr; + tuple->dst_v6 = ip6h->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET6; + tuple->l4proto = ip6h->nexthdr; + + return true; +} + +static bool tcf_ct_flow_table_check_tcp(struct flow_offload *flow, int proto, + struct sk_buff *skb, + unsigned int thoff) +{ + struct tcphdr *tcph; + + if (proto != IPPROTO_TCP) + return true; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph))) + return false; + + tcph = (void *)(skb_network_header(skb) + thoff); + if (unlikely(tcph->fin || tcph->rst)) { + flow_offload_teardown(flow); + return false; + } + + return true; +} + +static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, + struct sk_buff *skb, + u8 family) +{ + struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft; + struct flow_offload_tuple_rhash *tuplehash; + struct flow_offload_tuple tuple = {}; + enum ip_conntrack_info ctinfo; + struct flow_offload *flow; + struct nf_conn *ct; + unsigned int thoff; + u8 dir; + + /* Previously seen or loopback */ + ct = nf_ct_get(skb, &ctinfo); + if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) + return false; + + switch (family) { + case NFPROTO_IPV4: + if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple)) + return false; + break; + case NFPROTO_IPV6: + if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple)) + return false; + break; + default: + return false; + } + + tuplehash = flow_offload_lookup(nf_ft, &tuple); + if (!tuplehash) + return false; + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + ct = flow->ct; + + ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED : + IP_CT_ESTABLISHED_REPLY; + + thoff = ip_hdr(skb)->ihl * 4; + if (!tcf_ct_flow_table_check_tcp(flow, ip_hdr(skb)->protocol, skb, + thoff)) + return false; + + nf_conntrack_get(&ct->ct_general); + nf_ct_set(skb, ct, ctinfo); + + return true; +} + static int tcf_ct_flow_tables_init(void) { return rhashtable_init(&zones_ht, &zones_params); @@ -579,6 +730,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, struct nf_hook_state state; int nh_ofs, err, retval; struct tcf_ct_params *p; + bool skip_add = false; struct nf_conn *ct; u8 family; @@ -628,6 +780,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); if (!cached) { + if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) { + skip_add = true; + goto do_nat; + } + /* Associate skb with specified zone. */ if (tmpl) { ct = nf_ct_get(skb, &ctinfo); @@ -645,6 +802,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, goto out_push; } +do_nat: ct = nf_ct_get(skb, &ctinfo); if (!ct) goto out_push; @@ -662,9 +820,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, * even if the connection is already confirmed. */ nf_conntrack_confirm(skb); - } - - tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); + } else if (!skip_add) + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); out_push: skb_push_rcsum(skb, nh_ofs);