Message ID | 1582458307-17067-5-git-send-email-paulb@mellanox.com |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | act_ct: Software offload of conntrack_in | expand |
On 23/02/2020 11:45, Paul Blakey wrote: > Use the NF flow tables infrastructure for CT offload. > > Create a nf flow table per zone. > > Next patches will add FT entries to this table, and do > the software offload. > > Signed-off-by: Paul Blakey <paulb@mellanox.com> > Reviewed-by: Jiri Pirko <jiri@mellanox.com> > --- > drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + > include/net/tc_act/tc_ct.h | 2 + > net/sched/Kconfig | 2 +- > net/sched/act_ct.c | 159 +++++++++++++++++++++++- > 4 files changed, 162 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c > index 70b5fe2..eb16136 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c > @@ -45,6 +45,7 @@ > #include <net/tc_act/tc_tunnel_key.h> > #include <net/tc_act/tc_pedit.h> > #include <net/tc_act/tc_csum.h> > +#include <net/tc_act/tc_ct.h> > #include <net/arp.h> > #include <net/ipv6_stubs.h> > #include "en.h" > diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h > index a8b1564..cf3492e 100644 > --- a/include/net/tc_act/tc_ct.h > +++ b/include/net/tc_act/tc_ct.h > @@ -25,6 +25,8 @@ struct tcf_ct_params { > u16 ct_action; > > struct rcu_head rcu; > + > + struct tcf_ct_flow_table *ct_ft; > }; > > struct tcf_ct { > diff --git a/net/sched/Kconfig b/net/sched/Kconfig > index edde0e5..bfbefb7 100644 > --- a/net/sched/Kconfig > +++ b/net/sched/Kconfig > @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY > > config NET_ACT_CT > tristate "connection tracking tc action" > - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT > + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE Is it not possible to keep sensible/old behaviour in the case of NF_FLOW_TABLE=n? (And what about NF_FLOW_TABLE=m, which is what its Kconfig help seems to advise...) > help > Say Y here to allow sending the packets to conntrack module. > > diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c > index f685c0d..4267d7d 100644 > --- a/net/sched/act_ct.c > +++ b/net/sched/act_ct.c > @@ -15,6 +15,7 @@ > #include <linux/pkt_cls.h> > #include <linux/ip.h> > #include <linux/ipv6.h> > +#include <linux/rhashtable.h> > #include <net/netlink.h> > #include <net/pkt_sched.h> > #include <net/pkt_cls.h> > @@ -24,6 +25,7 @@ > #include <uapi/linux/tc_act/tc_ct.h> > #include <net/tc_act/tc_ct.h> > > +#include <net/netfilter/nf_flow_table.h> > #include <net/netfilter/nf_conntrack.h> > #include <net/netfilter/nf_conntrack_core.h> > #include <net/netfilter/nf_conntrack_zones.h> > @@ -31,6 +33,133 @@ > #include <net/netfilter/ipv6/nf_defrag_ipv6.h> > #include <uapi/linux/netfilter/nf_nat.h> > > +static struct workqueue_struct *act_ct_wq; > + > +struct tcf_ct_flow_table { > + struct rhash_head node; /* In zones tables */ > + > + struct rcu_work rwork; > + struct nf_flowtable nf_ft; > + u16 zone; > + u32 ref; Any reason this isn't using a refcount_t? -ed
On 2/24/2020 5:58 PM, Edward Cree wrote: > On 23/02/2020 11:45, Paul Blakey wrote: >> Use the NF flow tables infrastructure for CT offload. >> >> Create a nf flow table per zone. >> >> Next patches will add FT entries to this table, and do >> the software offload. >> >> Signed-off-by: Paul Blakey <paulb@mellanox.com> >> Reviewed-by: Jiri Pirko <jiri@mellanox.com> >> --- >> drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + >> include/net/tc_act/tc_ct.h | 2 + >> net/sched/Kconfig | 2 +- >> net/sched/act_ct.c | 159 +++++++++++++++++++++++- >> 4 files changed, 162 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c >> index 70b5fe2..eb16136 100644 >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c >> @@ -45,6 +45,7 @@ >> #include <net/tc_act/tc_tunnel_key.h> >> #include <net/tc_act/tc_pedit.h> >> #include <net/tc_act/tc_csum.h> >> +#include <net/tc_act/tc_ct.h> >> #include <net/arp.h> >> #include <net/ipv6_stubs.h> >> #include "en.h" >> diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h >> index a8b1564..cf3492e 100644 >> --- a/include/net/tc_act/tc_ct.h >> +++ b/include/net/tc_act/tc_ct.h >> @@ -25,6 +25,8 @@ struct tcf_ct_params { >> u16 ct_action; >> >> struct rcu_head rcu; >> + >> + struct tcf_ct_flow_table *ct_ft; >> }; >> >> struct tcf_ct { >> diff --git a/net/sched/Kconfig b/net/sched/Kconfig >> index edde0e5..bfbefb7 100644 >> --- a/net/sched/Kconfig >> +++ b/net/sched/Kconfig >> @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY >> >> config NET_ACT_CT >> tristate "connection tracking tc action" >> - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT >> + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE > Is it not possible to keep sensible/old behaviour in the case > of NF_FLOW_TABLE=n? (And what about NF_FLOW_TABLE=m, which is > what its Kconfig help seems to advise...) No problem with it being a module. It is possible to allow compilation without flow table, but it will create confusion for people trying to offload conntrack, as it would silently not happen. > >> help >> Say Y here to allow sending the packets to conntrack module. >> >> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c >> index f685c0d..4267d7d 100644 >> --- a/net/sched/act_ct.c >> +++ b/net/sched/act_ct.c >> @@ -15,6 +15,7 @@ >> #include <linux/pkt_cls.h> >> #include <linux/ip.h> >> #include <linux/ipv6.h> >> +#include <linux/rhashtable.h> >> #include <net/netlink.h> >> #include <net/pkt_sched.h> >> #include <net/pkt_cls.h> >> @@ -24,6 +25,7 @@ >> #include <uapi/linux/tc_act/tc_ct.h> >> #include <net/tc_act/tc_ct.h> >> >> +#include <net/netfilter/nf_flow_table.h> >> #include <net/netfilter/nf_conntrack.h> >> #include <net/netfilter/nf_conntrack_core.h> >> #include <net/netfilter/nf_conntrack_zones.h> >> @@ -31,6 +33,133 @@ >> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> >> #include <uapi/linux/netfilter/nf_nat.h> >> >> +static struct workqueue_struct *act_ct_wq; >> + >> +struct tcf_ct_flow_table { >> + struct rhash_head node; /* In zones tables */ >> + >> + struct rcu_work rwork; >> + struct nf_flowtable nf_ft; >> + u16 zone; >> + u32 ref; > Any reason this isn't using a refcount_t? > -ed it is updated under lock, so there was no need for atomic as well
Hi, On Sun, Feb 23, 2020 at 01:45:05PM +0200, Paul Blakey wrote: > Use the NF flow tables infrastructure for CT offload. > > Create a nf flow table per zone. > > Next patches will add FT entries to this table, and do > the software offload. > > Signed-off-by: Paul Blakey <paulb@mellanox.com> > Reviewed-by: Jiri Pirko <jiri@mellanox.com> > --- > drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + > include/net/tc_act/tc_ct.h | 2 + > net/sched/Kconfig | 2 +- > net/sched/act_ct.c | 159 +++++++++++++++++++++++- > 4 files changed, 162 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c > index 70b5fe2..eb16136 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c > @@ -45,6 +45,7 @@ > #include <net/tc_act/tc_tunnel_key.h> > #include <net/tc_act/tc_pedit.h> > #include <net/tc_act/tc_csum.h> > +#include <net/tc_act/tc_ct.h> > #include <net/arp.h> > #include <net/ipv6_stubs.h> > #include "en.h" This chunk is not really needed for this patchset, right?
On 2/28/2020 3:45 PM, Marcelo Ricardo Leitner wrote: > Hi, > > On Sun, Feb 23, 2020 at 01:45:05PM +0200, Paul Blakey wrote: >> Use the NF flow tables infrastructure for CT offload. >> >> Create a nf flow table per zone. >> >> Next patches will add FT entries to this table, and do >> the software offload. >> >> Signed-off-by: Paul Blakey <paulb@mellanox.com> >> Reviewed-by: Jiri Pirko <jiri@mellanox.com> >> --- >> drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + >> include/net/tc_act/tc_ct.h | 2 + >> net/sched/Kconfig | 2 +- >> net/sched/act_ct.c | 159 +++++++++++++++++++++++- >> 4 files changed, 162 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c >> index 70b5fe2..eb16136 100644 >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c >> @@ -45,6 +45,7 @@ >> #include <net/tc_act/tc_tunnel_key.h> >> #include <net/tc_act/tc_pedit.h> >> #include <net/tc_act/tc_csum.h> >> +#include <net/tc_act/tc_ct.h> >> #include <net/arp.h> >> #include <net/ipv6_stubs.h> >> #include "en.h" > This chunk is not really needed for this patchset, right? Removed thanks
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 70b5fe2..eb16136 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -45,6 +45,7 @@ #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_ct.h> #include <net/arp.h> #include <net/ipv6_stubs.h> #include "en.h" diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index a8b1564..cf3492e 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -25,6 +25,8 @@ struct tcf_ct_params { u16 ct_action; struct rcu_head rcu; + + struct tcf_ct_flow_table *ct_ft; }; struct tcf_ct { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index edde0e5..bfbefb7 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE help Say Y here to allow sending the packets to conntrack module. diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f685c0d..4267d7d 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -15,6 +15,7 @@ #include <linux/pkt_cls.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/rhashtable.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -24,6 +25,7 @@ #include <uapi/linux/tc_act/tc_ct.h> #include <net/tc_act/tc_ct.h> +#include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -31,6 +33,133 @@ #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <uapi/linux/netfilter/nf_nat.h> +static struct workqueue_struct *act_ct_wq; + +struct tcf_ct_flow_table { + struct rhash_head node; /* In zones tables */ + + struct rcu_work rwork; + struct nf_flowtable nf_ft; + u16 zone; + u32 ref; + + bool dying; +}; + +static const struct rhashtable_params zones_params = { + .head_offset = offsetof(struct tcf_ct_flow_table, node), + .key_offset = offsetof(struct tcf_ct_flow_table, zone), + .key_len = sizeof_field(struct tcf_ct_flow_table, zone), + .automatic_shrinking = true, +}; + +static struct rhashtable zones_ht; +static DEFINE_SPINLOCK(zones_lock); + +static void tcf_ct_flow_table_put(struct tcf_ct_params *params) +{ + spin_lock(&zones_lock); + --params->ct_ft->ref; + spin_unlock(&zones_lock); +} + +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) +{ + struct tcf_ct_flow_table *ct_ft; + + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table, + rwork); + nf_flow_table_free(&ct_ft->nf_ft); + kfree(ct_ft); + + module_put(THIS_MODULE); +} + +static int tcf_ct_flow_table_cleanup(struct nf_flowtable *nf_ft) +{ + struct tcf_ct_flow_table *ct_ft; + int err = -EBUSY; + + spin_lock(&zones_lock); + + /* Delete the FT if there is no rules with CT action for this zone. + * + * This method will be called by the FT GC until the FT table will + * be freed. Set the dying flags to avoid multiple cleanups + * till it's actually freed in work cb. + */ + ct_ft = container_of(nf_ft, struct tcf_ct_flow_table, nf_ft); + if (ct_ft->ref == 0 && !ct_ft->dying) { + ct_ft->dying = true; + + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); + queue_rcu_work(act_ct_wq, &ct_ft->rwork); + + err = 0; + } + + spin_unlock(&zones_lock); + + return err; +} + +static struct nf_flowtable_type flowtable_ct = { + .cleanup = tcf_ct_flow_table_cleanup, + .owner = THIS_MODULE, +}; + +static int tcf_ct_flow_table_get(struct tcf_ct_params *params) +{ + struct tcf_ct_flow_table *ct_ft; + int err = -ENOMEM; + + spin_lock(&zones_lock); + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); + if (ct_ft) + goto take_ref; + + ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL); + if (!ct_ft) + goto alloc_err; + + ct_ft->zone = params->zone; + err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params); + if (err) + goto insert_err; + + ct_ft->nf_ft.type = &flowtable_ct; + err = nf_flow_table_init(&ct_ft->nf_ft); + if (err) + goto init_err; + + __module_get(THIS_MODULE); +take_ref: + params->ct_ft = ct_ft; + ct_ft->ref++; + spin_unlock(&zones_lock); + + return 0; + +init_err: + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); +insert_err: + kfree(ct_ft); +alloc_err: + spin_unlock(&zones_lock); + return err; +} + +static int tcf_ct_flow_tables_init(void) +{ + return rhashtable_init(&zones_ht, &zones_params); +} + +static void tcf_ct_flow_tables_uninit(void) +{ + rhashtable_destroy(&zones_ht); +} + static struct tc_action_ops act_ct_ops; static unsigned int ct_net_id; @@ -207,6 +336,8 @@ static void tcf_ct_params_free(struct rcu_head *head) struct tcf_ct_params *params = container_of(head, struct tcf_ct_params, rcu); + tcf_ct_flow_table_put(params); + if (params->tmpl) nf_conntrack_put(¶ms->tmpl->ct_general); kfree(params); @@ -730,6 +861,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (err) goto cleanup; + err = tcf_ct_flow_table_get(params); + if (err) + goto cleanup; + spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params = rcu_replace_pointer(c->params, params, @@ -974,12 +1109,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list) static int __init ct_init_module(void) { - return tcf_register_action(&act_ct_ops, &ct_net_ops); + int err; + + act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0); + if (!act_ct_wq) + return -ENOMEM; + + err = tcf_ct_flow_tables_init(); + if (err) + goto err_tbl_init; + + err = tcf_register_action(&act_ct_ops, &ct_net_ops); + if (err) + goto err_register; + + return 0; + +err_tbl_init: + destroy_workqueue(act_ct_wq); +err_register: + tcf_ct_flow_tables_uninit(); + return err; } static void __exit ct_cleanup_module(void) { tcf_unregister_action(&act_ct_ops, &ct_net_ops); + tcf_ct_flow_tables_uninit(); + destroy_workqueue(act_ct_wq); } module_init(ct_init_module);