Message ID | 1583067523-1960-2-git-send-email-paulb@mellanox.com |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | [net-next,v2,1/3] net/sched: act_ct: Create nf flow table per zone | expand |
Sun, Mar 01, 2020 at 01:58:41PM CET, paulb@mellanox.com wrote: >Use the NF flow tables infrastructure for CT offload. > >Create a nf flow table per zone. > >Next patches will add FT entries to this table, and do >the software offload. > >Signed-off-by: Paul Blakey <paulb@mellanox.com> >--- >Changelog: > v1->v2: > Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep) > Free ft on last tc act instance instead of last instance + last offloaded tuple, > this removes cleanup cb and netfilter patches, and is simpler > Removed accidental mlx5/core/en_tc.c change > Removed reviewed by Jiri - patch changed > > include/net/tc_act/tc_ct.h | 2 + > net/sched/Kconfig | 2 +- > net/sched/act_ct.c | 143 ++++++++++++++++++++++++++++++++++++++++++++- > 3 files changed, 145 insertions(+), 2 deletions(-) > >diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h >index a8b1564..cf3492e 100644 >--- a/include/net/tc_act/tc_ct.h >+++ b/include/net/tc_act/tc_ct.h >@@ -25,6 +25,8 @@ struct tcf_ct_params { > u16 ct_action; > > struct rcu_head rcu; >+ >+ struct tcf_ct_flow_table *ct_ft; > }; > > struct tcf_ct { >diff --git a/net/sched/Kconfig b/net/sched/Kconfig >index edde0e5..bfbefb7 100644 >--- a/net/sched/Kconfig >+++ b/net/sched/Kconfig >@@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY > > config NET_ACT_CT > tristate "connection tracking tc action" >- depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT >+ depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE > help > Say Y here to allow sending the packets to conntrack module. > >diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c >index f685c0d..43dfdd1 100644 >--- a/net/sched/act_ct.c >+++ b/net/sched/act_ct.c >@@ -15,6 +15,7 @@ > #include <linux/pkt_cls.h> > #include <linux/ip.h> > #include <linux/ipv6.h> >+#include <linux/rhashtable.h> > #include <net/netlink.h> > #include <net/pkt_sched.h> > #include <net/pkt_cls.h> >@@ -24,6 +25,7 @@ > #include <uapi/linux/tc_act/tc_ct.h> > #include <net/tc_act/tc_ct.h> > >+#include <net/netfilter/nf_flow_table.h> > #include <net/netfilter/nf_conntrack.h> > #include <net/netfilter/nf_conntrack_core.h> > #include <net/netfilter/nf_conntrack_zones.h> >@@ -31,6 +33,117 @@ > #include <net/netfilter/ipv6/nf_defrag_ipv6.h> > #include <uapi/linux/netfilter/nf_nat.h> > >+static struct workqueue_struct *act_ct_wq; >+static struct rhashtable zones_ht; >+static DEFINE_SPINLOCK(zones_lock); >+ >+struct tcf_ct_flow_table { >+ struct rhash_head node; /* In zones tables */ >+ >+ struct rcu_work rwork; >+ struct nf_flowtable nf_ft; >+ u16 zone; >+ u32 ref; >+ >+ bool dying; >+}; >+ >+static const struct rhashtable_params zones_params = { >+ .head_offset = offsetof(struct tcf_ct_flow_table, node), >+ .key_offset = offsetof(struct tcf_ct_flow_table, zone), >+ .key_len = sizeof_field(struct tcf_ct_flow_table, zone), >+ .automatic_shrinking = true, >+}; >+ >+static struct nf_flowtable_type flowtable_ct = { >+ .owner = THIS_MODULE, >+}; >+ >+static int tcf_ct_flow_table_get(struct tcf_ct_params *params) >+{ >+ struct tcf_ct_flow_table *ct_ft, *new_ct_ft; >+ int err; >+ >+ spin_lock_bh(&zones_lock); >+ ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); >+ if (ct_ft) >+ goto take_ref; >+ >+ spin_unlock_bh(&zones_lock); >+ new_ct_ft = kzalloc(sizeof(*new_ct_ft), GFP_KERNEL); Don't unlock-lock and just use GFP_ATOMIC. >+ if (!new_ct_ft) >+ return -ENOMEM; >+ >+ new_ct_ft->zone = params->zone; >+ spin_lock_bh(&zones_lock); >+ ct_ft = rhashtable_lookup_get_insert_fast(&zones_ht, &new_ct_ft->node, >+ zones_params); >+ if (IS_ERR(ct_ft)) { >+ err = PTR_ERR(ct_ft); >+ goto err_insert; >+ } else if (ct_ft) { >+ /* Already exists */ >+ kfree(new_ct_ft); >+ goto take_ref; >+ } >+ >+ ct_ft = new_ct_ft; >+ ct_ft->nf_ft.type = &flowtable_ct; >+ err = nf_flow_table_init(&ct_ft->nf_ft); >+ if (err) >+ goto err_init; >+ >+ __module_get(THIS_MODULE); >+take_ref: >+ params->ct_ft = ct_ft; >+ ct_ft->ref++; >+ spin_unlock_bh(&zones_lock); >+ >+ return 0; >+ >+err_init: >+ rhashtable_remove_fast(&zones_ht, &new_ct_ft->node, zones_params); >+err_insert: >+ spin_unlock_bh(&zones_lock); >+ kfree(new_ct_ft); >+ return err; >+} >+ >+static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) >+{ >+ struct tcf_ct_flow_table *ct_ft; >+ >+ ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table, >+ rwork); >+ nf_flow_table_free(&ct_ft->nf_ft); >+ kfree(ct_ft); >+ >+ module_put(THIS_MODULE); >+} >+ >+static void tcf_ct_flow_table_put(struct tcf_ct_params *params) >+{ >+ struct tcf_ct_flow_table *ct_ft = params->ct_ft; >+ >+ spin_lock_bh(&zones_lock); >+ if (--params->ct_ft->ref == 0) { >+ rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); >+ INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); >+ queue_rcu_work(act_ct_wq, &ct_ft->rwork); >+ } >+ spin_unlock_bh(&zones_lock); >+} >+ >+static int tcf_ct_flow_tables_init(void) >+{ >+ return rhashtable_init(&zones_ht, &zones_params); >+} >+ >+static void tcf_ct_flow_tables_uninit(void) >+{ >+ rhashtable_destroy(&zones_ht); >+} >+ > static struct tc_action_ops act_ct_ops; > static unsigned int ct_net_id; > >@@ -207,6 +320,8 @@ static void tcf_ct_params_free(struct rcu_head *head) > struct tcf_ct_params *params = container_of(head, > struct tcf_ct_params, rcu); > >+ tcf_ct_flow_table_put(params); >+ > if (params->tmpl) > nf_conntrack_put(¶ms->tmpl->ct_general); > kfree(params); >@@ -730,6 +845,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, > if (err) > goto cleanup; > >+ err = tcf_ct_flow_table_get(params); >+ if (err) >+ goto cleanup; >+ > spin_lock_bh(&c->tcf_lock); > goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); > params = rcu_replace_pointer(c->params, params, >@@ -974,12 +1093,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list) > > static int __init ct_init_module(void) > { >- return tcf_register_action(&act_ct_ops, &ct_net_ops); >+ int err; >+ >+ act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0); >+ if (!act_ct_wq) >+ return -ENOMEM; >+ >+ err = tcf_ct_flow_tables_init(); >+ if (err) >+ goto err_tbl_init; >+ >+ err = tcf_register_action(&act_ct_ops, &ct_net_ops); >+ if (err) >+ goto err_register; >+ >+ return 0; >+ >+err_tbl_init: >+ destroy_workqueue(act_ct_wq); >+err_register: >+ tcf_ct_flow_tables_uninit(); >+ return err; > } > > static void __exit ct_cleanup_module(void) > { > tcf_unregister_action(&act_ct_ops, &ct_net_ops); >+ tcf_ct_flow_tables_uninit(); >+ destroy_workqueue(act_ct_wq); > } > > module_init(ct_init_module); >-- >1.8.3.1 >
On 3/1/2020 5:47 PM, Jiri Pirko wrote: > Sun, Mar 01, 2020 at 01:58:41PM CET, paulb@mellanox.com wrote: >> Use the NF flow tables infrastructure for CT offload. >> >> Create a nf flow table per zone. >> >> Next patches will add FT entries to this table, and do >> the software offload. >> >> Signed-off-by: Paul Blakey <paulb@mellanox.com> >> --- >> Changelog: >> v1->v2: >> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep) >> Free ft on last tc act instance instead of last instance + last offloaded tuple, >> this removes cleanup cb and netfilter patches, and is simpler >> Removed accidental mlx5/core/en_tc.c change >> Removed reviewed by Jiri - patch changed >> >> include/net/tc_act/tc_ct.h | 2 + >> net/sched/Kconfig | 2 +- >> net/sched/act_ct.c | 143 ++++++++++++++++++++++++++++++++++++++++++++- >> 3 files changed, 145 insertions(+), 2 deletions(-) >> >> diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h >> index a8b1564..cf3492e 100644 >> --- a/include/net/tc_act/tc_ct.h >> +++ b/include/net/tc_act/tc_ct.h >> @@ -25,6 +25,8 @@ struct tcf_ct_params { >> u16 ct_action; >> >> struct rcu_head rcu; >> + >> + struct tcf_ct_flow_table *ct_ft; >> }; >> >> struct tcf_ct { >> diff --git a/net/sched/Kconfig b/net/sched/Kconfig >> index edde0e5..bfbefb7 100644 >> --- a/net/sched/Kconfig >> +++ b/net/sched/Kconfig >> @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY >> >> config NET_ACT_CT >> tristate "connection tracking tc action" >> - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT >> + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE >> help >> Say Y here to allow sending the packets to conntrack module. >> >> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c >> index f685c0d..43dfdd1 100644 >> --- a/net/sched/act_ct.c >> +++ b/net/sched/act_ct.c >> @@ -15,6 +15,7 @@ >> #include <linux/pkt_cls.h> >> #include <linux/ip.h> >> #include <linux/ipv6.h> >> +#include <linux/rhashtable.h> >> #include <net/netlink.h> >> #include <net/pkt_sched.h> >> #include <net/pkt_cls.h> >> @@ -24,6 +25,7 @@ >> #include <uapi/linux/tc_act/tc_ct.h> >> #include <net/tc_act/tc_ct.h> >> >> +#include <net/netfilter/nf_flow_table.h> >> #include <net/netfilter/nf_conntrack.h> >> #include <net/netfilter/nf_conntrack_core.h> >> #include <net/netfilter/nf_conntrack_zones.h> >> @@ -31,6 +33,117 @@ >> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> >> #include <uapi/linux/netfilter/nf_nat.h> >> >> +static struct workqueue_struct *act_ct_wq; >> +static struct rhashtable zones_ht; >> +static DEFINE_SPINLOCK(zones_lock); >> + >> +struct tcf_ct_flow_table { >> + struct rhash_head node; /* In zones tables */ >> + >> + struct rcu_work rwork; >> + struct nf_flowtable nf_ft; >> + u16 zone; >> + u32 ref; >> + >> + bool dying; >> +}; >> + >> +static const struct rhashtable_params zones_params = { >> + .head_offset = offsetof(struct tcf_ct_flow_table, node), >> + .key_offset = offsetof(struct tcf_ct_flow_table, zone), >> + .key_len = sizeof_field(struct tcf_ct_flow_table, zone), >> + .automatic_shrinking = true, >> +}; >> + >> +static struct nf_flowtable_type flowtable_ct = { >> + .owner = THIS_MODULE, >> +}; >> + >> +static int tcf_ct_flow_table_get(struct tcf_ct_params *params) >> +{ >> + struct tcf_ct_flow_table *ct_ft, *new_ct_ft; >> + int err; >> + >> + spin_lock_bh(&zones_lock); >> + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); >> + if (ct_ft) >> + goto take_ref; >> + >> + spin_unlock_bh(&zones_lock); >> + new_ct_ft = kzalloc(sizeof(*new_ct_ft), GFP_KERNEL); > Don't unlock-lock and just use GFP_ATOMIC. Sure will do. > > >> + if (!new_ct_ft) >> + return -ENOMEM; >> + >> + new_ct_ft->zone = params->zone; >> + spin_lock_bh(&zones_lock); >> + ct_ft = rhashtable_lookup_get_insert_fast(&zones_ht, &new_ct_ft->node, >> + zones_params); >> + if (IS_ERR(ct_ft)) { >> + err = PTR_ERR(ct_ft); >> + goto err_insert; >> + } else if (ct_ft) { >> + /* Already exists */ >> + kfree(new_ct_ft); >> + goto take_ref; >> + } >> + >> + ct_ft = new_ct_ft; >> + ct_ft->nf_ft.type = &flowtable_ct; >> + err = nf_flow_table_init(&ct_ft->nf_ft); >> + if (err) >> + goto err_init; >> + >> + __module_get(THIS_MODULE); >> +take_ref: >> + params->ct_ft = ct_ft; >> + ct_ft->ref++; >> + spin_unlock_bh(&zones_lock); >> + >> + return 0; >> + >> +err_init: >> + rhashtable_remove_fast(&zones_ht, &new_ct_ft->node, zones_params); >> +err_insert: >> + spin_unlock_bh(&zones_lock); >> + kfree(new_ct_ft); >> + return err; >> +} >> + >> +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) >> +{ >> + struct tcf_ct_flow_table *ct_ft; >> + >> + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table, >> + rwork); >> + nf_flow_table_free(&ct_ft->nf_ft); >> + kfree(ct_ft); >> + >> + module_put(THIS_MODULE); >> +} >> + >> +static void tcf_ct_flow_table_put(struct tcf_ct_params *params) >> +{ >> + struct tcf_ct_flow_table *ct_ft = params->ct_ft; >> + >> + spin_lock_bh(&zones_lock); >> + if (--params->ct_ft->ref == 0) { >> + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); >> + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); >> + queue_rcu_work(act_ct_wq, &ct_ft->rwork); >> + } >> + spin_unlock_bh(&zones_lock); >> +} >> + >> +static int tcf_ct_flow_tables_init(void) >> +{ >> + return rhashtable_init(&zones_ht, &zones_params); >> +} >> + >> +static void tcf_ct_flow_tables_uninit(void) >> +{ >> + rhashtable_destroy(&zones_ht); >> +} >> + >> static struct tc_action_ops act_ct_ops; >> static unsigned int ct_net_id; >> >> @@ -207,6 +320,8 @@ static void tcf_ct_params_free(struct rcu_head *head) >> struct tcf_ct_params *params = container_of(head, >> struct tcf_ct_params, rcu); >> >> + tcf_ct_flow_table_put(params); >> + >> if (params->tmpl) >> nf_conntrack_put(¶ms->tmpl->ct_general); >> kfree(params); >> @@ -730,6 +845,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, >> if (err) >> goto cleanup; >> >> + err = tcf_ct_flow_table_get(params); >> + if (err) >> + goto cleanup; >> + >> spin_lock_bh(&c->tcf_lock); >> goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); >> params = rcu_replace_pointer(c->params, params, >> @@ -974,12 +1093,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list) >> >> static int __init ct_init_module(void) >> { >> - return tcf_register_action(&act_ct_ops, &ct_net_ops); >> + int err; >> + >> + act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0); >> + if (!act_ct_wq) >> + return -ENOMEM; >> + >> + err = tcf_ct_flow_tables_init(); >> + if (err) >> + goto err_tbl_init; >> + >> + err = tcf_register_action(&act_ct_ops, &ct_net_ops); >> + if (err) >> + goto err_register; >> + >> + return 0; >> + >> +err_tbl_init: >> + destroy_workqueue(act_ct_wq); >> +err_register: >> + tcf_ct_flow_tables_uninit(); >> + return err; >> } >> >> static void __exit ct_cleanup_module(void) >> { >> tcf_unregister_action(&act_ct_ops, &ct_net_ops); >> + tcf_ct_flow_tables_uninit(); >> + destroy_workqueue(act_ct_wq); >> } >> >> module_init(ct_init_module); >> -- >> 1.8.3.1 >>
On 3/1/2020 6:11 PM, Paul Blakey wrote: > On 3/1/2020 5:47 PM, Jiri Pirko wrote: >> Sun, Mar 01, 2020 at 01:58:41PM CET, paulb@mellanox.com wrote: >>> Use the NF flow tables infrastructure for CT offload. >>> >>> Create a nf flow table per zone. >>> >>> Next patches will add FT entries to this table, and do >>> the software offload. >>> >>> Signed-off-by: Paul Blakey <paulb@mellanox.com> >>> --- >>> Changelog: >>> v1->v2: >>> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep) >>> Free ft on last tc act instance instead of last instance + last offloaded tuple, >>> this removes cleanup cb and netfilter patches, and is simpler >>> Removed accidental mlx5/core/en_tc.c change >>> Removed reviewed by Jiri - patch changed >>> >>> include/net/tc_act/tc_ct.h | 2 + >>> net/sched/Kconfig | 2 +- >>> net/sched/act_ct.c | 143 ++++++++++++++++++++++++++++++++++++++++++++- >>> 3 files changed, 145 insertions(+), 2 deletions(-) >>> >>> diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h >>> index a8b1564..cf3492e 100644 >>> --- a/include/net/tc_act/tc_ct.h >>> +++ b/include/net/tc_act/tc_ct.h >>> @@ -25,6 +25,8 @@ struct tcf_ct_params { >>> u16 ct_action; >>> >>> struct rcu_head rcu; >>> + >>> + struct tcf_ct_flow_table *ct_ft; >>> }; >>> >>> struct tcf_ct { >>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig >>> index edde0e5..bfbefb7 100644 >>> --- a/net/sched/Kconfig >>> +++ b/net/sched/Kconfig >>> @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY >>> >>> config NET_ACT_CT >>> tristate "connection tracking tc action" >>> - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT >>> + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE >>> help >>> Say Y here to allow sending the packets to conntrack module. >>> >>> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c >>> index f685c0d..43dfdd1 100644 >>> --- a/net/sched/act_ct.c >>> +++ b/net/sched/act_ct.c >>> @@ -15,6 +15,7 @@ >>> #include <linux/pkt_cls.h> >>> #include <linux/ip.h> >>> #include <linux/ipv6.h> >>> +#include <linux/rhashtable.h> >>> #include <net/netlink.h> >>> #include <net/pkt_sched.h> >>> #include <net/pkt_cls.h> >>> @@ -24,6 +25,7 @@ >>> #include <uapi/linux/tc_act/tc_ct.h> >>> #include <net/tc_act/tc_ct.h> >>> >>> +#include <net/netfilter/nf_flow_table.h> >>> #include <net/netfilter/nf_conntrack.h> >>> #include <net/netfilter/nf_conntrack_core.h> >>> #include <net/netfilter/nf_conntrack_zones.h> >>> @@ -31,6 +33,117 @@ >>> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> >>> #include <uapi/linux/netfilter/nf_nat.h> >>> >>> +static struct workqueue_struct *act_ct_wq; >>> +static struct rhashtable zones_ht; >>> +static DEFINE_SPINLOCK(zones_lock); >>> + >>> +struct tcf_ct_flow_table { >>> + struct rhash_head node; /* In zones tables */ >>> + >>> + struct rcu_work rwork; >>> + struct nf_flowtable nf_ft; >>> + u16 zone; >>> + u32 ref; >>> + >>> + bool dying; >>> +}; >>> + >>> +static const struct rhashtable_params zones_params = { >>> + .head_offset = offsetof(struct tcf_ct_flow_table, node), >>> + .key_offset = offsetof(struct tcf_ct_flow_table, zone), >>> + .key_len = sizeof_field(struct tcf_ct_flow_table, zone), >>> + .automatic_shrinking = true, >>> +}; >>> + >>> +static struct nf_flowtable_type flowtable_ct = { >>> + .owner = THIS_MODULE, >>> +}; >>> + >>> +static int tcf_ct_flow_table_get(struct tcf_ct_params *params) >>> +{ >>> + struct tcf_ct_flow_table *ct_ft, *new_ct_ft; >>> + int err; >>> + >>> + spin_lock_bh(&zones_lock); >>> + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); >>> + if (ct_ft) >>> + goto take_ref; >>> + >>> + spin_unlock_bh(&zones_lock); >>> + new_ct_ft = kzalloc(sizeof(*new_ct_ft), GFP_KERNEL); >> Don't unlock-lock and just use GFP_ATOMIC. > Sure will do. Sent v3. >> >>> + if (!new_ct_ft) >>> + return -ENOMEM; >>> + >>> + new_ct_ft->zone = params->zone; >>> + spin_lock_bh(&zones_lock); >>> + ct_ft = rhashtable_lookup_get_insert_fast(&zones_ht, &new_ct_ft->node, >>> + zones_params); >>> + if (IS_ERR(ct_ft)) { >>> + err = PTR_ERR(ct_ft); >>> + goto err_insert; >>> + } else if (ct_ft) { >>> + /* Already exists */ >>> + kfree(new_ct_ft); >>> + goto take_ref; >>> + } >>> + >>> + ct_ft = new_ct_ft; >>> + ct_ft->nf_ft.type = &flowtable_ct; >>> + err = nf_flow_table_init(&ct_ft->nf_ft); >>> + if (err) >>> + goto err_init; >>> + >>> + __module_get(THIS_MODULE); >>> +take_ref: >>> + params->ct_ft = ct_ft; >>> + ct_ft->ref++; >>> + spin_unlock_bh(&zones_lock); >>> + >>> + return 0; >>> + >>> +err_init: >>> + rhashtable_remove_fast(&zones_ht, &new_ct_ft->node, zones_params); >>> +err_insert: >>> + spin_unlock_bh(&zones_lock); >>> + kfree(new_ct_ft); >>> + return err; >>> +} >>> + >>> +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) >>> +{ >>> + struct tcf_ct_flow_table *ct_ft; >>> + >>> + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table, >>> + rwork); >>> + nf_flow_table_free(&ct_ft->nf_ft); >>> + kfree(ct_ft); >>> + >>> + module_put(THIS_MODULE); >>> +} >>> + >>> +static void tcf_ct_flow_table_put(struct tcf_ct_params *params) >>> +{ >>> + struct tcf_ct_flow_table *ct_ft = params->ct_ft; >>> + >>> + spin_lock_bh(&zones_lock); >>> + if (--params->ct_ft->ref == 0) { >>> + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); >>> + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); >>> + queue_rcu_work(act_ct_wq, &ct_ft->rwork); >>> + } >>> + spin_unlock_bh(&zones_lock); >>> +} >>> + >>> +static int tcf_ct_flow_tables_init(void) >>> +{ >>> + return rhashtable_init(&zones_ht, &zones_params); >>> +} >>> + >>> +static void tcf_ct_flow_tables_uninit(void) >>> +{ >>> + rhashtable_destroy(&zones_ht); >>> +} >>> + >>> static struct tc_action_ops act_ct_ops; >>> static unsigned int ct_net_id; >>> >>> @@ -207,6 +320,8 @@ static void tcf_ct_params_free(struct rcu_head *head) >>> struct tcf_ct_params *params = container_of(head, >>> struct tcf_ct_params, rcu); >>> >>> + tcf_ct_flow_table_put(params); >>> + >>> if (params->tmpl) >>> nf_conntrack_put(¶ms->tmpl->ct_general); >>> kfree(params); >>> @@ -730,6 +845,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, >>> if (err) >>> goto cleanup; >>> >>> + err = tcf_ct_flow_table_get(params); >>> + if (err) >>> + goto cleanup; >>> + >>> spin_lock_bh(&c->tcf_lock); >>> goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); >>> params = rcu_replace_pointer(c->params, params, >>> @@ -974,12 +1093,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list) >>> >>> static int __init ct_init_module(void) >>> { >>> - return tcf_register_action(&act_ct_ops, &ct_net_ops); >>> + int err; >>> + >>> + act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0); >>> + if (!act_ct_wq) >>> + return -ENOMEM; >>> + >>> + err = tcf_ct_flow_tables_init(); >>> + if (err) >>> + goto err_tbl_init; >>> + >>> + err = tcf_register_action(&act_ct_ops, &ct_net_ops); >>> + if (err) >>> + goto err_register; >>> + >>> + return 0; >>> + >>> +err_tbl_init: >>> + destroy_workqueue(act_ct_wq); >>> +err_register: >>> + tcf_ct_flow_tables_uninit(); >>> + return err; >>> } >>> >>> static void __exit ct_cleanup_module(void) >>> { >>> tcf_unregister_action(&act_ct_ops, &ct_net_ops); >>> + tcf_ct_flow_tables_uninit(); >>> + destroy_workqueue(act_ct_wq); >>> } >>> >>> module_init(ct_init_module); >>> -- >>> 1.8.3.1 >>>
diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index a8b1564..cf3492e 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -25,6 +25,8 @@ struct tcf_ct_params { u16 ct_action; struct rcu_head rcu; + + struct tcf_ct_flow_table *ct_ft; }; struct tcf_ct { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index edde0e5..bfbefb7 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE help Say Y here to allow sending the packets to conntrack module. diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f685c0d..43dfdd1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -15,6 +15,7 @@ #include <linux/pkt_cls.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/rhashtable.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -24,6 +25,7 @@ #include <uapi/linux/tc_act/tc_ct.h> #include <net/tc_act/tc_ct.h> +#include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -31,6 +33,117 @@ #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <uapi/linux/netfilter/nf_nat.h> +static struct workqueue_struct *act_ct_wq; +static struct rhashtable zones_ht; +static DEFINE_SPINLOCK(zones_lock); + +struct tcf_ct_flow_table { + struct rhash_head node; /* In zones tables */ + + struct rcu_work rwork; + struct nf_flowtable nf_ft; + u16 zone; + u32 ref; + + bool dying; +}; + +static const struct rhashtable_params zones_params = { + .head_offset = offsetof(struct tcf_ct_flow_table, node), + .key_offset = offsetof(struct tcf_ct_flow_table, zone), + .key_len = sizeof_field(struct tcf_ct_flow_table, zone), + .automatic_shrinking = true, +}; + +static struct nf_flowtable_type flowtable_ct = { + .owner = THIS_MODULE, +}; + +static int tcf_ct_flow_table_get(struct tcf_ct_params *params) +{ + struct tcf_ct_flow_table *ct_ft, *new_ct_ft; + int err; + + spin_lock_bh(&zones_lock); + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); + if (ct_ft) + goto take_ref; + + spin_unlock_bh(&zones_lock); + new_ct_ft = kzalloc(sizeof(*new_ct_ft), GFP_KERNEL); + if (!new_ct_ft) + return -ENOMEM; + + new_ct_ft->zone = params->zone; + spin_lock_bh(&zones_lock); + ct_ft = rhashtable_lookup_get_insert_fast(&zones_ht, &new_ct_ft->node, + zones_params); + if (IS_ERR(ct_ft)) { + err = PTR_ERR(ct_ft); + goto err_insert; + } else if (ct_ft) { + /* Already exists */ + kfree(new_ct_ft); + goto take_ref; + } + + ct_ft = new_ct_ft; + ct_ft->nf_ft.type = &flowtable_ct; + err = nf_flow_table_init(&ct_ft->nf_ft); + if (err) + goto err_init; + + __module_get(THIS_MODULE); +take_ref: + params->ct_ft = ct_ft; + ct_ft->ref++; + spin_unlock_bh(&zones_lock); + + return 0; + +err_init: + rhashtable_remove_fast(&zones_ht, &new_ct_ft->node, zones_params); +err_insert: + spin_unlock_bh(&zones_lock); + kfree(new_ct_ft); + return err; +} + +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) +{ + struct tcf_ct_flow_table *ct_ft; + + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table, + rwork); + nf_flow_table_free(&ct_ft->nf_ft); + kfree(ct_ft); + + module_put(THIS_MODULE); +} + +static void tcf_ct_flow_table_put(struct tcf_ct_params *params) +{ + struct tcf_ct_flow_table *ct_ft = params->ct_ft; + + spin_lock_bh(&zones_lock); + if (--params->ct_ft->ref == 0) { + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); + queue_rcu_work(act_ct_wq, &ct_ft->rwork); + } + spin_unlock_bh(&zones_lock); +} + +static int tcf_ct_flow_tables_init(void) +{ + return rhashtable_init(&zones_ht, &zones_params); +} + +static void tcf_ct_flow_tables_uninit(void) +{ + rhashtable_destroy(&zones_ht); +} + static struct tc_action_ops act_ct_ops; static unsigned int ct_net_id; @@ -207,6 +320,8 @@ static void tcf_ct_params_free(struct rcu_head *head) struct tcf_ct_params *params = container_of(head, struct tcf_ct_params, rcu); + tcf_ct_flow_table_put(params); + if (params->tmpl) nf_conntrack_put(¶ms->tmpl->ct_general); kfree(params); @@ -730,6 +845,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (err) goto cleanup; + err = tcf_ct_flow_table_get(params); + if (err) + goto cleanup; + spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params = rcu_replace_pointer(c->params, params, @@ -974,12 +1093,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list) static int __init ct_init_module(void) { - return tcf_register_action(&act_ct_ops, &ct_net_ops); + int err; + + act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0); + if (!act_ct_wq) + return -ENOMEM; + + err = tcf_ct_flow_tables_init(); + if (err) + goto err_tbl_init; + + err = tcf_register_action(&act_ct_ops, &ct_net_ops); + if (err) + goto err_register; + + return 0; + +err_tbl_init: + destroy_workqueue(act_ct_wq); +err_register: + tcf_ct_flow_tables_uninit(); + return err; } static void __exit ct_cleanup_module(void) { tcf_unregister_action(&act_ct_ops, &ct_net_ops); + tcf_ct_flow_tables_uninit(); + destroy_workqueue(act_ct_wq); } module_init(ct_init_module);
Use the NF flow tables infrastructure for CT offload. Create a nf flow table per zone. Next patches will add FT entries to this table, and do the software offload. Signed-off-by: Paul Blakey <paulb@mellanox.com> --- Changelog: v1->v2: Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep) Free ft on last tc act instance instead of last instance + last offloaded tuple, this removes cleanup cb and netfilter patches, and is simpler Removed accidental mlx5/core/en_tc.c change Removed reviewed by Jiri - patch changed include/net/tc_act/tc_ct.h | 2 + net/sched/Kconfig | 2 +- net/sched/act_ct.c | 143 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 145 insertions(+), 2 deletions(-)