Message ID | 1583251072-10396-4-git-send-email-paulb@mellanox.com |
---|---|
State | Superseded |
Delegated to: | David Miller |
Headers | show |
Series | act_ct: Software offload of conntrack_in | expand |
On Tue, Mar 03, 2020 at 05:57:52PM +0200, Paul Blakey wrote: > Offload nf conntrack processing by looking up the 5-tuple in the > zone's flow table. > > The nf conntrack module will process the packets until a connection is > in established state. Once in established state, the ct state pointer > (nf_conn) will be restored on the skb from a successful ft lookup. > > Signed-off-by: Paul Blakey <paulb@mellanox.com> > Acked-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> > --- > Changelog: > v5->v6: > Refactor tcp fin/rst check, get tcp header inside filler, and test in caller > Pull tcp with ports instead of two pulls > v4->v5: > Re-read ip/ip6 header after pulling as skb ptrs may change > Use pskb_network_may_pull instead of pskb_may_pull > v1->v2: > Add !skip_add curly braces > Removed extra setting thoff again > Check tcp proto outside of tcf_ct_flow_table_check_tcp > > net/sched/act_ct.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 150 insertions(+), 2 deletions(-) > > diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c > index 2ab38431..23eba61 100644 > --- a/net/sched/act_ct.c > +++ b/net/sched/act_ct.c > @@ -186,6 +186,147 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, > tcf_ct_flow_table_add(ct_ft, ct, tcp); > } > > +static bool > +tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, > + struct flow_offload_tuple *tuple, > + struct tcphdr **tcph) > +{ > + struct flow_ports *ports; > + unsigned int thoff; > + struct iphdr *iph; > + > + if (!pskb_network_may_pull(skb, sizeof(*iph))) > + return false; > + > + iph = ip_hdr(skb); > + thoff = iph->ihl * 4; > + > + if (ip_is_fragment(iph) || > + unlikely(thoff != sizeof(struct iphdr))) > + return false; > + > + if (iph->protocol != IPPROTO_TCP && > + iph->protocol != IPPROTO_UDP) > + return false; > + > + if (iph->ttl <= 1) > + return false; > + > + if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ? > + thoff + sizeof(struct tcphdr) : > + thoff + sizeof(*ports))) > + return false; > + > + iph = ip_hdr(skb); > + if (iph->protocol == IPPROTO_TCP) > + *tcph = (void *)(skb_network_header(skb) + thoff); > + > + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); > + tuple->src_v4.s_addr = iph->saddr; > + tuple->dst_v4.s_addr = iph->daddr; > + tuple->src_port = ports->source; > + tuple->dst_port = ports->dest; > + tuple->l3proto = AF_INET; > + tuple->l4proto = iph->protocol; > + > + return true; > +} > + > +static bool > +tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, > + struct flow_offload_tuple *tuple, > + struct tcphdr **tcph) > +{ > + struct flow_ports *ports; > + struct ipv6hdr *ip6h; > + unsigned int thoff; > + > + if (!pskb_network_may_pull(skb, sizeof(*ip6h))) > + return false; > + > + ip6h = ipv6_hdr(skb); > + > + if (ip6h->nexthdr != IPPROTO_TCP && > + ip6h->nexthdr != IPPROTO_UDP) > + return false; > + > + if (ip6h->hop_limit <= 1) > + return false; > + > + thoff = sizeof(*ip6h); > + if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ? > + thoff + sizeof(struct tcphdr) : > + thoff + sizeof(*ports))) > + return false; > + > + ip6h = ipv6_hdr(skb); > + if (ip6h->nexthdr == IPPROTO_TCP) > + *tcph = (void *)(skb_network_header(skb) + thoff); > + > + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); > + tuple->src_v6 = ip6h->saddr; > + tuple->dst_v6 = ip6h->daddr; > + tuple->src_port = ports->source; > + tuple->dst_port = ports->dest; > + tuple->l3proto = AF_INET6; > + tuple->l4proto = ip6h->nexthdr; > + > + return true; > +} > + > +static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, > + struct sk_buff *skb, > + u8 family) > +{ > + struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft; > + struct flow_offload_tuple_rhash *tuplehash; > + struct flow_offload_tuple tuple = {}; > + enum ip_conntrack_info ctinfo; > + struct tcphdr *tcph = NULL; > + struct flow_offload *flow; > + struct nf_conn *ct; > + u8 dir; > + > + /* Previously seen or loopback */ > + ct = nf_ct_get(skb, &ctinfo); > + if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) > + return false; > + > + switch (family) { > + case NFPROTO_IPV4: > + if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph)) > + return false; > + break; > + case NFPROTO_IPV6: > + if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph)) > + return false; > + break; > + default: > + return false; > + } > + > + tuplehash = flow_offload_lookup(nf_ft, &tuple); > + if (!tuplehash) > + return false; > + > + dir = tuplehash->tuple.dir; > + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); > + ct = flow->ct; > + > + if (tcph && (unlikely(tcph->fin || tcph->rst))) { > + flow_offload_teardown(flow); > + return false; > + } > + > + ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED : > + IP_CT_ESTABLISHED_REPLY; > + > + nf_conntrack_get(&ct->ct_general); > + nf_ct_set(skb, ct, ctinfo); > + > + return true; > +} > + > static int tcf_ct_flow_tables_init(void) > { > return rhashtable_init(&zones_ht, &zones_params); > @@ -554,6 +695,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, > struct nf_hook_state state; > int nh_ofs, err, retval; > struct tcf_ct_params *p; > + bool skip_add = false; > struct nf_conn *ct; > u8 family; > > @@ -603,6 +745,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, > */ > cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); > if (!cached) { > + if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) { > + skip_add = true; > + goto do_nat; > + } > + > /* Associate skb with specified zone. */ > if (tmpl) { > ct = nf_ct_get(skb, &ctinfo); > @@ -620,6 +767,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, > goto out_push; > } > > +do_nat: > ct = nf_ct_get(skb, &ctinfo); > if (!ct) > goto out_push; > @@ -637,10 +785,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, > * even if the connection is already confirmed. > */ > nf_conntrack_confirm(skb); > + } else if (!skip_add) { > + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); > } > > - tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); > - > out_push: > skb_push_rcsum(skb, nh_ofs); > > -- > 1.8.3.1 >
On 03/03/2020 17:57, Paul Blakey wrote: > Offload nf conntrack processing by looking up the 5-tuple in the > zone's flow table. > > The nf conntrack module will process the packets until a connection is > in established state. Once in established state, the ct state pointer > (nf_conn) will be restored on the skb from a successful ft lookup. > > Signed-off-by: Paul Blakey <paulb@mellanox.com> > Acked-by: Jiri Pirko <jiri@mellanox.com> > --- > Changelog: > v5->v6: > Refactor tcp fin/rst check, get tcp header inside filler, and test in caller > Pull tcp with ports instead of two pulls > v4->v5: > Re-read ip/ip6 header after pulling as skb ptrs may change > Use pskb_network_may_pull instead of pskb_may_pull > v1->v2: > Add !skip_add curly braces > Removed extra setting thoff again > Check tcp proto outside of tcf_ct_flow_table_check_tcp > Looks good to me, thanks! In the future please add all reviewers to the CC list. Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> > net/sched/act_ct.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 150 insertions(+), 2 deletions(-) > > diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c > index 2ab38431..23eba61 100644 > --- a/net/sched/act_ct.c > +++ b/net/sched/act_ct.c > @@ -186,6 +186,147 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, > tcf_ct_flow_table_add(ct_ft, ct, tcp); > } > > +static bool > +tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, > + struct flow_offload_tuple *tuple, > + struct tcphdr **tcph) > +{ > + struct flow_ports *ports; > + unsigned int thoff; > + struct iphdr *iph; > + > + if (!pskb_network_may_pull(skb, sizeof(*iph))) > + return false; > + > + iph = ip_hdr(skb); > + thoff = iph->ihl * 4; > + > + if (ip_is_fragment(iph) || > + unlikely(thoff != sizeof(struct iphdr))) > + return false; > + > + if (iph->protocol != IPPROTO_TCP && > + iph->protocol != IPPROTO_UDP) > + return false; > + > + if (iph->ttl <= 1) > + return false; > + > + if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ? > + thoff + sizeof(struct tcphdr) : > + thoff + sizeof(*ports))) > + return false; > + > + iph = ip_hdr(skb); > + if (iph->protocol == IPPROTO_TCP) > + *tcph = (void *)(skb_network_header(skb) + thoff); > + > + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); > + tuple->src_v4.s_addr = iph->saddr; > + tuple->dst_v4.s_addr = iph->daddr; > + tuple->src_port = ports->source; > + tuple->dst_port = ports->dest; > + tuple->l3proto = AF_INET; > + tuple->l4proto = iph->protocol; > + > + return true; > +} > + > +static bool > +tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, > + struct flow_offload_tuple *tuple, > + struct tcphdr **tcph) > +{ > + struct flow_ports *ports; > + struct ipv6hdr *ip6h; > + unsigned int thoff; > + > + if (!pskb_network_may_pull(skb, sizeof(*ip6h))) > + return false; > + > + ip6h = ipv6_hdr(skb); > + > + if (ip6h->nexthdr != IPPROTO_TCP && > + ip6h->nexthdr != IPPROTO_UDP) > + return false; > + > + if (ip6h->hop_limit <= 1) > + return false; > + > + thoff = sizeof(*ip6h); > + if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ? > + thoff + sizeof(struct tcphdr) : > + thoff + sizeof(*ports))) > + return false; > + > + ip6h = ipv6_hdr(skb); > + if (ip6h->nexthdr == IPPROTO_TCP) > + *tcph = (void *)(skb_network_header(skb) + thoff); > + > + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); > + tuple->src_v6 = ip6h->saddr; > + tuple->dst_v6 = ip6h->daddr; > + tuple->src_port = ports->source; > + tuple->dst_port = ports->dest; > + tuple->l3proto = AF_INET6; > + tuple->l4proto = ip6h->nexthdr; > + > + return true; > +} > + > +static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, > + struct sk_buff *skb, > + u8 family) > +{ > + struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft; > + struct flow_offload_tuple_rhash *tuplehash; > + struct flow_offload_tuple tuple = {}; > + enum ip_conntrack_info ctinfo; > + struct tcphdr *tcph = NULL; > + struct flow_offload *flow; > + struct nf_conn *ct; > + u8 dir; > + > + /* Previously seen or loopback */ > + ct = nf_ct_get(skb, &ctinfo); > + if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) > + return false; > + > + switch (family) { > + case NFPROTO_IPV4: > + if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph)) > + return false; > + break; > + case NFPROTO_IPV6: > + if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph)) > + return false; > + break; > + default: > + return false; > + } > + > + tuplehash = flow_offload_lookup(nf_ft, &tuple); > + if (!tuplehash) > + return false; > + > + dir = tuplehash->tuple.dir; > + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); > + ct = flow->ct; > + > + if (tcph && (unlikely(tcph->fin || tcph->rst))) { > + flow_offload_teardown(flow); > + return false; > + } > + > + ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED : > + IP_CT_ESTABLISHED_REPLY; > + > + nf_conntrack_get(&ct->ct_general); > + nf_ct_set(skb, ct, ctinfo); > + > + return true; > +} > + > static int tcf_ct_flow_tables_init(void) > { > return rhashtable_init(&zones_ht, &zones_params); > @@ -554,6 +695,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, > struct nf_hook_state state; > int nh_ofs, err, retval; > struct tcf_ct_params *p; > + bool skip_add = false; > struct nf_conn *ct; > u8 family; > > @@ -603,6 +745,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, > */ > cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); > if (!cached) { > + if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) { > + skip_add = true; > + goto do_nat; > + } > + > /* Associate skb with specified zone. */ > if (tmpl) { > ct = nf_ct_get(skb, &ctinfo); > @@ -620,6 +767,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, > goto out_push; > } > > +do_nat: > ct = nf_ct_get(skb, &ctinfo); > if (!ct) > goto out_push; > @@ -637,10 +785,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, > * even if the connection is already confirmed. > */ > nf_conntrack_confirm(skb); > + } else if (!skip_add) { > + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); > } > > - tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); > - > out_push: > skb_push_rcsum(skb, nh_ofs); > >
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 2ab38431..23eba61 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -186,6 +186,147 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, tcf_ct_flow_table_add(ct_ft, ct, tcp); } +static bool +tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, + struct flow_offload_tuple *tuple, + struct tcphdr **tcph) +{ + struct flow_ports *ports; + unsigned int thoff; + struct iphdr *iph; + + if (!pskb_network_may_pull(skb, sizeof(*iph))) + return false; + + iph = ip_hdr(skb); + thoff = iph->ihl * 4; + + if (ip_is_fragment(iph) || + unlikely(thoff != sizeof(struct iphdr))) + return false; + + if (iph->protocol != IPPROTO_TCP && + iph->protocol != IPPROTO_UDP) + return false; + + if (iph->ttl <= 1) + return false; + + if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ? + thoff + sizeof(struct tcphdr) : + thoff + sizeof(*ports))) + return false; + + iph = ip_hdr(skb); + if (iph->protocol == IPPROTO_TCP) + *tcph = (void *)(skb_network_header(skb) + thoff); + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_v4.s_addr = iph->saddr; + tuple->dst_v4.s_addr = iph->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET; + tuple->l4proto = iph->protocol; + + return true; +} + +static bool +tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, + struct flow_offload_tuple *tuple, + struct tcphdr **tcph) +{ + struct flow_ports *ports; + struct ipv6hdr *ip6h; + unsigned int thoff; + + if (!pskb_network_may_pull(skb, sizeof(*ip6h))) + return false; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_TCP && + ip6h->nexthdr != IPPROTO_UDP) + return false; + + if (ip6h->hop_limit <= 1) + return false; + + thoff = sizeof(*ip6h); + if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ? + thoff + sizeof(struct tcphdr) : + thoff + sizeof(*ports))) + return false; + + ip6h = ipv6_hdr(skb); + if (ip6h->nexthdr == IPPROTO_TCP) + *tcph = (void *)(skb_network_header(skb) + thoff); + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_v6 = ip6h->saddr; + tuple->dst_v6 = ip6h->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET6; + tuple->l4proto = ip6h->nexthdr; + + return true; +} + +static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, + struct sk_buff *skb, + u8 family) +{ + struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft; + struct flow_offload_tuple_rhash *tuplehash; + struct flow_offload_tuple tuple = {}; + enum ip_conntrack_info ctinfo; + struct tcphdr *tcph = NULL; + struct flow_offload *flow; + struct nf_conn *ct; + u8 dir; + + /* Previously seen or loopback */ + ct = nf_ct_get(skb, &ctinfo); + if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) + return false; + + switch (family) { + case NFPROTO_IPV4: + if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph)) + return false; + break; + case NFPROTO_IPV6: + if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph)) + return false; + break; + default: + return false; + } + + tuplehash = flow_offload_lookup(nf_ft, &tuple); + if (!tuplehash) + return false; + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + ct = flow->ct; + + if (tcph && (unlikely(tcph->fin || tcph->rst))) { + flow_offload_teardown(flow); + return false; + } + + ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED : + IP_CT_ESTABLISHED_REPLY; + + nf_conntrack_get(&ct->ct_general); + nf_ct_set(skb, ct, ctinfo); + + return true; +} + static int tcf_ct_flow_tables_init(void) { return rhashtable_init(&zones_ht, &zones_params); @@ -554,6 +695,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, struct nf_hook_state state; int nh_ofs, err, retval; struct tcf_ct_params *p; + bool skip_add = false; struct nf_conn *ct; u8 family; @@ -603,6 +745,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); if (!cached) { + if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) { + skip_add = true; + goto do_nat; + } + /* Associate skb with specified zone. */ if (tmpl) { ct = nf_ct_get(skb, &ctinfo); @@ -620,6 +767,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, goto out_push; } +do_nat: ct = nf_ct_get(skb, &ctinfo); if (!ct) goto out_push; @@ -637,10 +785,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, * even if the connection is already confirmed. */ nf_conntrack_confirm(skb); + } else if (!skip_add) { + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); } - tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); - out_push: skb_push_rcsum(skb, nh_ofs);