Message ID | 1461272848-62263-3-git-send-email-joe@ovn.org |
---|---|
State | Superseded |
Headers | show |
On Thu, Apr 21, 2016 at 2:07 PM, Joe Stringer <joe@ovn.org> wrote: > diff --git a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h > index fe99ced37227..a3b86dab2c9c 100644 > --- a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h > +++ b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h > @@ -16,17 +16,17 @@ > #define OVS_NF_DEFRAG6_BACKPORT 1 > struct sk_buff *rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, > u32 user); > +#define nf_ct_frag6_gather rpl_nf_ct_frag6_gather > +#endif /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ > + > +#ifdef OVS_NF_DEFRAG6_BACKPORT > int __init rpl_nf_ct_frag6_init(void); > void rpl_nf_ct_frag6_cleanup(void); > -void rpl_nf_ct_frag6_consume_orig(struct sk_buff *skb); > -#define nf_ct_frag6_gather rpl_nf_ct_frag6_gather > -#else /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ > +#else /* !OVS_NF_DEFRAG6_BACKPORT */ > static inline int __init rpl_nf_ct_frag6_init(void) { return 0; } > static inline void rpl_nf_ct_frag6_cleanup(void) { } > -static inline void rpl_nf_ct_frag6_consume_orig(struct sk_buff *skb) { } > -#endif /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ > +#endif /* OVS_NF_DEFRAG6_BACKPORT */ > #define nf_ct_frag6_init rpl_nf_ct_frag6_init > #define nf_ct_frag6_cleanup rpl_nf_ct_frag6_cleanup > -#define nf_ct_frag6_consume_orig rpl_nf_ct_frag6_consume_orig I'm not sure that I totally understand what is going on in this file. In particular, although not directly related to this patch, I'm not sure why we define rpl_nf_ct_frag6_init()/cleanup() to be no-ops in cases where we don't have OVS_NF_DEFRAG6_BACKPORT. On new kernels, shouldn't we be using the upstream definitions of these functions? The values initialized seem to be used by functions which aren't backported. More minor but it also seems odd that rpl_nf_ct_frag6_gather() is declared in the first #ifdef but then actually defined under OVS_NF_DEFRAG6_BACKPORT like all of the other functions. What's the reason for separating it out this way?
On 26 April 2016 at 19:44, Jesse Gross <jesse@kernel.org> wrote: > On Thu, Apr 21, 2016 at 2:07 PM, Joe Stringer <joe@ovn.org> wrote: >> diff --git a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h >> index fe99ced37227..a3b86dab2c9c 100644 >> --- a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h >> +++ b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h >> @@ -16,17 +16,17 @@ >> #define OVS_NF_DEFRAG6_BACKPORT 1 >> struct sk_buff *rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, >> u32 user); >> +#define nf_ct_frag6_gather rpl_nf_ct_frag6_gather >> +#endif /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ >> + >> +#ifdef OVS_NF_DEFRAG6_BACKPORT >> int __init rpl_nf_ct_frag6_init(void); >> void rpl_nf_ct_frag6_cleanup(void); >> -void rpl_nf_ct_frag6_consume_orig(struct sk_buff *skb); >> -#define nf_ct_frag6_gather rpl_nf_ct_frag6_gather >> -#else /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ >> +#else /* !OVS_NF_DEFRAG6_BACKPORT */ >> static inline int __init rpl_nf_ct_frag6_init(void) { return 0; } >> static inline void rpl_nf_ct_frag6_cleanup(void) { } >> -static inline void rpl_nf_ct_frag6_consume_orig(struct sk_buff *skb) { } >> -#endif /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ >> +#endif /* OVS_NF_DEFRAG6_BACKPORT */ >> #define nf_ct_frag6_init rpl_nf_ct_frag6_init >> #define nf_ct_frag6_cleanup rpl_nf_ct_frag6_cleanup >> -#define nf_ct_frag6_consume_orig rpl_nf_ct_frag6_consume_orig > > I'm not sure that I totally understand what is going on in this file. > In particular, although not directly related to this patch, I'm not > sure why we define rpl_nf_ct_frag6_init()/cleanup() to be no-ops in > cases where we don't have OVS_NF_DEFRAG6_BACKPORT. On new kernels, > shouldn't we be using the upstream definitions of these functions? The > values initialized seem to be used by functions which aren't > backported. In upstream, we depend on the exported symbol nf_ct_frag6_gather() which ensures that nf_defrag_ipv6 module is loaded when loading openvswitch. During nf_defrag_ipv6 module load, in nf_defrag_init() it will invoke nf_ct_frag6_init(), initializing the IPv6 fragmentation handlers and cache. In out-of-tree, depending on the upstream kernel, we may be able to rely on the same behaviour from upstream, however if nf_ct_frag6_gather() is not available then we will backport ipv6 defrag so we need to do this initialization ourselves. In datapath/compat.h, compat_init() is invoked during openvswitch module load which invokes nf_ct_frag6_init() to perform this initialization if necessary. When depending on a kernel which exports nf_ct_frag6_gather(), we don't need to do any initialization, so we define this function as a no-op and rely on the upstream ipv6_defrag. When nf_ct_frag6_gather() is not exported, we are unable to depend on the upstream module so we rely on our own backport. In this case, we provide the implementation of nf_ct_frag6_init(). > More minor but it also seems odd that rpl_nf_ct_frag6_gather() is > declared in the first #ifdef but then actually defined under > OVS_NF_DEFRAG6_BACKPORT like all of the other functions. What's the > reason for separating it out this way? This patch shifts the #define directly below the declaration.. I think this patch fixes what you're describing here?
On Thu, Apr 28, 2016 at 5:17 PM, Joe Stringer <joe@ovn.org> wrote: > On 26 April 2016 at 19:44, Jesse Gross <jesse@kernel.org> wrote: >> On Thu, Apr 21, 2016 at 2:07 PM, Joe Stringer <joe@ovn.org> wrote: >>> diff --git a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h >>> index fe99ced37227..a3b86dab2c9c 100644 >>> --- a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h >>> +++ b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h >>> @@ -16,17 +16,17 @@ >>> #define OVS_NF_DEFRAG6_BACKPORT 1 >>> struct sk_buff *rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, >>> u32 user); >>> +#define nf_ct_frag6_gather rpl_nf_ct_frag6_gather >>> +#endif /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ >>> + >>> +#ifdef OVS_NF_DEFRAG6_BACKPORT >>> int __init rpl_nf_ct_frag6_init(void); >>> void rpl_nf_ct_frag6_cleanup(void); >>> -void rpl_nf_ct_frag6_consume_orig(struct sk_buff *skb); >>> -#define nf_ct_frag6_gather rpl_nf_ct_frag6_gather >>> -#else /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ >>> +#else /* !OVS_NF_DEFRAG6_BACKPORT */ >>> static inline int __init rpl_nf_ct_frag6_init(void) { return 0; } >>> static inline void rpl_nf_ct_frag6_cleanup(void) { } >>> -static inline void rpl_nf_ct_frag6_consume_orig(struct sk_buff *skb) { } >>> -#endif /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ >>> +#endif /* OVS_NF_DEFRAG6_BACKPORT */ >>> #define nf_ct_frag6_init rpl_nf_ct_frag6_init >>> #define nf_ct_frag6_cleanup rpl_nf_ct_frag6_cleanup >>> -#define nf_ct_frag6_consume_orig rpl_nf_ct_frag6_consume_orig >> >> I'm not sure that I totally understand what is going on in this file. >> In particular, although not directly related to this patch, I'm not >> sure why we define rpl_nf_ct_frag6_init()/cleanup() to be no-ops in >> cases where we don't have OVS_NF_DEFRAG6_BACKPORT. On new kernels, >> shouldn't we be using the upstream definitions of these functions? The >> values initialized seem to be used by functions which aren't >> backported. > > In upstream, we depend on the exported symbol nf_ct_frag6_gather() > which ensures that nf_defrag_ipv6 module is loaded when loading > openvswitch. During nf_defrag_ipv6 module load, in nf_defrag_init() it > will invoke nf_ct_frag6_init(), initializing the IPv6 fragmentation > handlers and cache. > > In out-of-tree, depending on the upstream kernel, we may be able to > rely on the same behaviour from upstream, however if > nf_ct_frag6_gather() is not available then we will backport ipv6 > defrag so we need to do this initialization ourselves. In > datapath/compat.h, compat_init() is invoked during openvswitch module > load which invokes nf_ct_frag6_init() to perform this initialization > if necessary. > > When depending on a kernel which exports nf_ct_frag6_gather(), we > don't need to do any initialization, so we define this function as a > no-op and rely on the upstream ipv6_defrag. > > When nf_ct_frag6_gather() is not exported, we are unable to depend on > the upstream module so we rely on our own backport. In this case, we > provide the implementation of nf_ct_frag6_init(). OK, thanks for the explanation. This is intricate but it makes sense now. >> More minor but it also seems odd that rpl_nf_ct_frag6_gather() is >> declared in the first #ifdef but then actually defined under >> OVS_NF_DEFRAG6_BACKPORT like all of the other functions. What's the >> reason for separating it out this way? > > This patch shifts the #define directly below the declaration.. I think > this patch fixes what you're describing here? My comment was more related to the fact that the declaration for nf_ct_frag6_gather() was in the first check (where OVS_NF_DEFRAG6_BACKPORT is #defined) but it's actual definition is under #ifdef OVS_NF_DEFRAG6_BACKPORT. I realized that this is minor and can't cause a problem in practice but it made me wonder why the declarations were separated this way.
diff --git a/datapath/conntrack.c b/datapath/conntrack.c index 82db51567313..f721a8920678 100644 --- a/datapath/conntrack.c +++ b/datapath/conntrack.c @@ -337,21 +337,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, if (!reasm) return -EINPROGRESS; - if (skb == reasm) { - kfree_skb(skb); - return -EINVAL; - } - - /* Don't free 'skb' even though it is one of the original - * fragments, as we're going to morph it into the head. - */ - skb_get(skb); - nf_ct_frag6_consume_orig(reasm); - key->ip.proto = ipv6_hdr(reasm)->nexthdr; - skb_morph(skb, reasm); - skb->next = reasm->next; - consume_skb(reasm); ovs_cb.dp_cb.mru = IP6CB(skb)->frag_max_size; #endif /* IP frag support */ } else { diff --git a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h index fe99ced37227..a3b86dab2c9c 100644 --- a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -16,17 +16,17 @@ #define OVS_NF_DEFRAG6_BACKPORT 1 struct sk_buff *rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); +#define nf_ct_frag6_gather rpl_nf_ct_frag6_gather +#endif /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ + +#ifdef OVS_NF_DEFRAG6_BACKPORT int __init rpl_nf_ct_frag6_init(void); void rpl_nf_ct_frag6_cleanup(void); -void rpl_nf_ct_frag6_consume_orig(struct sk_buff *skb); -#define nf_ct_frag6_gather rpl_nf_ct_frag6_gather -#else /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ +#else /* !OVS_NF_DEFRAG6_BACKPORT */ static inline int __init rpl_nf_ct_frag6_init(void) { return 0; } static inline void rpl_nf_ct_frag6_cleanup(void) { } -static inline void rpl_nf_ct_frag6_consume_orig(struct sk_buff *skb) { } -#endif /* HAVE_NF_CT_FRAG6_CONSUME_ORIG */ +#endif /* OVS_NF_DEFRAG6_BACKPORT */ #define nf_ct_frag6_init rpl_nf_ct_frag6_init #define nf_ct_frag6_cleanup rpl_nf_ct_frag6_cleanup -#define nf_ct_frag6_consume_orig rpl_nf_ct_frag6_consume_orig #endif /* __NF_DEFRAG_IPV6_WRAPPER_H */ diff --git a/datapath/linux/compat/nf_conntrack_reasm.c b/datapath/linux/compat/nf_conntrack_reasm.c index 701bd15d8efd..c6dc7ebec5b5 100644 --- a/datapath/linux/compat/nf_conntrack_reasm.c +++ b/datapath/linux/compat/nf_conntrack_reasm.c @@ -62,7 +62,6 @@ struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; int offset; - struct sk_buff *orig; }; #define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) @@ -94,12 +93,6 @@ static unsigned int nf_hashfn(struct inet_frag_queue *q) return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } -static void nf_skb_free(struct sk_buff *skb) -{ - if (NFCT_FRAG6_CB(skb)->orig) - kfree_skb(NFCT_FRAG6_CB(skb)->orig); -} - static void nf_ct_frag6_expire(unsigned long data) { struct frag_queue *fq; @@ -300,9 +293,9 @@ err: * the last and the first frames arrived and all the bits are here. */ static struct sk_buff * -nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) +nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { - struct sk_buff *fp, *op, *head = fq->q.fragments; + struct sk_buff *fp, *head = fq->q.fragments; int payload_len; u8 ecn; @@ -353,10 +346,38 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->csum = 0; clone->ip_summed = head->ip_summed; - NFCT_FRAG6_CB(clone)->orig = NULL; add_frag_mem_limit(fq->q.net, clone->truesize); } + /* morph head into last received skb: prev. + * + * This allows callers of ipv6 conntrack defrag to continue + * to use the last skb(frag) passed into the reasm engine. + * The last skb frag 'silently' turns into the full reassembled skb. + * + * Since prev is also part of q->fragments we have to clone it first. + */ + if (head != prev) { + struct sk_buff *iter; + + fp = skb_clone(prev, GFP_ATOMIC); + if (!fp) + goto out_oom; + + fp->next = prev->next; + skb_queue_walk(head, iter) { + if (iter->next != prev) + continue; + iter->next = fp; + break; + } + + skb_morph(prev, head); + prev->next = head->next; + consume_skb(head); + head = prev; + } + /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; @@ -397,21 +418,6 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) fq->q.fragments = NULL; fq->q.fragments_tail = NULL; - /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ - fp = skb_shinfo(head)->frag_list; - if (fp && NFCT_FRAG6_CB(fp)->orig == NULL) - /* at above code, head skb is divided into two skbs. */ - fp = fp->next; - - op = NFCT_FRAG6_CB(head)->orig; - for (; fp; fp = fp->next) { - struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; - - op->next = orig; - op = orig; - NFCT_FRAG6_CB(fp)->orig = NULL; - } - return head; out_oversize: @@ -490,7 +496,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) struct sk_buff *rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { - struct sk_buff *clone; struct net_device *dev = skb->dev; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -508,42 +513,30 @@ struct sk_buff *rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) return skb; - clone = skb_clone(skb, GFP_ATOMIC); - if (clone == NULL) { - pr_debug("Can't clone skb\n"); + if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) return skb; - } - NFCT_FRAG6_CB(clone)->orig = skb; - - if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { - pr_debug("message is too short.\n"); - goto ret_orig; - } - - skb_set_transport_header(clone, fhoff); - hdr = ipv6_hdr(clone); - fhdr = (struct frag_hdr *)skb_transport_header(clone); + skb_set_transport_header(skb, fhoff); + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); - if (fq == NULL) { - pr_debug("Can't find and can't create new queue\n"); - goto ret_orig; - } + if (fq == NULL) + return skb; spin_lock_bh(&fq->q.lock); - if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { + if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); inet_frag_put(&fq->q, &nf_frags); - goto ret_orig; + return skb; } if (qp_flags(fq) == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { - ret_skb = nf_ct_frag6_reasm(fq, dev); + ret_skb = nf_ct_frag6_reasm(fq, skb, dev); if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } @@ -551,10 +544,6 @@ struct sk_buff *rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, inet_frag_put(&fq->q, &nf_frags); return ret_skb; - -ret_orig: - kfree_skb(clone); - return skb; } EXPORT_SYMBOL_GPL(rpl_nf_ct_frag6_gather); @@ -590,18 +579,6 @@ static bool rpl_ip6_frag_match(struct inet_frag_queue *q, void *a) ipv6_addr_equal(&fq->daddr, arg->dst); } -void nf_ct_frag6_consume_orig(struct sk_buff *skb) -{ - struct sk_buff *s, *s2; - - for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - s2 = s->next; - s->next = NULL; - consume_skb(s); - s = s2; - } -} - static int nf_ct_net_init(struct net *net) { nf_defrag_ipv6_enable(); @@ -626,7 +603,6 @@ int rpl_nf_ct_frag6_init(void) nf_frags.hashfn = nf_hashfn; nf_frags.constructor = rpl_ip6_frag_init; nf_frags.destructor = NULL; - nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = rpl_ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire;