Message ID | 1395849071-15432-1-git-send-email-vyasevic@redhat.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
On Wed, 2014-03-26 at 11:51 -0400, Vlad Yasevich wrote: > skb_network_protocol() already accounts for multiple vlan > headers that may be present in the skb. However, skb_mac_gso_segment() > doesn't know anything about it and assumes that skb->mac_len > is set correctly to skip all mac headers. That may not > always be the case. If we are simply forwarding the packet (via > bridge or macvtap), all vlan headers may not be accounted for. > > A simple solution is to allow skb_network_protocol to return > the vlan depth it has calculated. This way skb_mac_gso_segment > will correctly skip all mac headers. > > Signed-off-by: Vlad Yasevich <vyasevic@redhat.com> > --- > include/linux/netdevice.h | 2 +- > net/core/dev.c | 12 ++++++++---- > net/core/skbuff.c | 2 +- > 3 files changed, 10 insertions(+), 6 deletions(-) > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index d855794..18b8c1b 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -3015,7 +3015,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) > { > return __skb_gso_segment(skb, features, true); > } > -__be16 skb_network_protocol(struct sk_buff *skb); > +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); > > static inline bool can_checksum_protocol(netdev_features_t features, > __be16 protocol) > diff --git a/net/core/dev.c b/net/core/dev.c > index a98f7fa..49c41e6 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -2287,7 +2287,7 @@ out: > } > EXPORT_SYMBOL(skb_checksum_help); > > -__be16 skb_network_protocol(struct sk_buff *skb) > +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) > { > __be16 type = skb->protocol; > int vlan_depth = ETH_HLEN; > @@ -2314,6 +2314,9 @@ __be16 skb_network_protocol(struct sk_buff *skb) > vlan_depth += VLAN_HLEN; > } > > + if (depth) > + *depth = vlan_depth; expensive test, just always pass a non NULL pointer, to a dummy stack variable. 
> + > return type; > } > > @@ -2327,12 +2330,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, > { > struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); > struct packet_offload *ptype; > - __be16 type = skb_network_protocol(skb); > + int vlan_depth = 0; vlan_depth = ETH_HLEN; > + __be16 type = skb_network_protocol(skb, &vlan_depth); > > if (unlikely(!type)) > return ERR_PTR(-EINVAL); > > - __skb_pull(skb, skb->mac_len); > + __skb_pull(skb, vlan_depth > skb->mac_len ? vlan_depth : skb->mac_len); Please remove this test __skb_pull(skb, vlan_depth); -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/26/2014 12:14 PM, Eric Dumazet wrote: > On Wed, 2014-03-26 at 11:51 -0400, Vlad Yasevich wrote: >> skb_network_protocol() already accounts for multiple vlan >> headers that may be present in the skb. However, skb_mac_gso_segment() >> doesn't know anything about it and assumes that skb->mac_len >> is set correctly to skip all mac headers. That may not >> always be the case. If we are simply forwarding the packet (via >> bridge or macvtap), all vlan headers may not be accounted for. >> >> A simple solution is to allow skb_network_protocol to return >> the vlan depth it has calculated. This way skb_mac_gso_segment >> will correctly skip all mac headers. >> >> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com> >> --- >> include/linux/netdevice.h | 2 +- >> net/core/dev.c | 12 ++++++++---- >> net/core/skbuff.c | 2 +- >> 3 files changed, 10 insertions(+), 6 deletions(-) >> >> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h >> index d855794..18b8c1b 100644 >> --- a/include/linux/netdevice.h >> +++ b/include/linux/netdevice.h >> @@ -3015,7 +3015,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) >> { >> return __skb_gso_segment(skb, features, true); >> } >> -__be16 skb_network_protocol(struct sk_buff *skb); >> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); >> >> static inline bool can_checksum_protocol(netdev_features_t features, >> __be16 protocol) >> diff --git a/net/core/dev.c b/net/core/dev.c >> index a98f7fa..49c41e6 100644 >> --- a/net/core/dev.c >> +++ b/net/core/dev.c >> @@ -2287,7 +2287,7 @@ out: >> } >> EXPORT_SYMBOL(skb_checksum_help); >> >> -__be16 skb_network_protocol(struct sk_buff *skb) >> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) >> { >> __be16 type = skb->protocol; >> int vlan_depth = ETH_HLEN; >> @@ -2314,6 +2314,9 @@ __be16 skb_network_protocol(struct sk_buff *skb) >> vlan_depth += VLAN_HLEN; >> } >> >> + if (depth) >> + *depth = vlan_depth; > > expensive test, 
just always pass a non NULL pointer, to a dummy stack > variable. > >> + >> return type; >> } >> >> @@ -2327,12 +2330,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, >> { >> struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); >> struct packet_offload *ptype; >> - __be16 type = skb_network_protocol(skb); >> + int vlan_depth = 0; > vlan_depth = ETH_HLEN; safer to make it skb->mac_len. > >> + __be16 type = skb_network_protocol(skb, &vlan_depth); >> >> if (unlikely(!type)) >> return ERR_PTR(-EINVAL); >> >> - __skb_pull(skb, skb->mac_len); >> + __skb_pull(skb, vlan_depth > skb->mac_len ? vlan_depth : skb->mac_len); > > Please remove this test Couldn't mac_len be larger than ETH_HLEN already? > > __skb_pull(skb, vlan_depth); > The other variant of this patch that I tested was simply adjusting skb->mac_len directly in skb_network_protocol(). I didn't encounter any issues with it, but didn't like the potential side-effects for GSO and thus didn't send it. Can you see any issue with that approach? Thanks -vlad -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Vlad, > skb_network_protocol() already accounts for multiple vlan > headers that may be present in the skb. However, skb_mac_gso_segment() > doesn't know anything about it and assumes that skb->mac_len > is set correctly to skip all mac headers. That may not > always be the case. If we are simply forwarding the packet (via > bridge or macvtap), all vlan headers may not be accounted for. When is it the case that skb->mac_len does not include all VLAN tags? If you can clearly describe when this is true, and when not, it would be nice to add a comment above the mac_len field in skbuff.h, to explain. > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index d855794..18b8c1b 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -3015,7 +3015,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, > netdev_features_t features) > { > return __skb_gso_segment(skb, features, true); > } > -__be16 skb_network_protocol(struct sk_buff *skb); > +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); For me, eth_hdr_len or l2_hdr_len would be much clearer names than "depth". > static inline bool can_checksum_protocol(netdev_features_t features, > __be16 protocol) > diff --git a/net/core/dev.c b/net/core/dev.c > index a98f7fa..49c41e6 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -2287,7 +2287,7 @@ out: > } > EXPORT_SYMBOL(skb_checksum_help); > > -__be16 skb_network_protocol(struct sk_buff *skb) > +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) > { > __be16 type = skb->protocol; > int vlan_depth = ETH_HLEN; > @@ -2314,6 +2314,9 @@ __be16 skb_network_protocol(struct sk_buff *skb) > vlan_depth += VLAN_HLEN; > } > > + if (depth) > + *depth = vlan_depth; > + > return type; > } Similarly here ... 
> @@ -2327,12 +2330,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff > *skb, > { > struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); > struct packet_offload *ptype; > - __be16 type = skb_network_protocol(skb); > + int vlan_depth = 0; > + __be16 type = skb_network_protocol(skb, &vlan_depth); > > if (unlikely(!type)) > return ERR_PTR(-EINVAL); > > - __skb_pull(skb, skb->mac_len); > + __skb_pull(skb, vlan_depth > skb->mac_len ? vlan_depth : skb->mac_len); ... and here (obviously). Regards, Neil -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/31/2014 05:22 PM, Neil Jerram wrote: > Hi Vlad, > >> skb_network_protocol() already accounts for multiple vlan >> headers that may be present in the skb. However, skb_mac_gso_segment() >> doesn't know anything about it and assumes that skb->mac_len >> is set correctly to skip all mac headers. That may not >> always be the case. If we are simply forwarding the packet (via >> bridge or macvtap), all vlan headers may not be accounted for. > > When is it the case that skb->mac_len does not include all VLAN tags? If you can clearly describe when this is true, and when not, it would be nice to add a comment above the mac_len field in skbuff.h, to explain. > In the case of skb_mac_gso_segment(), the problem can be observed if we receive a packet with multiple vlan tags that has to go through the gso code (e.g. GSO_DODGY is set). In this case, mac_len will be adjusted for the outer header, but not the inner header. skb_network_header will be pointing to the wrong offset and GSO will fail, causing retransmissions. V2 of this patch has already been applied. I can submit a clean-up follow-on to rename the variable. -vlad >> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h >> index d855794..18b8c1b 100644 >> --- a/include/linux/netdevice.h >> +++ b/include/linux/netdevice.h >> @@ -3015,7 +3015,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, >> netdev_features_t features) >> { >> return __skb_gso_segment(skb, features, true); >> } >> -__be16 skb_network_protocol(struct sk_buff *skb); >> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); > > For me, eth_hdr_len or l2_hdr_len would be much clearer names than "depth". 
> >> static inline bool can_checksum_protocol(netdev_features_t features, >> __be16 protocol) >> diff --git a/net/core/dev.c b/net/core/dev.c >> index a98f7fa..49c41e6 100644 >> --- a/net/core/dev.c >> +++ b/net/core/dev.c >> @@ -2287,7 +2287,7 @@ out: >> } >> EXPORT_SYMBOL(skb_checksum_help); >> >> -__be16 skb_network_protocol(struct sk_buff *skb) >> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) >> { >> __be16 type = skb->protocol; >> int vlan_depth = ETH_HLEN; >> @@ -2314,6 +2314,9 @@ __be16 skb_network_protocol(struct sk_buff *skb) >> vlan_depth += VLAN_HLEN; >> } >> >> + if (depth) >> + *depth = vlan_depth; >> + >> return type; >> } > > Similarly here ... > >> @@ -2327,12 +2330,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff >> *skb, >> { >> struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); >> struct packet_offload *ptype; >> - __be16 type = skb_network_protocol(skb); >> + int vlan_depth = 0; >> + __be16 type = skb_network_protocol(skb, &vlan_depth); >> >> if (unlikely(!type)) >> return ERR_PTR(-EINVAL); >> >> - __skb_pull(skb, skb->mac_len); >> + __skb_pull(skb, vlan_depth > skb->mac_len ? vlan_depth : skb->mac_len); > > ... and here (obviously). > > Regards, > Neil > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d855794..18b8c1b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3015,7 +3015,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { return __skb_gso_segment(skb, features, true); } -__be16 skb_network_protocol(struct sk_buff *skb); +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) diff --git a/net/core/dev.c b/net/core/dev.c index a98f7fa..49c41e6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2287,7 +2287,7 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -__be16 skb_network_protocol(struct sk_buff *skb) +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; @@ -2314,6 +2314,9 @@ __be16 skb_network_protocol(struct sk_buff *skb) vlan_depth += VLAN_HLEN; } + if (depth) + *depth = vlan_depth; + return type; } @@ -2327,12 +2330,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; - __be16 type = skb_network_protocol(skb); + int vlan_depth = 0; + __be16 type = skb_network_protocol(skb, &vlan_depth); if (unlikely(!type)) return ERR_PTR(-EINVAL); - __skb_pull(skb, skb->mac_len); + __skb_pull(skb, vlan_depth > skb->mac_len ? 
vlan_depth : skb->mac_len); rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { @@ -2500,7 +2504,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, skb_network_protocol(skb))) { + !can_checksum_protocol(features, skb_network_protocol(skb, NULL))) { features &= ~NETIF_F_ALL_CSUM; } else if (illegal_highdma(dev, skb)) { features &= ~NETIF_F_SG; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 869c7af..25d8cd8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2867,7 +2867,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int i = 0; int pos; - proto = skb_network_protocol(head_skb); + proto = skb_network_protocol(head_skb, NULL); if (unlikely(!proto)) return ERR_PTR(-EINVAL);
skb_network_protocol() already accounts for multiple vlan headers that may be present in the skb. However, skb_mac_gso_segment() doesn't know anything about it and assumes that skb->mac_len is set correctly to skip all mac headers. That may not always be the case. If we are simply forwarding the packet (via bridge or macvtap), all vlan headers may not be accounted for. A simple solution is to allow skb_network_protocol to return the vlan depth it has calculated. This way skb_mac_gso_segment will correctly skip all mac headers. Signed-off-by: Vlad Yasevich <vyasevic@redhat.com> --- include/linux/netdevice.h | 2 +- net/core/dev.c | 12 ++++++++---- net/core/skbuff.c | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-)