diff mbox

[net] net: Account for all vlan headers in skb_mac_gso_segment

Message ID 1395849071-15432-1-git-send-email-vyasevic@redhat.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Vlad Yasevich March 26, 2014, 3:51 p.m. UTC
skb_network_protocol() already accounts for multiple vlan
headers that may be present in the skb.  However, skb_mac_gso_segment()
doesn't know anything about it and assumes that skb->mac_len
is set correctly to skip all mac headers.  That may not
always be the case.  If we are simply forwarding the packet (via
bridge or macvtap), all vlan headers may not be accounted for.

A simple solution is to allow skb_network_protocol to return
the vlan depth it has calculated.  This way skb_mac_gso_segment
will correctly skip all mac headers.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 12 ++++++++----
 net/core/skbuff.c         |  2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

Comments

Eric Dumazet March 26, 2014, 4:14 p.m. UTC | #1
On Wed, 2014-03-26 at 11:51 -0400, Vlad Yasevich wrote:
> skb_network_protocol() already accounts for multiple vlan
> headers that may be present in the skb.  However, skb_mac_gso_segment()
> doesn't know anything about it and assumes that skb->mac_len
> is set correctly to skip all mac headers.  That may not
> always be the case.  If we are simply forwarding the packet (via
> bridge or macvtap), all vlan headers may not be accounted for.
> 
> A simple solution is to allow skb_network_protocol to return
> the vlan depth it has calculated.  This way skb_mac_gso_segment
> will correctly skip all mac headers.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  include/linux/netdevice.h |  2 +-
>  net/core/dev.c            | 12 ++++++++----
>  net/core/skbuff.c         |  2 +-
>  3 files changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index d855794..18b8c1b 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3015,7 +3015,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
>  {
>  	return __skb_gso_segment(skb, features, true);
>  }
> -__be16 skb_network_protocol(struct sk_buff *skb);
> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth);
>  
>  static inline bool can_checksum_protocol(netdev_features_t features,
>  					 __be16 protocol)
> diff --git a/net/core/dev.c b/net/core/dev.c
> index a98f7fa..49c41e6 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2287,7 +2287,7 @@ out:
>  }
>  EXPORT_SYMBOL(skb_checksum_help);
>  
> -__be16 skb_network_protocol(struct sk_buff *skb)
> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
>  {
>  	__be16 type = skb->protocol;
>  	int vlan_depth = ETH_HLEN;
> @@ -2314,6 +2314,9 @@ __be16 skb_network_protocol(struct sk_buff *skb)
>  		vlan_depth += VLAN_HLEN;
>  	}
>  
> +	if (depth)
> +		*depth = vlan_depth;

expensive test, just always pass a non NULL pointer, to a dummy stack
variable.

> +
>  	return type;
>  }
>  
> @@ -2327,12 +2330,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
>  {
>  	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
>  	struct packet_offload *ptype;
> -	__be16 type = skb_network_protocol(skb);
> +	int vlan_depth = 0;
vlan_depth = ETH_HLEN;

> +	__be16 type = skb_network_protocol(skb, &vlan_depth);
>  
>  	if (unlikely(!type))
>  		return ERR_PTR(-EINVAL);
>  
> -	__skb_pull(skb, skb->mac_len);
> +	__skb_pull(skb, vlan_depth > skb->mac_len ? vlan_depth : skb->mac_len);

Please remove this test

	__skb_pull(skb, vlan_depth);




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich March 26, 2014, 4:20 p.m. UTC | #2
On 03/26/2014 12:14 PM, Eric Dumazet wrote:
> On Wed, 2014-03-26 at 11:51 -0400, Vlad Yasevich wrote:
>> skb_network_protocol() already accounts for multiple vlan
>> headers that may be present in the skb.  However, skb_mac_gso_segment()
>> doesn't know anything about it and assumes that skb->mac_len
>> is set correctly to skip all mac headers.  That may not
>> always be the case.  If we are simply forwarding the packet (via
>> bridge or macvtap), all vlan headers may not be accounted for.
>>
>> A simple solution is to allow skb_network_protocol to return
>> the vlan depth it has calculated.  This way skb_mac_gso_segment
>> will correctly skip all mac headers.
>>
>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>> ---
>>  include/linux/netdevice.h |  2 +-
>>  net/core/dev.c            | 12 ++++++++----
>>  net/core/skbuff.c         |  2 +-
>>  3 files changed, 10 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index d855794..18b8c1b 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -3015,7 +3015,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
>>  {
>>  	return __skb_gso_segment(skb, features, true);
>>  }
>> -__be16 skb_network_protocol(struct sk_buff *skb);
>> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth);
>>  
>>  static inline bool can_checksum_protocol(netdev_features_t features,
>>  					 __be16 protocol)
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index a98f7fa..49c41e6 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -2287,7 +2287,7 @@ out:
>>  }
>>  EXPORT_SYMBOL(skb_checksum_help);
>>  
>> -__be16 skb_network_protocol(struct sk_buff *skb)
>> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
>>  {
>>  	__be16 type = skb->protocol;
>>  	int vlan_depth = ETH_HLEN;
>> @@ -2314,6 +2314,9 @@ __be16 skb_network_protocol(struct sk_buff *skb)
>>  		vlan_depth += VLAN_HLEN;
>>  	}
>>  
>> +	if (depth)
>> +		*depth = vlan_depth;
> 
> expensive test, just always pass a non NULL pointer, to a dummy stack
> variable.
> 
>> +
>>  	return type;
>>  }
>>  
>> @@ -2327,12 +2330,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
>>  {
>>  	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
>>  	struct packet_offload *ptype;
>> -	__be16 type = skb_network_protocol(skb);
>> +	int vlan_depth = 0;
> vlan_depth = ETH_HLEN;

safer to make it skb->mac_len.

> 
>> +	__be16 type = skb_network_protocol(skb, &vlan_depth);
>>  
>>  	if (unlikely(!type))
>>  		return ERR_PTR(-EINVAL);
>>  
>> -	__skb_pull(skb, skb->mac_len);
>> +	__skb_pull(skb, vlan_depth > skb->mac_len ? vlan_depth : skb->mac_len);
> 
> Please remove this test

Couldn't mac_len be larger than ETH_HLEN already?

> 
> 	__skb_pull(skb, vlan_depth);
> 

The other variant of this patch that I tested was simply adjusting
skb->mac_len directly in skb_network_protocol().  I didn't encounter any
issues with it, but didn't like the potential side-effects for GSO
and thus didn't send it.  Can you see any issue with that approach?

Thanks
-vlad

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Neil Jerram March 31, 2014, 9:22 p.m. UTC | #3
Hi Vlad,

> skb_network_protocol() already accounts for multiple vlan
> headers that may be present in the skb.  However, skb_mac_gso_segment()
> doesn't know anything about it and assumes that skb->mac_len
> is set correctly to skip all mac headers.  That may not
> always be the case.  If we are simply forwarding the packet (via
> bridge or macvtap), all vlan headers may not be accounted for.

When is it the case that skb->mac_len does not include all VLAN tags?  If you can clearly describe when this is true, and when not, it would be nice to add a comment above the mac_len field in skbuff.h, to explain.

> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index d855794..18b8c1b 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3015,7 +3015,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
> netdev_features_t features)
>  {
>  	return __skb_gso_segment(skb, features, true);
>  }
> -__be16 skb_network_protocol(struct sk_buff *skb);
> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth);

For me, eth_hdr_len or l2_hdr_len would be much clearer names than "depth".

>  static inline bool can_checksum_protocol(netdev_features_t features,
>  					 __be16 protocol)
> diff --git a/net/core/dev.c b/net/core/dev.c
> index a98f7fa..49c41e6 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2287,7 +2287,7 @@ out:
>  }
>  EXPORT_SYMBOL(skb_checksum_help);
> 
> -__be16 skb_network_protocol(struct sk_buff *skb)
> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
>  {
>  	__be16 type = skb->protocol;
>  	int vlan_depth = ETH_HLEN;
> @@ -2314,6 +2314,9 @@ __be16 skb_network_protocol(struct sk_buff *skb)
>  		vlan_depth += VLAN_HLEN;
>  	}
> 
> +	if (depth)
> +		*depth = vlan_depth;
> +
>  	return type;
>  }

Similarly here ...

> @@ -2327,12 +2330,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff
> *skb,
>  {
>  	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
>  	struct packet_offload *ptype;
> -	__be16 type = skb_network_protocol(skb);
> +	int vlan_depth = 0;
> +	__be16 type = skb_network_protocol(skb, &vlan_depth);
> 
>  	if (unlikely(!type))
>  		return ERR_PTR(-EINVAL);
> 
> -	__skb_pull(skb, skb->mac_len);
> +	__skb_pull(skb, vlan_depth > skb->mac_len ? vlan_depth : skb->mac_len);

... and here (obviously).

Regards,
	Neil

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich March 31, 2014, 9:37 p.m. UTC | #4
On 03/31/2014 05:22 PM, Neil Jerram wrote:
> Hi Vlad,
> 
>> skb_network_protocol() already accounts for multiple vlan
>> headers that may be present in the skb.  However, skb_mac_gso_segment()
>> doesn't know anything about it and assumes that skb->mac_len
>> is set correctly to skip all mac headers.  That may not
>> always be the case.  If we are simply forwarding the packet (via
>> bridge or macvtap), all vlan headers may not be accounted for.
> 
> When is it the case that skb->mac_len does not include all VLAN tags?  If you can clearly describe when this is true, and when not, it would be nice to add a comment above the mac_len field in skbuff.h, to explain.
> 

In the case of skb_mac_gso_segment(), the problem can be observed
if we receive a packet with multiple vlan tags that has
to go through the gso code (ex: GSO_DODGY is set).

In this case, mac_len will be adjusted for the outer header, but
not the inner header.  skb_network_header will be pointing to
the wrong offset and GSO will fail causing retransmissions.

V2 of this patch has already been applied.  I can submit a clean-up
follow-on to rename the variable.

-vlad

>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index d855794..18b8c1b 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -3015,7 +3015,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
>> netdev_features_t features)
>>  {
>>  	return __skb_gso_segment(skb, features, true);
>>  }
>> -__be16 skb_network_protocol(struct sk_buff *skb);
>> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth);
> 
> For me, eth_hdr_len or l2_hdr_len would be much clearer names than "depth".
> 
>>  static inline bool can_checksum_protocol(netdev_features_t features,
>>  					 __be16 protocol)
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index a98f7fa..49c41e6 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -2287,7 +2287,7 @@ out:
>>  }
>>  EXPORT_SYMBOL(skb_checksum_help);
>>
>> -__be16 skb_network_protocol(struct sk_buff *skb)
>> +__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
>>  {
>>  	__be16 type = skb->protocol;
>>  	int vlan_depth = ETH_HLEN;
>> @@ -2314,6 +2314,9 @@ __be16 skb_network_protocol(struct sk_buff *skb)
>>  		vlan_depth += VLAN_HLEN;
>>  	}
>>
>> +	if (depth)
>> +		*depth = vlan_depth;
>> +
>>  	return type;
>>  }
> 
> Similarly here ...
> 
>> @@ -2327,12 +2330,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff
>> *skb,
>>  {
>>  	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
>>  	struct packet_offload *ptype;
>> -	__be16 type = skb_network_protocol(skb);
>> +	int vlan_depth = 0;
>> +	__be16 type = skb_network_protocol(skb, &vlan_depth);
>>
>>  	if (unlikely(!type))
>>  		return ERR_PTR(-EINVAL);
>>
>> -	__skb_pull(skb, skb->mac_len);
>> +	__skb_pull(skb, vlan_depth > skb->mac_len ? vlan_depth : skb->mac_len);
> 
> ... and here (obviously).
> 
> Regards,
> 	Neil
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d855794..18b8c1b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3015,7 +3015,7 @@  struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
 {
 	return __skb_gso_segment(skb, features, true);
 }
-__be16 skb_network_protocol(struct sk_buff *skb);
+__be16 skb_network_protocol(struct sk_buff *skb, int *depth);
 
 static inline bool can_checksum_protocol(netdev_features_t features,
 					 __be16 protocol)
diff --git a/net/core/dev.c b/net/core/dev.c
index a98f7fa..49c41e6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2287,7 +2287,7 @@  out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
-__be16 skb_network_protocol(struct sk_buff *skb)
+__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
 	__be16 type = skb->protocol;
 	int vlan_depth = ETH_HLEN;
@@ -2314,6 +2314,9 @@  __be16 skb_network_protocol(struct sk_buff *skb)
 		vlan_depth += VLAN_HLEN;
 	}
 
+	if (depth)
+		*depth = vlan_depth;
+
 	return type;
 }
 
@@ -2327,12 +2330,13 @@  struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
-	__be16 type = skb_network_protocol(skb);
+	int vlan_depth = 0;
+	__be16 type = skb_network_protocol(skb, &vlan_depth);
 
 	if (unlikely(!type))
 		return ERR_PTR(-EINVAL);
 
-	__skb_pull(skb, skb->mac_len);
+	__skb_pull(skb, vlan_depth > skb->mac_len ? vlan_depth : skb->mac_len);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
@@ -2500,7 +2504,7 @@  static netdev_features_t harmonize_features(struct sk_buff *skb,
 					    netdev_features_t features)
 {
 	if (skb->ip_summed != CHECKSUM_NONE &&
-	    !can_checksum_protocol(features, skb_network_protocol(skb))) {
+	    !can_checksum_protocol(features, skb_network_protocol(skb, NULL))) {
 		features &= ~NETIF_F_ALL_CSUM;
 	} else if (illegal_highdma(dev, skb)) {
 		features &= ~NETIF_F_SG;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 869c7af..25d8cd8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2867,7 +2867,7 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	int i = 0;
 	int pos;
 
-	proto = skb_network_protocol(head_skb);
+	proto = skb_network_protocol(head_skb, NULL);
 	if (unlikely(!proto))
 		return ERR_PTR(-EINVAL);