diff mbox series

[net-next,v4] seg6: using DSCP of inner IPv4 packets

Message ID 20200824085124.2488-1-ahabdels@gmail.com
State Changes Requested
Delegated to: David Miller
Headers show
Series [net-next,v4] seg6: using DSCP of inner IPv4 packets | expand

Commit Message

Ahmed Abdelsalam Aug. 24, 2020, 8:51 a.m. UTC
This patch allows copying the DSCP from inner IPv4 header to the
outer IPv6 header, when doing SRv6 Encapsulation.

This allows forwarding packet across the SRv6 fabric based on their
original traffic class.

Signed-off-by: Ahmed Abdelsalam <ahabdels@gmail.com>
---
 net/ipv6/seg6_iptunnel.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

Comments

David Miller Aug. 25, 2020, 1:11 a.m. UTC | #1
From: Ahmed Abdelsalam <ahabdels@gmail.com>
Date: Mon, 24 Aug 2020 08:51:24 +0000

> This patch allows copying the DSCP from inner IPv4 header to the
> outer IPv6 header, when doing SRv6 Encapsulation.
> 
> This allows forwarding packet across the SRv6 fabric based on their
> original traffic class.
> 
> Signed-off-by: Ahmed Abdelsalam <ahabdels@gmail.com>

"Allows" sounds like the behavior is optional, but that is not what
is happening here.  You are making this DSCP inheritance behavior
unconditional.

I've stated that the current behavior matches what other ipv6
tunneling devices do, and therefore we should keep it that way.

Furthermore, this behavior has been in place for several releases
so you cannot change it by default.  People may be depending upon
how things work right now.

Also:

> @@ -130,6 +129,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
>  	struct ipv6_sr_hdr *isrh;
>  	int hdrlen, tot_len, err;
>  	__be32 flowlabel;
> +	u8 tos = 0, hop_limit;

Need to preserve reverse christmas tree here.
Ahmed Abdelsalam Aug. 25, 2020, 12:01 p.m. UTC | #2
On 25/08/2020 03:11, David Miller wrote:
> From: Ahmed Abdelsalam <ahabdels@gmail.com>
> Date: Mon, 24 Aug 2020 08:51:24 +0000
> 
>> This patch allows copying the DSCP from inner IPv4 header to the
>> outer IPv6 header, when doing SRv6 Encapsulation.
>>
>> This allows forwarding packet across the SRv6 fabric based on their
>> original traffic class.
>>
>> Signed-off-by: Ahmed Abdelsalam <ahabdels@gmail.com>
> 
> "Allows" sounds like the behavior is optional, but that is not what
> is happening here.  You are making this DSCP inheritance behavior
> unconditional.
> 
> I've stated that the current behavior matches what other ipv6
> tunneling devices do, and therefore we should keep it that way.
> 
> Furthermore, this behavior has been in place for several releases
> so you cannot change it by default.  People may be depending upon
> how things work right now.
> 

Ok. I added a new sysctl (seg6_inherit_inner_ipv4_dscp) to 
enable/disable the new behavior.

The sysctl will be checked in case of IPv4 traffic.
In the IPv6 case, there is no change as the code is already copying the 
DSCP from the inner IPv6 packet.

I'm sending a new patch.


> Also:
> 
>> @@ -130,6 +129,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
>>   	struct ipv6_sr_hdr *isrh;
>>   	int hdrlen, tot_len, err;
>>   	__be32 flowlabel;
>> +	u8 tos = 0, hop_limit;
> 
> Need to preserve reverse christmas tree here.
> 
Fixed in the new patch.
diff mbox series

Patch

diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 897fa59c47de..7eaa7adc296b 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -104,8 +104,7 @@  static void set_tun_src(struct net *net, struct net_device *dev,
 }
 
 /* Compute flowlabel for outer IPv6 header */
-static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
-				  struct ipv6hdr *inner_hdr)
+static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb)
 {
 	int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
 	__be32 flowlabel = 0;
@@ -116,7 +115,7 @@  static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
 		hash = rol32(hash, 16);
 		flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
 	} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
-		flowlabel = ip6_flowlabel(inner_hdr);
+		flowlabel = ip6_flowlabel(ipv6_hdr(skb));
 	}
 	return flowlabel;
 }
@@ -130,6 +129,7 @@  int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 	struct ipv6_sr_hdr *isrh;
 	int hdrlen, tot_len, err;
 	__be32 flowlabel;
+	u8 tos = 0, hop_limit;
 
 	hdrlen = (osrh->hdrlen + 1) << 3;
 	tot_len = hdrlen + sizeof(*hdr);
@@ -138,30 +138,32 @@  int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 	if (unlikely(err))
 		return err;
 
-	inner_hdr = ipv6_hdr(skb);
-	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
-
-	skb_push(skb, tot_len);
-	skb_reset_network_header(skb);
-	skb_mac_header_rebuild(skb);
-	hdr = ipv6_hdr(skb);
-
 	/* inherit tc, flowlabel and hlim
 	 * hlim will be decremented in ip6_forward() afterwards and
 	 * decapsulation will overwrite inner hlim with outer hlim
 	 */
 
+	flowlabel = seg6_make_flowlabel(net, skb);
+	hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
 	if (skb->protocol == htons(ETH_P_IPV6)) {
-		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
-			     flowlabel);
-		hdr->hop_limit = inner_hdr->hop_limit;
+		inner_hdr = ipv6_hdr(skb);
+		hop_limit = inner_hdr->hop_limit;
+		tos = ip6_tclass(ip6_flowinfo(inner_hdr));
+	} else if (skb->protocol == htons(ETH_P_IP)) {
+		tos = ip_hdr(skb)->tos;
+		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 	} else {
-		ip6_flow_hdr(hdr, 0, flowlabel);
-		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
-
 		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 	}
 
+	skb_push(skb, tot_len);
+	skb_reset_network_header(skb);
+	skb_mac_header_rebuild(skb);
+
+	hdr = ipv6_hdr(skb);
+	ip6_flow_hdr(hdr, tos, flowlabel);
+	hdr->hop_limit = hop_limit;
 	hdr->nexthdr = NEXTHDR_ROUTING;
 
 	isrh = (void *)hdr + sizeof(*hdr);