diff mbox series

[ipsec/vti,1/2] vti: fragment IPv4 packets when DF bit is not set

Message ID 1552865877-13401-2-git-send-email-bram-yvahk@mail.wizbit.be
State Awaiting Upstream
Delegated to: David Miller
Headers show
Series Fragmentation of IPv4 in VTI | expand

Commit Message

Bram Yvakh March 17, 2019, 11:37 p.m. UTC
Only send a 'need to frag' ICMP message when the "Don't Fragment" bit
is set. If it's not set then the packet can/will be fragmented.

This fixes sending an 'need to frag' message on a client that did not
set the DF bit, i.e.:

	$ ping -s 1300 -M dont -c5 192.168.235.2
	PING 192.168.235.3 (192.168.235.3) 1300(1328) bytes of data.
	From 192.168.236.254 icmp_seq=1 Frag needed and DF set (mtu = 1214)

Signed-off-by: Bram Yvahk <bram-yvahk@mail.wizbit.be>
---
 net/ipv4/ip_vti.c  | 43 +++++++++++++++++++++++++++--------------
 net/ipv6/ip6_vti.c | 56 +++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 70 insertions(+), 29 deletions(-)

Comments

Bram Yvakh March 17, 2019, 11:52 p.m. UTC | #1
Bram Yvahk wrote:
> Only send a 'need to frag' ICMP message when the "Don't Fragment" bit
> is set. If it's not set then the packet can/will be fragmented.
>
> This fixes sending an 'need to frag' message on a client that did not
> set the DF bit, i.e.:
>
>         $ ping -s 1300 -M dont -c5 192.168.235.2
>         PING 192.168.235.3 (192.168.235.3) 1300(1328) bytes of data.
>         From 192.168.236.254 icmp_seq=1 Frag needed and DF set (mtu = 1214)
>
> Signed-off-by: Bram Yvahk <bram-yvahk@mail.wizbit.be>
> ---
>  net/ipv4/ip_vti.c  | 43 +++++++++++++++++++++++++++--------------
>  net/ipv6/ip6_vti.c | 56 +++++++++++++++++++++++++++++++++++++++---------------
>  2 files changed, 70 insertions(+), 29 deletions(-)
>
>   
Parts in the patch that I don't really like:

1) adds almost identical code in ipv4/ip_vti.c and ipv6/ip6_vti.c
    (is there a place to share this code?)

2) 'vti_tunnel_check_size'/'vti6_tunnel_check_size' functions:
   Name and implementation of this function is based on the xfrm code
   but to me "check" in the function name implies that it's (only)
   checking something but in this case it's doing a bit more: it also
updates
   the path-mtu and transmitting an ICMP message.

3) return value of 'vti6_tunnel_check_size':

   In XFRM ('xfrm4_tunnel_check_size'/'xfrm6_tunnel_check_size') return
   value of '0' and '-EMSGSIZE' is used.
  
   I opted not to follow this because of how the code is called in
   'vti6_xmit'. It has an 'err' variable but it is initialized to '-1'.
  
   What I considered:
   - let 'vti6_tunnel_check_size' return 0 and -EMSGSIZE: this matches
     with XFRM but it makes the 'err' variable in 'vti_xmit' a bit
confusing.
     Before calling 'vti6_tunnel_check_size' err=-1 means no error, after
     calling 'vti6_tunnel_check_size' err=0 means no error..
    
   - let 'vti6_tunnel_check_size' return 0 and -EMSGSIZE and use a second
     err variable in vti6_xmit (i.e.
       if ((err2 = vti6_tunnel_check_size(..)) < 0) { err=err2; goto ... }
  
   - let 'vti6_tunnel_check_size' return -1 and -EMSGSIZE: this matches
     with XFRM and works but to me it seems confusing for the function to
     return -1 when there is no error...
    
   - let 'vti6_tunnel_check_size' return a bool and set error in 'vti6_xmit'
  
   - change vti6_xmit to initialize err to 0 (but I've no idea of the impact
     of that)
    
   The least ugly solution to me was the bool so I went with that.
  
4) no error is set in vti_xmit (ip_vti): when the packet is too large it
   doesn't set an error and always returns 'NETDEV_TX_OK'. [This was already
   the case so that behaviour is kept but it doesn't look right...]
diff mbox series

Patch

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 68a21bf..5738e44 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -196,6 +196,34 @@  static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src)
 	return true;
 }
 
+static bool vti_tunnel_check_size(struct sk_buff *skb)
+{
+	int mtu;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
+			return true;
+	}
+
+	mtu = dst_mtu(skb_dst(skb));
+	if (skb->len > mtu) {
+		skb_dst_update_pmtu(skb, mtu);
+		if (skb->protocol == htons(ETH_P_IP)) {
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		} else {
+			if (mtu < IPV6_MIN_MTU)
+				mtu = IPV6_MIN_MTU;
+
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		}
+
+		return false;
+	}
+
+	return true;
+}
+
 static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 			    struct flowi *fl)
 {
@@ -205,7 +233,6 @@  static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 	struct net_device *tdev;	/* Device to other host */
 	int pkt_len = skb->len;
 	int err;
-	int mtu;
 
 	if (!dst) {
 		dev->stats.tx_carrier_errors++;
@@ -233,19 +260,7 @@  static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 		goto tx_error;
 	}
 
-	mtu = dst_mtu(dst);
-	if (skb->len > mtu) {
-		skb_dst_update_pmtu(skb, mtu);
-		if (skb->protocol == htons(ETH_P_IP)) {
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-				  htonl(mtu));
-		} else {
-			if (mtu < IPV6_MIN_MTU)
-				mtu = IPV6_MIN_MTU;
-
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		}
-
+	if (!vti_tunnel_check_size(skb)) {
 		dst_release(dst);
 		goto tx_error;
 	}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 8b6eeff..47f178c 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -436,6 +436,46 @@  static bool vti6_state_check(const struct xfrm_state *x,
 }
 
 /**
+ * vti6_tunnel_check_size - check size of packet
+ *   @skb: the outgoing socket buffer
+ *
+ * Description:
+ *   Check if packet is too large (> pmtu)
+ *
+ * Return:
+ *   true if size of packet is ok
+ *   false if packet is too large
+ **/
+static bool vti6_tunnel_check_size(struct sk_buff *skb)
+{
+	int mtu;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
+			return true;
+	}
+
+	mtu = dst_mtu(skb_dst(skb));
+	if (skb->len > mtu) {
+		skb_dst_update_pmtu(skb, mtu);
+
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			if (mtu < IPV6_MIN_MTU)
+				mtu = IPV6_MIN_MTU;
+
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		} else {
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		}
+
+		return false;
+	}
+
+	return true;
+}
+
+/**
  * vti6_xmit - send a packet
  *   @skb: the outgoing socket buffer
  *   @dev: the outgoing tunnel device
@@ -451,7 +491,6 @@  vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 	struct xfrm_state *x;
 	int pkt_len = skb->len;
 	int err = -1;
-	int mtu;
 
 	if (!dst)
 		goto tx_err_link_failure;
@@ -481,20 +520,7 @@  vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 		goto tx_err_dst_release;
 	}
 
-	mtu = dst_mtu(dst);
-	if (skb->len > mtu) {
-		skb_dst_update_pmtu(skb, mtu);
-
-		if (skb->protocol == htons(ETH_P_IPV6)) {
-			if (mtu < IPV6_MIN_MTU)
-				mtu = IPV6_MIN_MTU;
-
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		} else {
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-				  htonl(mtu));
-		}
-
+	if (!vti6_tunnel_check_size(skb)) {
 		err = -EMSGSIZE;
 		goto tx_err_dst_release;
 	}