diff mbox

[net,v2] ipv4: allow local fragmentation in ip_finish_output_gso()

Message ID 1478117124-12462-1-git-send-email-lrichard@redhat.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Lance Richardson Nov. 2, 2016, 8:05 p.m. UTC
Some configurations (e.g. geneve interface with default
MTU of 1500 over an ethernet interface with 1500 MTU) result
in the transmission of packets that exceed the configured MTU.
While this should be considered to be a "bad" configuration,
it is still allowed and should not result in the sending
of packets that exceed the configured MTU.

Fix by dropping the assumption in ip_finish_output_gso() that
locally originated gso packets will never need fragmentation.
Basic testing using iperf (observing CPU usage and bandwidth)
has shown no measurable performance impact for traffic not
requiring fragmentation.

Fixes: c7ba65d7b649 ("net: ip: push gso skb forwarding handling down the stack")
Reported-by: Jan Tluka <jtluka@redhat.com>
Signed-off-by: Lance Richardson <lrichard@redhat.com>
---
 v2: IPSKB_FRAG_SEGS is no longer useful, remove it.

 include/net/ip.h          |  3 +--
 net/ipv4/ip_forward.c     |  2 +-
 net/ipv4/ip_output.c      |  6 ++----
 net/ipv4/ip_tunnel_core.c | 10 ----------
 net/ipv4/ipmr.c           |  2 +-
 5 files changed, 5 insertions(+), 18 deletions(-)

Comments

kernel test robot Nov. 2, 2016, 8:20 p.m. UTC | #1
Hi Lance,

[auto build test WARNING on net/master]

url:    https://github.com/0day-ci/linux/commits/Lance-Richardson/ipv4-allow-local-fragmentation-in-ip_finish_output_gso/20161103-040904
config: x86_64-randconfig-x014-201644 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   net/ipv4/ip_tunnel_core.c: In function 'iptunnel_xmit':
>> net/ipv4/ip_tunnel_core.c:66:6: warning: unused variable 'skb_iif' [-Wunused-variable]
     int skb_iif = skb->skb_iif;
         ^~~~~~~

vim +/skb_iif +66 net/ipv4/ip_tunnel_core.c

0e6fbc5b6 Pravin B Shelar   2013-06-17  50  
55c2bc143 Tom Herbert       2016-05-18  51  const struct ip_tunnel_encap_ops __rcu *
55c2bc143 Tom Herbert       2016-05-18  52  		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
55c2bc143 Tom Herbert       2016-05-18  53  EXPORT_SYMBOL(iptun_encaps);
55c2bc143 Tom Herbert       2016-05-18  54  
058214a4d Tom Herbert       2016-05-18  55  const struct ip6_tnl_encap_ops __rcu *
058214a4d Tom Herbert       2016-05-18  56  		ip6tun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
058214a4d Tom Herbert       2016-05-18  57  EXPORT_SYMBOL(ip6tun_encaps);
058214a4d Tom Herbert       2016-05-18  58  
039f50629 Pravin B Shelar   2015-12-24  59  void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
0e6fbc5b6 Pravin B Shelar   2013-06-17  60  		   __be32 src, __be32 dst, __u8 proto,
963a88b31 Nicolas Dichtel   2013-09-02  61  		   __u8 tos, __u8 ttl, __be16 df, bool xnet)
0e6fbc5b6 Pravin B Shelar   2013-06-17  62  {
bc22a0e2e Nicolas Dichtel   2015-09-18  63  	int pkt_len = skb->len - skb_inner_network_offset(skb);
f859b0f66 Eric W. Biederman 2015-10-07  64  	struct net *net = dev_net(rt->dst.dev);
039f50629 Pravin B Shelar   2015-12-24  65  	struct net_device *dev = skb->dev;
b8247f095 Shmulik Ladkani   2016-07-18 @66  	int skb_iif = skb->skb_iif;
0e6fbc5b6 Pravin B Shelar   2013-06-17  67  	struct iphdr *iph;
0e6fbc5b6 Pravin B Shelar   2013-06-17  68  	int err;
0e6fbc5b6 Pravin B Shelar   2013-06-17  69  
963a88b31 Nicolas Dichtel   2013-09-02  70  	skb_scrub_packet(skb, xnet);
963a88b31 Nicolas Dichtel   2013-09-02  71  
bf8d85d4f Eric Dumazet      2016-09-08  72  	skb_clear_hash_if_not_l4(skb);
0e6fbc5b6 Pravin B Shelar   2013-06-17  73  	skb_dst_set(skb, &rt->dst);
0e6fbc5b6 Pravin B Shelar   2013-06-17  74  	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

:::::: The code at line 66 was first introduced by commit
:::::: b8247f095eddfbfdba0fcecd1e3525a6cdb4b585 net: ip_finish_output_gso: If skb_gso_network_seglen exceeds MTU, allow segmentation for local udp tunneled skbs

:::::: TO: Shmulik Ladkani <shmulik.ladkani@gmail.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/include/net/ip.h b/include/net/ip.h
index 5413883..d3a1078 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -47,8 +47,7 @@  struct inet_skb_parm {
 #define IPSKB_REROUTED		BIT(4)
 #define IPSKB_DOREDIRECT	BIT(5)
 #define IPSKB_FRAG_PMTU		BIT(6)
-#define IPSKB_FRAG_SEGS		BIT(7)
-#define IPSKB_L3SLAVE		BIT(8)
+#define IPSKB_L3SLAVE		BIT(7)
 
 	u16			frag_max_size;
 };
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 8b4ffd2..9f0a7b9 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -117,7 +117,7 @@  int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_uses_gateway)
 		goto sr_failed;
 
-	IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+	IPCB(skb)->flags |= IPSKB_FORWARDED;
 	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
 	if (ip_exceeds_mtu(skb, mtu)) {
 		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 03e7f73..4971401 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -239,11 +239,9 @@  static int ip_finish_output_gso(struct net *net, struct sock *sk,
 	struct sk_buff *segs;
 	int ret = 0;
 
-	/* common case: fragmentation of segments is not allowed,
-	 * or seglen is <= mtu
+	/* common case: seglen is <= mtu
 	 */
-	if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
-	      skb_gso_validate_mtu(skb, mtu))
+	if (skb_gso_validate_mtu(skb, mtu))
 		return ip_finish_output2(net, sk, skb);
 
 	/* Slowpath -  GSO segment length is exceeding the dst MTU.
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 777bc18..0f6995b1 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -73,16 +73,6 @@  void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 	skb_dst_set(skb, &rt->dst);
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
-	if (skb_iif && !(df & htons(IP_DF))) {
-		/* Arrived from an ingress interface, got encapsulated, with
-		 * fragmentation of encapulating frames allowed.
-		 * If skb is gso, the resulting encapsulated network segments
-		 * may exceed dst mtu.
-		 * Allow IP Fragmentation of segments.
-		 */
-		IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
-	}
-
 	/* Push down and install the IP header. */
 	skb_push(skb, sizeof(struct iphdr));
 	skb_reset_network_header(skb);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f006e1..27089f5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1749,7 +1749,7 @@  static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 		vif->dev->stats.tx_bytes += skb->len;
 	}
 
-	IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+	IPCB(skb)->flags |= IPSKB_FORWARDED;
 
 	/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
 	 * not only before forwarding, but after forwarding on all output