From patchwork Tue Sep 19 09:38:16 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Haishuang Yan X-Patchwork-Id: 815373 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3xxHrN4XGdz9s7F for ; Tue, 19 Sep 2017 19:39:00 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751479AbdISJis (ORCPT ); Tue, 19 Sep 2017 05:38:48 -0400 Received: from cmccmta3.chinamobile.com ([221.176.66.81]:2977 "EHLO cmccmta3.chinamobile.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751341AbdISJiq (ORCPT ); Tue, 19 Sep 2017 05:38:46 -0400 Received: from spf.mail.chinamobile.com (unknown[172.16.121.3]) by rmmx-syy-dmz-app11-12011 (RichMail) with SMTP id 2eeb59c0e5a3e22-b5fea; Tue, 19 Sep 2017 17:38:43 +0800 (CST) X-RM-TRANSID: 2eeb59c0e5a3e22-b5fea X-RM-SPAM-FLAG: 00000000 Received: from localhost (unknown[223.105.0.240]) by rmsmtp-syy-appsvr02-12002 (RichMail) with SMTP id 2ee259c0e5a2264-5f33b; Tue, 19 Sep 2017 17:38:43 +0800 (CST) X-RM-TRANSID: 2ee259c0e5a2264-5f33b From: Haishuang Yan To: "David S. Miller" , Alexey Kuznetsov , Eric Dumazet , Wei Wang , Luca BRUNO Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Haishuang Yan Subject: [Patch v3 3/3] ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob Date: Tue, 19 Sep 2017 17:38:16 +0800 Message-Id: <1505813896-12121-3-git-send-email-yanhaishuang@cmss.chinamobile.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1505813896-12121-1-git-send-email-yanhaishuang@cmss.chinamobile.com> References: <1505813896-12121-1-git-send-email-yanhaishuang@cmss.chinamobile.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Different namespace application might require different time period in second to disable Fastopen on active TCP sockets. Tested: Simulate following similar situation that the server's data gets dropped after 3WHS. C ---- syn-data ---> S C <--- syn/ack ----- S C ---- ack --------> S S (accept & write) C? X <- data ------ S [retry and timeout] And then print netstat of TCPFastOpenBlackhole, the counter increased as expected when the firewall blackhole issue is detected and active TFO is disabled. # cat /proc/net/netstat | awk '{print $91}' TCPFastOpenBlackhole 1 Signed-off-by: Haishuang Yan --- include/net/netns/ipv4.h | 3 +++ net/ipv4/sysctl_net_ipv4.c | 20 +++++++++++--------- net/ipv4/tcp_fastopen.c | 28 ++++++++++------------------ net/ipv4/tcp_ipv4.c | 2 ++ 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 66b8335..d76edde 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,9 @@ struct netns_ipv4 { int sysctl_tcp_fastopen; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; spinlock_t tcp_fastopen_ctx_lock; + unsigned int sysctl_tcp_fastopen_blackhole_timeout; + atomic_t tfo_active_disable_times; + unsigned long tfo_active_disable_stamp; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 20e19fe..cac8dd3 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -355,11 +355,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_tcp_fastopen_blackhole_timeout); int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) - tcp_fastopen_active_timeout_reset(); + atomic_set(&net->ipv4.tfo_active_disable_times, 0); return ret; } @@ -398,14 +400,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, .proc_handler = proc_dointvec }, { - .procname = "tcp_fastopen_blackhole_timeout_sec", - .data = &sysctl_tcp_fastopen_blackhole_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_tfo_blackhole_detect_timeout, - .extra1 = &zero, - }, - { .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, .maxlen = sizeof(int), @@ -1083,6 +1077,14 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), .proc_handler = proc_tcp_fastopen_key, }, + { + .procname = "tcp_fastopen_blackhole_timeout_sec", + .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tfo_blackhole_detect_timeout, + .extra1 = &zero, + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 21783c7..f3b00a9 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -407,25 +407,16 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err) * TFO connection with data exchanges. */ -/* Default to 1hr */ -unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60; -static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0); -static unsigned long tfo_active_disable_stamp __read_mostly; - /* Disable active TFO and record current jiffies and * tfo_active_disable_times */ void tcp_fastopen_active_disable(struct sock *sk) { - atomic_inc(&tfo_active_disable_times); - tfo_active_disable_stamp = jiffies; - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE); -} + struct net *net = sock_net(sk); -/* Reset tfo_active_disable_times to 0 */ -void tcp_fastopen_active_timeout_reset(void) -{ - atomic_set(&tfo_active_disable_times, 0); + atomic_inc(&net->ipv4.tfo_active_disable_times); + net->ipv4.tfo_active_disable_stamp = jiffies; + NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); } /* Calculate timeout for tfo active disable @@ -434,17 +425,18 @@ void tcp_fastopen_active_timeout_reset(void) */ bool tcp_fastopen_active_should_disable(struct sock *sk) { - int tfo_da_times = atomic_read(&tfo_active_disable_times); + int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); int multiplier; unsigned long timeout; + unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; if (!tfo_da_times) return false; /* Limit timout to max: 2^6 * initial timeout */ multiplier = 1 << min(tfo_da_times - 1, 6); - timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ; - if (time_before(jiffies, tfo_active_disable_stamp + timeout)) + timeout = multiplier * tfo_bh_timeout * HZ; + if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) return true; /* Mark check bit so we can check for successful active TFO @@ -480,10 +472,10 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) } } } else if (tp->syn_fastopen_ch && - atomic_read(&tfo_active_disable_times)) { + atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { dst = sk_dst_get(sk); if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) - tcp_fastopen_active_timeout_reset(); + atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); dst_release(dst); } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 29e042c..4b7aedb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2474,6 +2474,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); + net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; + atomic_set(&net->ipv4.tfo_active_disable_times, 0); return 0; fail: