diff mbox series

[v2] ipv4: Namespaceify tcp_fastopen knob

Message ID 1505301598-12681-1-git-send-email-yanhaishuang@cmss.chinamobile.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [v2] ipv4: Namespaceify tcp_fastopen knob | expand

Commit Message

Haishuang Yan Sept. 13, 2017, 11:19 a.m. UTC
Different namespace application might require enable TCP Fast Open
feature independently of the host.

Reported-by: Luca BRUNO <lucab@debian.org>
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>

---
Change since v2:
  * Remove unrelated change by mistake
---
 include/net/netns/ipv4.h   |  2 ++
 include/net/tcp.h          |  1 -
 net/ipv4/af_inet.c         |  7 ++++---
 net/ipv4/sysctl_net_ipv4.c | 42 +++++++++++++++++++++---------------------
 net/ipv4/tcp.c             |  4 ++--
 net/ipv4/tcp_fastopen.c    | 13 ++++++-------
 net/ipv4/tcp_ipv4.c        |  2 ++
 7 files changed, 37 insertions(+), 34 deletions(-)

Comments

Eric Dumazet Sept. 13, 2017, 12:44 p.m. UTC | #1
On Wed, 2017-09-13 at 19:19 +0800, Haishuang Yan wrote:
> Different namespace application might require enable TCP Fast Open
> feature independently of the host.
> 

Poor changelog, no actual description / list of sysctls that are moved
to per netns.

And looking at the patch, it seems your conversion is not complete.

So I will ask you to provide more evidence that you tested your patch
next time you submit it.

> Reported-by: Luca BRUNO <lucab@debian.org>
> Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
> 
> ---
> Change since v2:
>   * Remove unrelated change by mistake
> ---
>  include/net/netns/ipv4.h   |  2 ++
>  include/net/tcp.h          |  1 -
>  net/ipv4/af_inet.c         |  7 ++++---
>  net/ipv4/sysctl_net_ipv4.c | 42 +++++++++++++++++++++---------------------
>  net/ipv4/tcp.c             |  4 ++--
>  net/ipv4/tcp_fastopen.c    | 13 ++++++-------
>  net/ipv4/tcp_ipv4.c        |  2 ++
>  7 files changed, 37 insertions(+), 34 deletions(-)
> 
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 305e031..ea0953b 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -128,6 +128,8 @@ struct netns_ipv4 {
>  	struct inet_timewait_death_row tcp_death_row;
>  	int sysctl_max_syn_backlog;
>  	int sysctl_tcp_max_orphans;
> +	int sysctl_tcp_fastopen;
> +	unsigned int sysctl_tcp_fastopen_blackhole_timeout;
>  
>  #ifdef CONFIG_NET_L3_MASTER_DEV
>  	int sysctl_udp_l3mdev_accept;
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index ac2d998..e4cc0dd 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -240,7 +240,6 @@
>  
> 
>  /* sysctl variables for tcp */
> -extern int sysctl_tcp_fastopen;
>  extern int sysctl_tcp_retrans_collapse;
>  extern int sysctl_tcp_stdurg;
>  extern int sysctl_tcp_rfc1337;
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index e31108e..309b849 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
>  {
>  	struct sock *sk = sock->sk;
>  	unsigned char old_state;
> -	int err;
> +	int err, tcp_fastopen;
>  
>  	lock_sock(sk);
>  
> @@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog)
>  		 * because the socket was in TCP_LISTEN state previously but
>  		 * was shutdown() rather than close().
>  		 */
> -		if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
> -		    (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
> +		tcp_fastopen =  sock_net(sk)->ipv4.sysctl_tcp_fastopen;
> +		if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
> +		    (tcp_fastopen & TFO_SERVER_ENABLE) &&
>  		    !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
>  			fastopen_queue_tune(sk, backlog);
>  			tcp_fastopen_init_key_once(true);
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 4f26c8d3..30ebeb9 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -394,27 +394,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
>  		.proc_handler	= proc_dointvec
>  	},
>  	{
> -		.procname	= "tcp_fastopen",
> -		.data		= &sysctl_tcp_fastopen,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "tcp_fastopen_key",
> -		.mode		= 0600,
> -		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
> -		.proc_handler	= proc_tcp_fastopen_key,
> -	},
> -	{
> -		.procname	= "tcp_fastopen_blackhole_timeout_sec",
> -		.data		= &sysctl_tcp_fastopen_blackhole_timeout,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_tfo_blackhole_detect_timeout,
> -		.extra1		= &zero,
> -	},
> -	{
>  		.procname	= "tcp_abort_on_overflow",
>  		.data		= &sysctl_tcp_abort_on_overflow,
>  		.maxlen		= sizeof(int),
> @@ -1085,6 +1064,27 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec
>  	},
> +	{
> +		.procname	= "tcp_fastopen",
> +		.data		= &init_net.ipv4.sysctl_tcp_fastopen,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "tcp_fastopen_key",

But proc_tcp_fastopen_key() is not per netns yet.



> +		.mode		= 0600,
> +		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
> +		.proc_handler	= proc_tcp_fastopen_key,
> +	},


As a reminder, net-next is closed.

Thanks.
Eric Dumazet Sept. 13, 2017, 1:02 p.m. UTC | #2
On Wed, 2017-09-13 at 05:44 -0700, Eric Dumazet wrote:
> On Wed, 2017-09-13 at 19:19 +0800, Haishuang Yan wrote:
> > Different namespace application might require enable TCP Fast Open
> > feature independently of the host.
> > 
> 
> Poor changelog, no actual description / list of sysctls that are moved
> to per netns.
> 
> And looking at the patch, it seems your conversion is not complete.
> 
> So I will ask you to provide more evidence that you tested your patch
> next time you submit it.

I suggest you move one sysctl at a time, in a patch series.

It will be easier to document and test for you, and review for us.

Thanks.
Haishuang Yan Sept. 14, 2017, 6:19 a.m. UTC | #3
> On 2017年9月13日, at 下午9:02, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> On Wed, 2017-09-13 at 05:44 -0700, Eric Dumazet wrote:
>> On Wed, 2017-09-13 at 19:19 +0800, Haishuang Yan wrote:
>>> Different namespace application might require enable TCP Fast Open
>>> feature independently of the host.
>>> 
>> 
>> Poor changelog, no actual description / list of sysctls that are moved
>> to per netns.
>> 
>> And looking at the patch, it seems your conversion is not complete.
>> 
>> So I will ask you to provide more evidence that you tested your patch
>> next time you submit it.
> 
> I suggest you move one sysctl at a time, in a patch series.
> 
> It will be easier to document and test for you, and review for us.
> 
> Thanks.
> 

Okay, I will split my patch for each sysctl change. Thanks.
diff mbox series

Patch

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 305e031..ea0953b 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -128,6 +128,8 @@  struct netns_ipv4 {
 	struct inet_timewait_death_row tcp_death_row;
 	int sysctl_max_syn_backlog;
 	int sysctl_tcp_max_orphans;
+	int sysctl_tcp_fastopen;
+	unsigned int sysctl_tcp_fastopen_blackhole_timeout;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ac2d998..e4cc0dd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@ 
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_fastopen;
 extern int sysctl_tcp_retrans_collapse;
 extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e..309b849 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@  int inet_listen(struct socket *sock, int backlog)
 {
 	struct sock *sk = sock->sk;
 	unsigned char old_state;
-	int err;
+	int err, tcp_fastopen;
 
 	lock_sock(sk);
 
@@ -217,8 +217,9 @@  int inet_listen(struct socket *sock, int backlog)
 		 * because the socket was in TCP_LISTEN state previously but
 		 * was shutdown() rather than close().
 		 */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
-		    (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+		tcp_fastopen =  sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+		if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+		    (tcp_fastopen & TFO_SERVER_ENABLE) &&
 		    !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
 			fastopen_queue_tune(sk, backlog);
 			tcp_fastopen_init_key_once(true);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4f26c8d3..30ebeb9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -394,27 +394,6 @@  static int proc_tcp_available_ulp(struct ctl_table *ctl,
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.procname	= "tcp_fastopen",
-		.data		= &sysctl_tcp_fastopen,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "tcp_fastopen_key",
-		.mode		= 0600,
-		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
-		.proc_handler	= proc_tcp_fastopen_key,
-	},
-	{
-		.procname	= "tcp_fastopen_blackhole_timeout_sec",
-		.data		= &sysctl_tcp_fastopen_blackhole_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_tfo_blackhole_detect_timeout,
-		.extra1		= &zero,
-	},
-	{
 		.procname	= "tcp_abort_on_overflow",
 		.data		= &sysctl_tcp_abort_on_overflow,
 		.maxlen		= sizeof(int),
@@ -1085,6 +1064,27 @@  static int proc_tcp_available_ulp(struct ctl_table *ctl,
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "tcp_fastopen",
+		.data		= &init_net.ipv4.sysctl_tcp_fastopen,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "tcp_fastopen_key",
+		.mode		= 0600,
+		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+		.proc_handler	= proc_tcp_fastopen_key,
+	},
+	{
+		.procname	= "tcp_fastopen_blackhole_timeout_sec",
+		.data		= &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_tfo_blackhole_detect_timeout,
+		.extra1		= &zero,
+	},
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	{
 		.procname	= "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39187ac..b3a2ffc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1126,7 +1126,7 @@  static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 	struct sockaddr *uaddr = msg->msg_name;
 	int err, flags;
 
-	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+	if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
 	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
 	     uaddr->sa_family == AF_UNSPEC))
 		return -EOPNOTSUPP;
@@ -2759,7 +2759,7 @@  static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_FASTOPEN_CONNECT:
 		if (val > 1 || val < 0) {
 			err = -EINVAL;
-		} else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+		} else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
 			if (sk->sk_state == TCP_CLOSE)
 				tp->fastopen_connect = val;
 			else
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index e3c3322..1bf57e8 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -9,8 +9,6 @@ 
 #include <net/inetpeer.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE;
-
 struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 
 static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
@@ -282,18 +280,19 @@  struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
 	struct sock *child;
+	int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
 
 	if (foc->len == 0) /* Client requests a cookie */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
 
-	if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+	if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
 	      (syn_data || foc->len >= 0) &&
 	      tcp_fastopen_queue_check(sk))) {
 		foc->len = -1;
 		return NULL;
 	}
 
-	if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+	if (syn_data && (tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
 		goto fastopen;
 
 	if (foc->len >= 0 &&  /* Client presents or requests a cookie */
@@ -347,7 +346,7 @@  bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 		return false;
 	}
 
-	if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
+	if (sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
 		cookie->len = -1;
 		return true;
 	}
@@ -402,7 +401,6 @@  bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
  */
 
 /* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
 static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
 static unsigned long tfo_active_disable_stamp __read_mostly;
 
@@ -431,13 +429,14 @@  bool tcp_fastopen_active_should_disable(struct sock *sk)
 	int tfo_da_times = atomic_read(&tfo_active_disable_times);
 	int multiplier;
 	unsigned long timeout;
+	unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
 
 	if (!tfo_da_times)
 		return false;
 
 	/* Limit timout to max: 2^6 * initial timeout */
 	multiplier = 1 << min(tfo_da_times - 1, 6);
-	timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
+	timeout = multiplier * tfo_bh_timeout * HZ;
 	if (time_before(jiffies, tfo_active_disable_stamp + timeout))
 		return true;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4b17a91..38d30ea 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2473,6 +2473,8 @@  static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_window_scaling = 1;
 	net->ipv4.sysctl_tcp_timestamps = 1;
 
+	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
+	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
 	return 0;
 fail:
 	tcp_sk_exit(net);