diff mbox

[RFC,1/1] IPVS netns shutdown/startup dead-lock

Message ID 1307957530-12732-1-git-send-email-hans.schillstrom@ericsson.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Hans Schillstrom June 13, 2011, 9:32 a.m. UTC
ip_vs_mutext is used by both netns shutdown code and startup
and both implicit uses sk_lock-AF_INET mutex.

cleanup CPU-1         startup CPU-2
ip_vs_dst_event()     ip_vs_genl_set_cmd()
 sk_lock-AF_INET     __ip_vs_mutex
                     sk_lock-AF_INET
__ip_vs_mutex
* DEAD LOCK *

This can be solved by have the ip_vs_mutex per netns
or avid locking when starting/stoping sync-threads.
i.e. just add a starting/stoping flag.

ip_vs_mutex per name-space seems to be a more future proof solution.

Which one should be used ?

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
---
 include/net/ip_vs.h             |    2 ++
 net/netfilter/ipvs/ip_vs_ctl.c  |   15 ++++++++++-----
 net/netfilter/ipvs/ip_vs_sync.c |   30 +++++++++++++++++++++++++-----
 3 files changed, 37 insertions(+), 10 deletions(-)

Comments

Simon Horman June 13, 2011, 11:11 a.m. UTC | #1
On Mon, Jun 13, 2011 at 11:32:10AM +0200, Hans Schillstrom wrote:
> ip_vs_mutext is used by both netns shutdown code and startup
> and both implicit uses sk_lock-AF_INET mutex.
> 
> cleanup CPU-1         startup CPU-2
> ip_vs_dst_event()     ip_vs_genl_set_cmd()
>  sk_lock-AF_INET     __ip_vs_mutex
>                      sk_lock-AF_INET
> __ip_vs_mutex
> * DEAD LOCK *
> 
> This can be solved by have the ip_vs_mutex per netns
> or avid locking when starting/stoping sync-threads.
> i.e. just add a starting/stoping flag.
> 
> ip_vs_mutex per name-space seems to be a more future proof solution.
> 
> Which one should be used ?

I don't feel strongly either way.

> Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
> ---
>  include/net/ip_vs.h             |    2 ++
>  net/netfilter/ipvs/ip_vs_ctl.c  |   15 ++++++++++-----
>  net/netfilter/ipvs/ip_vs_sync.c |   30 +++++++++++++++++++++++++-----
>  3 files changed, 37 insertions(+), 10 deletions(-)
> 

[snip]

> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index 699c79a..21c541f 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c

[snip]

> @@ -3305,12 +3309,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
>  			ret = -EINVAL;
>  			goto out;
>  		}
> -
> +		/* Unlock since a global socket lock will be taken later */
> +		mutex_unlock(&__ip_vs_mutex);
>  		if (cmd == IPVS_CMD_NEW_DAEMON)
>  			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
>  		else
>  			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
> -		goto out;
> +		goto out_nounlock;

I'm not a huge fan of labels that only return.
So I think it would be slightly easier on the eyes
to just return here, not add out_nounlock,
and possibly rename out as out_unlock.

>  	} else if (cmd == IPVS_CMD_ZERO &&
>  		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
>  		ret = ip_vs_zero_all(net);

[snip]
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov June 13, 2011, 9:25 p.m. UTC | #2
Hello,

On Mon, 13 Jun 2011, Hans Schillstrom wrote:

> ip_vs_mutext is used by both netns shutdown code and startup
> and both implicit uses sk_lock-AF_INET mutex.
> 
> cleanup CPU-1         startup CPU-2
> ip_vs_dst_event()     ip_vs_genl_set_cmd()
>  sk_lock-AF_INET     __ip_vs_mutex
>                      sk_lock-AF_INET
> __ip_vs_mutex
> * DEAD LOCK *

	So, sk_lock-AF_INET is locked before calling
ip_vs_dst_event ? Do you have a backtrace for this case?
I assume your patch is tested to fix the problem ?

> This can be solved by have the ip_vs_mutex per netns
> or avid locking when starting/stoping sync-threads.
> i.e. just add a starting/stoping flag.

	sk_release_kernel is called in thread
context, so ip_vs_mutex is not involved there. We
have a problem only with start_sync_thread, right?

> ip_vs_mutex per name-space seems to be a more future proof solution.

	Global mutex protects some global lists such as
virtual services. If your patch works, better way to fix this problem
is to use some new mutex. May be we can move the IPVS_CMD_NEW_DAEMON,
IPVS_CMD_DEL_DAEMON and IP_VS_SO_GET_DAEMON code before the
__ip_vs_mutex locking. This mutex should be used for start_sync_thread,
stop_sync_thread, ip_vs_genl_dump_daemons and IP_VS_SO_GET_DAEMON.
For example, ip_vs_sync_mutex.

	Note that __ip_vs_sync_cleanup is missing a
__ip_vs_mutex lock. We have to use the new mutex there.

> Which one should be used ?

	For now __ip_vs_mutex should be global ...

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 34a6fa8..e82fa8d 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -895,6 +895,8 @@  struct netns_ipvs {
 	struct sockaddr_in	sync_mcast_addr;
 	struct task_struct	*master_thread;
 	struct task_struct	*backup_thread;
+	atomic_t		master_flg;	/* Start-up flag*/
+	atomic_t		backup_flg;
 	int			send_mesg_maxlen;
 	int			recv_mesg_maxlen;
 	volatile int		sync_state;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 699c79a..21c541f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2318,13 +2318,17 @@  do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+		/* Unlock since a global socket lock will be taken later */
+		mutex_unlock(&__ip_vs_mutex);
 		ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
 					dm->syncid);
-		goto out_unlock;
+		goto out_dec;
 	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+		/* Unlock since a global socket lock will be taken later */
+		mutex_unlock(&__ip_vs_mutex);
 		ret = stop_sync_thread(net, dm->state);
-		goto out_unlock;
+		goto out_dec;
 	}
 
 	usvc_compat = (struct ip_vs_service_user *)arg;
@@ -3305,12 +3309,13 @@  static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 			ret = -EINVAL;
 			goto out;
 		}
-
+		/* Unlock since a global socket lock will be taken later */
+		mutex_unlock(&__ip_vs_mutex);
 		if (cmd == IPVS_CMD_NEW_DAEMON)
 			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
 		else
 			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
-		goto out;
+		goto out_nounlock;
 	} else if (cmd == IPVS_CMD_ZERO &&
 		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
 		ret = ip_vs_zero_all(net);
@@ -3382,7 +3387,7 @@  static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 
 out:
 	mutex_unlock(&__ip_vs_mutex);
-
+out_nounlock:
 	return ret;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index e292e5b..7a996dc 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1540,30 +1540,37 @@  int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 	char *name, *buf = NULL;
 	int (*threadfn)(void *data);
 	int result = -ENOMEM;
+	atomic_t *run_flg;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
 		  sizeof(struct ip_vs_sync_conn_v0));
 
+	/* master/backup_flag is used to protect for multiple starts
+	 * the ip_vs_mutex can't be used here due to deadlock problems.*/
 	if (state == IP_VS_STATE_MASTER) {
-		if (ipvs->master_thread)
+		if (ipvs->master_thread ||
+		    !atomic_dec_and_test(&ipvs->master_flg))
 			return -EEXIST;
 
 		strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
 			sizeof(ipvs->master_mcast_ifn));
 		ipvs->master_syncid = syncid;
 		realtask = &ipvs->master_thread;
+		run_flg = &ipvs->master_flg;
 		name = "ipvs_master:%d";
 		threadfn = sync_thread_master;
 		sock = make_send_sock(net);
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (ipvs->backup_thread)
+		if (ipvs->backup_thread ||
+		    !atomic_dec_and_test(&ipvs->backup_flg))
 			return -EEXIST;
 
 		strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
 			sizeof(ipvs->backup_mcast_ifn));
 		ipvs->backup_syncid = syncid;
 		realtask = &ipvs->backup_thread;
+		run_flg = &ipvs->backup_flg;
 		name = "ipvs_backup:%d";
 		threadfn = sync_thread_backup;
 		sock = make_receive_sock(net);
@@ -1600,7 +1607,8 @@  int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 	/* mark as active */
 	*realtask = task;
 	ipvs->sync_state |= state;
-
+	/* Free to use again */
+	atomic_set(run_flg, 1);
 	/* increase the module use count */
 	ip_vs_use_count_inc();
 
@@ -1613,6 +1621,7 @@  outbuf:
 outsocket:
 	sk_release_kernel(sock->sk);
 out:
+	atomic_set(run_flg, -1);
 	return result;
 }
 
@@ -1621,11 +1630,15 @@  int stop_sync_thread(struct net *net, int state)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	int retc = -EINVAL;
+	atomic_t *run_flg;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
+	/* master/backup_flag is used to protect for multiple shutdowns
+	 * the ip_vs_mutex can't be used here due to deadlock problems.*/
 	if (state == IP_VS_STATE_MASTER) {
-		if (!ipvs->master_thread)
+		if (!ipvs->master_thread ||
+		    !atomic_dec_and_test(&ipvs->master_flg))
 			return -ESRCH;
 
 		pr_info("stopping master sync thread %d ...\n",
@@ -1642,8 +1655,11 @@  int stop_sync_thread(struct net *net, int state)
 		spin_unlock_bh(&ipvs->sync_lock);
 		retc = kthread_stop(ipvs->master_thread);
 		ipvs->master_thread = NULL;
+		/* Free to use again */
+		atomic_set(&ipvs->master_flg, 1);
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (!ipvs->backup_thread)
+		if (!ipvs->backup_thread ||
+		    !atomic_dec_and_test(&ipvs->backup_flg))
 			return -ESRCH;
 
 		pr_info("stopping backup sync thread %d ...\n",
@@ -1652,6 +1668,8 @@  int stop_sync_thread(struct net *net, int state)
 		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
 		retc = kthread_stop(ipvs->backup_thread);
 		ipvs->backup_thread = NULL;
+		/* Free to use again */
+		atomic_set(&ipvs->backup_flg, 1);
 	}
 
 	/* decrease the module use count */
@@ -1674,6 +1692,8 @@  int __net_init __ip_vs_sync_init(struct net *net)
 	ipvs->sync_mcast_addr.sin_family = AF_INET;
 	ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
 	ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
+	atomic_set(&ipvs->master_flg, 1);
+	atomic_set(&ipvs->backup_flg, 1);
 	return 0;
 }