From patchwork Tue Oct 4 06:53:36 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hans Schillstrom X-Patchwork-Id: 117580 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id BF982B6F71 for ; Tue, 4 Oct 2011 17:54:13 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754329Ab1JDGyD (ORCPT ); Tue, 4 Oct 2011 02:54:03 -0400 Received: from mailgw9.se.ericsson.net ([193.180.251.57]:46795 "EHLO mailgw9.se.ericsson.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754282Ab1JDGyB (ORCPT ); Tue, 4 Oct 2011 02:54:01 -0400 X-AuditID: c1b4fb39-b7bfdae000005125-ed-4e8aad87361d Received: from esessmw0247.eemea.ericsson.se (Unknown_Domain [153.88.253.125]) by mailgw9.se.ericsson.net (Symantec Mail Security) with SMTP id 54.C4.20773.78DAA8E4; Tue, 4 Oct 2011 08:53:59 +0200 (CEST) Received: from seassled11.rnd.as.sw.ericsson.se (153.88.115.8) by esessmw0247.eemea.ericsson.se (153.88.115.94) with Microsoft SMTP Server id 8.3.137.0; Tue, 4 Oct 2011 08:53:36 +0200 Received: by seassled11.rnd.as.sw.ericsson.se (Postfix, from userid 88893) id 75C83406397; Tue, 4 Oct 2011 08:53:36 +0200 (CEST) From: Hans Schillstrom To: horms@verge.net.au, ja@ssi.bg, wensong@linux-vs.org, lvs-devel@vger.kernel.org, netdev@vger.kernel.org, netfilter-devel@vger.kernel.org CC: hans@schillstrom.com Subject: [PATCH 2/2] IPVS netns shutdown/startup dead-lock (Take III) Date: Tue, 4 Oct 2011 08:53:36 +0200 Message-ID: <1317711216-8416-1-git-send-email-hans.schillstrom@ericsson.com> X-Mailer: git-send-email 1.6.0.2 MIME-Version: 1.0 X-Brightmail-Tracker: AAAAAA== Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Hans Schillstrom ip_vs_mutext is used by both netns shutdown code and startup and both implicit uses sk_lock-AF_INET mutex. cleanup CPU-1 startup CPU-2 ip_vs_dst_event() ip_vs_genl_set_cmd() sk_lock-AF_INET __ip_vs_mutex sk_lock-AF_INET __ip_vs_mutex * DEAD LOCK * A new mutex placed in ip_vs netns struct called sync_mutex is added. Comments from Julian and Simon added. This patch has been running for more than 3 month now and it seems to work. Ver. 3 IP_VS_SO_GET_DAEMON in do_ip_vs_get_ctl protected by sync_mutex instead of __ip_vs_mutex as sugested by Julian. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov --- include/net/ip_vs.h | 1 + net/netfilter/ipvs/ip_vs_ctl.c | 131 ++++++++++++++++++++++++--------------- net/netfilter/ipvs/ip_vs_sync.c | 6 ++ 3 files changed, 87 insertions(+), 51 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1aaf915..8fa4430 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -900,6 +900,7 @@ struct netns_ipvs { volatile int sync_state; volatile int master_syncid; volatile int backup_syncid; + struct mutex sync_mutex; /* multicast interface name */ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a1af72f..c77b5a0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2284,6 +2284,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) struct ip_vs_service *svc; struct ip_vs_dest_user *udest_compat; struct ip_vs_dest_user_kern udest; + struct netns_ipvs *ipvs = net_ipvs(net); if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2304,6 +2305,24 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* increase the module use count */ ip_vs_use_count_inc(); + /* Handle daemons since they have another lock */ + if (cmd == IP_VS_SO_SET_STARTDAEMON || + cmd == IP_VS_SO_SET_STOPDAEMON) { + struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; + + if (mutex_lock_interruptible(&ipvs->sync_mutex)) { + ret = -ERESTARTSYS; + goto out_dec; + } + if (cmd == IP_VS_SO_SET_STARTDAEMON) + ret = start_sync_thread(net, dm->state, dm->mcast_ifn, + dm->syncid); + else + ret = stop_sync_thread(net, dm->state); + mutex_unlock(&ipvs->sync_mutex); + goto out_dec; + } + if (mutex_lock_interruptible(&__ip_vs_mutex)) { ret = -ERESTARTSYS; goto out_dec; @@ -2317,15 +2336,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Set timeout values for (tcp tcpfin udp) */ ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); goto out_unlock; - } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { - struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = start_sync_thread(net, dm->state, dm->mcast_ifn, - dm->syncid); - goto out_unlock; - } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { - struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = stop_sync_thread(net, dm->state); - goto out_unlock; } usvc_compat = (struct ip_vs_service_user *)arg; @@ -2585,6 +2595,33 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (copy_from_user(arg, user, copylen) != 0) return -EFAULT; + /* + * Handle daemons first since it has its own locking + */ + if (cmd == IP_VS_SO_GET_DAEMON) { + struct ip_vs_daemon_user d[2]; + + memset(&d, 0, sizeof(d)); + if (mutex_lock_interruptible(&ipvs->sync_mutex)) + return -ERESTARTSYS; + + if (ipvs->sync_state & IP_VS_STATE_MASTER) { + d[0].state = IP_VS_STATE_MASTER; + strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, + sizeof(d[0].mcast_ifn)); + d[0].syncid = ipvs->master_syncid; + } + if (ipvs->sync_state & IP_VS_STATE_BACKUP) { + d[1].state = IP_VS_STATE_BACKUP; + strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, + sizeof(d[1].mcast_ifn)); + d[1].syncid = ipvs->backup_syncid; + } + if (copy_to_user(user, &d, sizeof(d)) != 0) + ret = -EFAULT; + mutex_unlock(&ipvs->sync_mutex); + return ret; + } if (mutex_lock_interruptible(&__ip_vs_mutex)) return -ERESTARTSYS; @@ -2682,28 +2719,6 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } break; - case IP_VS_SO_GET_DAEMON: - { - struct ip_vs_daemon_user d[2]; - - memset(&d, 0, sizeof(d)); - if (ipvs->sync_state & IP_VS_STATE_MASTER) { - d[0].state = IP_VS_STATE_MASTER; - strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, - sizeof(d[0].mcast_ifn)); - d[0].syncid = ipvs->master_syncid; - } - if (ipvs->sync_state & IP_VS_STATE_BACKUP) { - d[1].state = IP_VS_STATE_BACKUP; - strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, - sizeof(d[1].mcast_ifn)); - d[1].syncid = ipvs->backup_syncid; - } - if (copy_to_user(user, &d, sizeof(d)) != 0) - ret = -EFAULT; - } - break; - default: ret = -EINVAL; } @@ -3206,7 +3221,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct net *net = skb_sknet(skb); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->sync_mutex); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, ipvs->master_mcast_ifn, @@ -3226,7 +3241,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, } nla_put_failure: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->sync_mutex); return skb->len; } @@ -3272,13 +3287,9 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) return ip_vs_set_timeout(net, &t); } -static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) { - struct ip_vs_service *svc = NULL; - struct ip_vs_service_user_kern usvc; - struct ip_vs_dest_user_kern udest; int ret = 0, cmd; - int need_full_svc = 0, need_full_dest = 0; struct net *net; struct netns_ipvs *ipvs; @@ -3286,19 +3297,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; - mutex_lock(&__ip_vs_mutex); - - if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net); - goto out; - } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(net, info->attrs); - goto out; - } else if (cmd == IPVS_CMD_NEW_DAEMON || - cmd == IPVS_CMD_DEL_DAEMON) { - + if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; + mutex_lock(&ipvs->sync_mutex); if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], @@ -3311,6 +3313,33 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ret = ip_vs_genl_new_daemon(net, daemon_attrs); else ret = ip_vs_genl_del_daemon(net, daemon_attrs); +out: + mutex_unlock(&ipvs->sync_mutex); + } + return ret; +} + +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct ip_vs_service *svc = NULL; + struct ip_vs_service_user_kern usvc; + struct ip_vs_dest_user_kern udest; + int ret = 0, cmd; + int need_full_svc = 0, need_full_dest = 0; + struct net *net; + struct netns_ipvs *ipvs; + + net = skb_sknet(skb); + ipvs = net_ipvs(net); + cmd = info->genlhdr->cmd; + + mutex_lock(&__ip_vs_mutex); + + if (cmd == IPVS_CMD_FLUSH) { + ret = ip_vs_flush(net); + goto out; + } else if (cmd == IPVS_CMD_SET_CONFIG) { + ret = ip_vs_genl_set_config(net, info->attrs); goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { @@ -3537,13 +3566,13 @@ static struct genl_ops ip_vs_genl_ops[] __read_mostly = { .cmd = IPVS_CMD_NEW_DAEMON, .flags = GENL_ADMIN_PERM, .policy = ip_vs_cmd_policy, - .doit = ip_vs_genl_set_cmd, + .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_DEL_DAEMON, .flags = GENL_ADMIN_PERM, .policy = ip_vs_cmd_policy, - .doit = ip_vs_genl_set_cmd, + .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_GET_DAEMON, diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 7ee7215..3cdd479 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -61,6 +61,7 @@ #define SYNC_PROTO_VER 1 /* Protocol version in header */ +static struct lock_class_key __ipvs_sync_key; /* * IPVS sync connection entry * Version 0, i.e. original version. @@ -1545,6 +1546,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + if (state == IP_VS_STATE_MASTER) { if (ipvs->master_thread) return -EEXIST; @@ -1667,6 +1669,7 @@ int __net_init ip_vs_sync_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); INIT_LIST_HEAD(&ipvs->sync_queue); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); @@ -1680,7 +1683,9 @@ int __net_init ip_vs_sync_net_init(struct net *net) void ip_vs_sync_net_cleanup(struct net *net) { int retc; + struct netns_ipvs *ipvs = net_ipvs(net); + mutex_lock(&ipvs->sync_mutex); retc = stop_sync_thread(net, IP_VS_STATE_MASTER); if (retc && retc != -ESRCH) pr_err("Failed to stop Master Daemon\n"); @@ -1688,4 +1693,5 @@ void ip_vs_sync_net_cleanup(struct net *net) retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); + mutex_unlock(&ipvs->sync_mutex); }