diff mbox

[3/3] IPVS: init and cleanup restructuring.

Message ID 1303226705-29178-3-git-send-email-hans@schillstrom.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Hans Schillstrom April 19, 2011, 3:25 p.m. UTC
This patch tries to restore the initial init and cleanup
sequences that was before name space patch.

The number of calls to register_pernet_device have been
reduced to one for the ip_vs.ko
Schedulers still have their own calls.

This patch adds a function __ip_vs_service_cleanup()
and a throttle or actually on/off switch for
the netfilter hooks.

The nf hooks will be enabled when the first service is loaded
and disabled when the last service is removed or when a
name space exit starts.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 include/net/ip_vs.h              |   17 +++++++
 net/netfilter/ipvs/ip_vs_app.c   |   15 +-----
 net/netfilter/ipvs/ip_vs_conn.c  |   20 ++++----
 net/netfilter/ipvs/ip_vs_core.c  |   86 ++++++++++++++++++++++++++++++++------
 net/netfilter/ipvs/ip_vs_ctl.c   |   66 ++++++++++++++++++++++-------
 net/netfilter/ipvs/ip_vs_est.c   |   14 +-----
 net/netfilter/ipvs/ip_vs_proto.c |   11 +----
 net/netfilter/ipvs/ip_vs_sync.c  |   13 +----
 8 files changed, 161 insertions(+), 81 deletions(-)

--
1.7.2.3

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Simon Horman April 19, 2011, 11:12 p.m. UTC | #1
On Tue, Apr 19, 2011 at 05:25:05PM +0200, Hans Schillstrom wrote:
> This patch tries to restore the initial init and cleanup
> sequences that was before name space patch.
> 
> The number of calls to register_pernet_device have been
> reduced to one for the ip_vs.ko
> Schedulers still have their own calls.
> 
> This patch adds a function __ip_vs_service_cleanup()
> and a throttle or actually on/off switch for
> the netfilter hooks.
> 
> The nf hooks will be enabled when the first service is loaded
> and disabled when the last service is removed or when a
> name space exit starts.
> 
> Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
> ---
>  include/net/ip_vs.h              |   17 +++++++
>  net/netfilter/ipvs/ip_vs_app.c   |   15 +-----
>  net/netfilter/ipvs/ip_vs_conn.c  |   20 ++++----
>  net/netfilter/ipvs/ip_vs_core.c  |   86 ++++++++++++++++++++++++++++++++------
>  net/netfilter/ipvs/ip_vs_ctl.c   |   66 ++++++++++++++++++++++-------
>  net/netfilter/ipvs/ip_vs_est.c   |   14 +-----
>  net/netfilter/ipvs/ip_vs_proto.c |   11 +----
>  net/netfilter/ipvs/ip_vs_sync.c  |   13 +----
>  8 files changed, 161 insertions(+), 81 deletions(-)
> 
> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index d516f00..558e490 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -791,6 +791,7 @@ struct ip_vs_app {
>  /* IPVS in network namespace */
>  struct netns_ipvs {
>  	int			gen;		/* Generation */
> +	int			throttle;	/* Instead of nf unreg */

perhaps enable or active would be names that fits better with the
schemantics used.  Using a bool might also make things more obvious.

>  	/*
>  	 *	Hash table: for real service lookups
>  	 */
> @@ -1089,6 +1090,22 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
>  	atomic_inc(&ctl_cp->n_control);
>  }
> 
> +/*
> + * IPVS netns init & cleanup functions
> + */
> +extern int __ip_vs_estimator_init(struct net *net);
> +extern int __ip_vs_control_init(struct net *net);
> +extern int __ip_vs_protocol_init(struct net *net);
> +extern int __ip_vs_app_init(struct net *net);
> +extern int __ip_vs_conn_init(struct net *net);
> +extern int __ip_vs_sync_init(struct net *net);
> +extern void __ip_vs_conn_cleanup(struct net *net);
> +extern void __ip_vs_app_cleanup(struct net *net);
> +extern void __ip_vs_protocol_cleanup(struct net *net);
> +extern void __ip_vs_control_cleanup(struct net *net);
> +extern void __ip_vs_estimator_cleanup(struct net *net);
> +extern void __ip_vs_sync_cleanup(struct net *net);
> +extern void __ip_vs_service_cleanup(struct net *net);
> 
>  /*
>   *      IPVS application functions
> diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
> index 7e8e769..51f3af7 100644
> --- a/net/netfilter/ipvs/ip_vs_app.c
> +++ b/net/netfilter/ipvs/ip_vs_app.c
> @@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = {
>  };
>  #endif
> 
> -static int __net_init __ip_vs_app_init(struct net *net)
> +int __net_init __ip_vs_app_init(struct net *net)
>  {
>  	struct netns_ipvs *ipvs = net_ipvs(net);
> 
> @@ -585,26 +585,17 @@ static int __net_init __ip_vs_app_init(struct net *net)
>  	return 0;
>  }
> 
> -static void __net_exit __ip_vs_app_cleanup(struct net *net)
> +void __net_exit __ip_vs_app_cleanup(struct net *net)
>  {
>  	proc_net_remove(net, "ip_vs_app");
>  }
> 
> -static struct pernet_operations ip_vs_app_ops = {
> -	.init = __ip_vs_app_init,
> -	.exit = __ip_vs_app_cleanup,
> -};
> -
>  int __init ip_vs_app_init(void)
>  {
> -	int rv;
> -
> -	rv = register_pernet_device(&ip_vs_app_ops);
> -	return rv;
> +	return 0;
>  }
> 
> 
>  void ip_vs_app_cleanup(void)
>  {
> -	unregister_pernet_device(&ip_vs_app_ops);
>  }

Can we just remove ip_vs_app_init() and ip_vs_app_cleanup() as
they no longer do anything? Likewise with other init and cleanup
functions below.

> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index 36cd5ea..f8d6702 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -1251,30 +1251,30 @@ int __net_init __ip_vs_conn_init(struct net *net)
>  {
>  	struct netns_ipvs *ipvs = net_ipvs(net);
> 
> +	EnterFunction(2);
>  	atomic_set(&ipvs->conn_count, 0);
> 
>  	proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
>  	proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
> +	LeaveFunction(2);
>  	return 0;
>  }

Does adding these EnterFunction() and LeaveFunction() calls
restore some previous behaviour? If not, I think they should at the very
least be in a separate patch. Likewise for similar changes below.

> 
> -static void __net_exit __ip_vs_conn_cleanup(struct net *net)
> +void __net_exit __ip_vs_conn_cleanup(struct net *net)
>  {
> +	EnterFunction(2);
>  	/* flush all the connection entries first */
>  	ip_vs_conn_flush(net);
>  	proc_net_remove(net, "ip_vs_conn");
>  	proc_net_remove(net, "ip_vs_conn_sync");
> +	LeaveFunction(2);
>  }
> -static struct pernet_operations ipvs_conn_ops = {
> -	.init = __ip_vs_conn_init,
> -	.exit = __ip_vs_conn_cleanup,
> -};
> 
>  int __init ip_vs_conn_init(void)
>  {
>  	int idx;
> -	int retc;
> 
> +	EnterFunction(2);
>  	/* Compute size and mask */
>  	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
>  	ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
> @@ -1309,18 +1309,18 @@ int __init ip_vs_conn_init(void)
>  		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
>  	}
> 
> -	retc = register_pernet_device(&ipvs_conn_ops);
> -
>  	/* calculate the random value for connection hash */
>  	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
> +	LeaveFunction(2);
> 
> -	return retc;
> +	return 0;
>  }
> 
>  void ip_vs_conn_cleanup(void)
>  {
> -	unregister_pernet_device(&ipvs_conn_ops);
> +	EnterFunction(2);
>  	/* Release the empty cache */
>  	kmem_cache_destroy(ip_vs_conn_cachep);
>  	vfree(ip_vs_conn_tab);
> +	LeaveFunction(2);
>  }
> diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
> index a7bb81d..dc27fdf 100644
> --- a/net/netfilter/ipvs/ip_vs_core.c
> +++ b/net/netfilter/ipvs/ip_vs_core.c
> @@ -1343,6 +1343,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
>  		return NF_ACCEPT; /* The packet looks wrong, ignore */
> 
>  	net = skb_net(skb);
> +	/* Name space in use ? */
> +	if (net->ipvs->throttle)
> +		return NF_ACCEPT;
> +
>  	pd = ip_vs_proto_data_get(net, cih->protocol);
>  	if (!pd)
>  		return NF_ACCEPT;
> @@ -1563,6 +1567,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
>  		}
> 
>  	net = skb_net(skb);
> +	if (net->ipvs->throttle)
> +		return NF_ACCEPT;
>  	/* Protocol supported? */
>  	pd = ip_vs_proto_data_get(net, iph.protocol);
>  	if (unlikely(!pd))
> @@ -1588,7 +1594,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
>  	}
> 
>  	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
> -	net = skb_net(skb);
>  	ipvs = net_ipvs(net);
>  	/* Check the server status */
>  	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
> @@ -1879,24 +1884,73 @@ static int __net_init __ip_vs_init(struct net *net)
>  {
>  	struct netns_ipvs *ipvs;
> 
> +	EnterFunction(2);
>  	ipvs = net_generic(net, ip_vs_net_id);
>  	if (ipvs == NULL) {
>  		pr_err("%s(): no memory.\n", __func__);
>  		return -ENOMEM;
>  	}
> +	/* Hold the beast until a service is registerd */
> +	ipvs->throttle = -1;
>  	ipvs->net = net;
>  	/* Counters used for creating unique names */
>  	ipvs->gen = atomic_read(&ipvs_netns_cnt);
>  	atomic_inc(&ipvs_netns_cnt);
>  	net->ipvs = ipvs;
> +
> +	if ( __ip_vs_estimator_init(net) < 0)
> +		goto estimator_fail;
> +
> +	if (__ip_vs_control_init(net) < 0)
> +		goto control_fail;
> +
> +	if (__ip_vs_protocol_init(net) < 0)
> +		goto protocol_fail;
> +
> +	if (__ip_vs_app_init(net) < 0)
> +		goto app_fail;
> +
> +	if (__ip_vs_conn_init(net) < 0)
> +		goto conn_fail;
> +
> +	if (__ip_vs_sync_init(net) < 0)
> +		goto sync_fail;
> +
> +	LeaveFunction(2);
>  	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
>  			 sizeof(struct netns_ipvs), ipvs->gen);
>  	return 0;
> +/*
> + * Error handling
> + */
> +
> +sync_fail:
> +	__ip_vs_conn_cleanup(net);
> +conn_fail:
> +	__ip_vs_app_cleanup(net);
> +app_fail:
> +	__ip_vs_protocol_cleanup(net);
> +protocol_fail:
> +	__ip_vs_control_cleanup(net);
> +control_fail:
> +	__ip_vs_estimator_cleanup(net);
> +estimator_fail:
> +	return -ENOMEM;
>  }
> 
>  static void __net_exit __ip_vs_cleanup(struct net *net)
>  {
> -	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
> +	net->ipvs->throttle = -1;
> +	EnterFunction(2);
> +	__ip_vs_sync_cleanup(net);
> +	__ip_vs_service_cleanup(net);	/* ip_vs_flush() with locks */
> +	__ip_vs_conn_cleanup(net);
> +	__ip_vs_app_cleanup(net);
> +	__ip_vs_protocol_cleanup(net);
> +	__ip_vs_control_cleanup(net);
> +	__ip_vs_estimator_cleanup(net);
> +	LeaveFunction(2);
> +	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
>  }
> 
>  static struct pernet_operations ipvs_core_ops = {
> @@ -1913,10 +1967,7 @@ static int __init ip_vs_init(void)
>  {
>  	int ret;
> 
> -	ret = register_pernet_device(&ipvs_core_ops);	/* Alloc ip_vs struct */
> -	if (ret < 0)
> -		return ret;
> -
> +	EnterFunction(2);
>  	ip_vs_estimator_init();
>  	ret = ip_vs_control_init();
>  	if (ret < 0) {
> @@ -1944,41 +1995,50 @@ static int __init ip_vs_init(void)
>  		goto cleanup_conn;
>  	}
> 
> +	ret = register_pernet_device(&ipvs_core_ops);	/* Alloc ip_vs struct */
> +	if (ret < 0)
> +		goto cleanup_sync;
> +
>  	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
>  	if (ret < 0) {
>  		pr_err("can't register hooks.\n");
> -		goto cleanup_sync;
> +		goto cleanup_net;
>  	}
> 
>  	pr_info("ipvs loaded.\n");
> +	LeaveFunction(2);
> +
>  	return ret;
> 
> +cleanup_net:
> +	unregister_pernet_device(&ipvs_core_ops);       /* free ip_vs struct */
>  cleanup_sync:
>  	ip_vs_sync_cleanup();
> -  cleanup_conn:
> +cleanup_conn:
>  	ip_vs_conn_cleanup();
> -  cleanup_app:
> +cleanup_app:
>  	ip_vs_app_cleanup();
> -  cleanup_protocol:
> +cleanup_protocol:
>  	ip_vs_protocol_cleanup();
>  	ip_vs_control_cleanup();
> -  cleanup_estimator:
> +cleanup_estimator:
>  	ip_vs_estimator_cleanup();
> -	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
>  	return ret;

While I do prefer labels to be in column 0, putting those changes
here is rather a lot of noise. Could you put them in a separate patch?

>  }
> 
>  static void __exit ip_vs_cleanup(void)
>  {
> +	EnterFunction(2);
>  	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
> +	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
>  	ip_vs_sync_cleanup();
>  	ip_vs_conn_cleanup();
>  	ip_vs_app_cleanup();
>  	ip_vs_protocol_cleanup();
>  	ip_vs_control_cleanup();
>  	ip_vs_estimator_cleanup();
> -	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
>  	pr_info("ipvs unloaded.\n");
> +	LeaveFunction(2);
>  }
> 
>  module_init(ip_vs_init);
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index 08715d8..6534ca3 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -69,6 +69,11 @@ int ip_vs_get_debug_level(void)
>  }
>  #endif
> 
> +
> +/*  Protos */
> +static void __ip_vs_del_service(struct ip_vs_service *svc);
> +
> +
>  #ifdef CONFIG_IP_VS_IPV6
>  /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
>  static int __ip_vs_addr_is_local_v6(struct net *net,
> @@ -345,6 +350,9 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
> 
>  	svc->flags &= ~IP_VS_SVC_F_HASHED;
>  	atomic_dec(&svc->refcnt);
> +	/* No more services then no need for input */
> +	if (atomic_read(&svc->refcnt) == 0)
> +		svc->net->ipvs->throttle = -1;
>  	return 1;
>  }
> 
> @@ -480,7 +488,6 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
>  	}
>  }
> 
> -

I don't think this hunk is appropriate for this patch.

>  /*
>   *	Returns hash value for real service
>   */
> @@ -1214,6 +1221,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
>  	write_unlock_bh(&__ip_vs_svc_lock);
> 
>  	*svc_p = svc;
> +	/* Now whe have a service - full throttle */
> +	ipvs->throttle = 0;
>  	return 0;
> 
> 
> @@ -1472,6 +1481,41 @@ static int ip_vs_flush(struct net *net)
>  	return 0;
>  }
> 
> +/*
> + *	Delete service by {netns} in the service table.
> + *	Called by __ip_vs_cleanup()
> + */
> +void __ip_vs_service_cleanup(struct net *net)
> +{
> +	unsigned hash;
> +	struct ip_vs_service *svc, *tmp;
> +
> +	EnterFunction(2);
> +	/* Check for "full" addressed entries */
> +	for (hash = 0; hash<IP_VS_SVC_TAB_SIZE; hash++) {

A space is needed on either side of '<'

> +		write_lock_bh(&__ip_vs_svc_lock);
> +		list_for_each_entry_safe(svc, tmp, &ip_vs_svc_table[hash],
> +					 s_list) {
> +			if (net_eq(svc->net, net)) {
> +				ip_vs_svc_unhash(svc);
> +				/*  Wait until all the svc users go away. */
> +				IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
> +				__ip_vs_del_service(svc);
> +			}
> +		}
> +		list_for_each_entry_safe(svc, tmp, &ip_vs_svc_fwm_table[hash],
> +					 f_list) {
> +			if (net_eq(svc->net, net)) {
> +				ip_vs_svc_unhash(svc);
> +				/*  Wait until all the svc users go away. */
> +				IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
> +				__ip_vs_del_service(svc);
> +			}
> +		}
> +		write_unlock_bh(&__ip_vs_svc_lock);
> +	}
> +	LeaveFunction(2);
> +}
> 
>  /*
>   *	Zero counters in a service or all services
> @@ -3593,6 +3637,7 @@ int __net_init __ip_vs_control_init(struct net *net)
>  	int idx;
>  	struct netns_ipvs *ipvs = net_ipvs(net);
> 
> +	EnterFunction(2);
>  	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
> 
>  	/* Initialize rs_table */
> @@ -3619,6 +3664,7 @@ int __net_init __ip_vs_control_init(struct net *net)
>  	if (__ip_vs_control_init_sysctl(net))
>  		goto err;
> 
> +	LeaveFunction(2);
>  	return 0;
> 
>  err:
> @@ -3626,10 +3672,11 @@ err:
>  	return -ENOMEM;
>  }
> 
> -static void __net_exit __ip_vs_control_cleanup(struct net *net)
> +void __net_exit __ip_vs_control_cleanup(struct net *net)
>  {
>  	struct netns_ipvs *ipvs = net_ipvs(net);
> 
> +	EnterFunction(2);
>  	ip_vs_trash_cleanup(net);
>  	ip_vs_stop_estimator(net, &ipvs->tot_stats);
>  	__ip_vs_control_cleanup_sysctl(net);
> @@ -3637,13 +3684,9 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
>  	proc_net_remove(net, "ip_vs_stats");
>  	proc_net_remove(net, "ip_vs");
>  	free_percpu(ipvs->tot_stats.cpustats);
> +	LeaveFunction(2);
>  }
> 
> -static struct pernet_operations ipvs_control_ops = {
> -	.init = __ip_vs_control_init,
> -	.exit = __ip_vs_control_cleanup,
> -};
> -
>  int __init ip_vs_control_init(void)
>  {
>  	int idx;
> @@ -3657,12 +3700,6 @@ int __init ip_vs_control_init(void)
>  		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
>  	}
> 
> -	ret = register_pernet_device(&ipvs_control_ops);
> -	if (ret) {
> -		pr_err("cannot register namespace.\n");
> -		goto err;
> -	}
> -
>  	smp_wmb();	/* Do we really need it now ? */
> 
>  	ret = nf_register_sockopt(&ip_vs_sockopts);
> @@ -3682,8 +3719,6 @@ int __init ip_vs_control_init(void)
>  	return 0;
> 
>  err_net:
> -	unregister_pernet_device(&ipvs_control_ops);
> -err:
>  	return ret;
>  }
> 
> @@ -3691,7 +3726,6 @@ err:
>  void ip_vs_control_cleanup(void)
>  {
>  	EnterFunction(2);
> -	unregister_pernet_device(&ipvs_control_ops);
>  	ip_vs_genl_unregister();
>  	nf_unregister_sockopt(&ip_vs_sockopts);
>  	LeaveFunction(2);
> diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
> index 759163e..508cce9 100644
> --- a/net/netfilter/ipvs/ip_vs_est.c
> +++ b/net/netfilter/ipvs/ip_vs_est.c
> @@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
>  	dst->outbps = (e->outbps + 0xF) >> 5;
>  }
> 
> -static int __net_init __ip_vs_estimator_init(struct net *net)
> +int __net_init __ip_vs_estimator_init(struct net *net)
>  {
>  	struct netns_ipvs *ipvs = net_ipvs(net);
> 
> @@ -203,24 +203,16 @@ static int __net_init __ip_vs_estimator_init(struct net *net)
>  	return 0;
>  }
> 
> -static void __net_exit __ip_vs_estimator_exit(struct net *net)
> +void __net_exit __ip_vs_estimator_cleanup(struct net *net)
>  {
>  	del_timer_sync(&net_ipvs(net)->est_timer);
>  }
> -static struct pernet_operations ip_vs_app_ops = {
> -	.init = __ip_vs_estimator_init,
> -	.exit = __ip_vs_estimator_exit,
> -};
> 
>  int __init ip_vs_estimator_init(void)
>  {
> -	int rv;
> -
> -	rv = register_pernet_device(&ip_vs_app_ops);
> -	return rv;
> +	return 0;
>  }
> 
>  void ip_vs_estimator_cleanup(void)
>  {
> -	unregister_pernet_device(&ip_vs_app_ops);
>  }
> diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
> index f7021fc..eb86028 100644
> --- a/net/netfilter/ipvs/ip_vs_proto.c
> +++ b/net/netfilter/ipvs/ip_vs_proto.c
> @@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
>  /*
>   * per network name-space init
>   */
> -static int __net_init __ip_vs_protocol_init(struct net *net)
> +int __net_init __ip_vs_protocol_init(struct net *net)
>  {
>  #ifdef CONFIG_IP_VS_PROTO_TCP
>  	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
> @@ -336,7 +336,7 @@ static int __net_init __ip_vs_protocol_init(struct net *net)
>  	return 0;
>  }
> 
> -static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
> +void __net_exit __ip_vs_protocol_cleanup(struct net *net)
>  {
>  	struct netns_ipvs *ipvs = net_ipvs(net);
>  	struct ip_vs_proto_data *pd;
> @@ -349,11 +349,6 @@ static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
>  	}
>  }
> 
> -static struct pernet_operations ipvs_proto_ops = {
> -	.init = __ip_vs_protocol_init,
> -	.exit = __ip_vs_protocol_cleanup,
> -};
> -
>  int __init ip_vs_protocol_init(void)
>  {
>  	char protocols[64];
> @@ -382,7 +377,6 @@ int __init ip_vs_protocol_init(void)
>  	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
>  #endif
>  	pr_info("Registered protocols (%s)\n", &protocols[2]);
> -	return register_pernet_device(&ipvs_proto_ops);
> 
>  	return 0;
>  }
> @@ -393,7 +387,6 @@ void ip_vs_protocol_cleanup(void)
>  	struct ip_vs_protocol *pp;
>  	int i;
> 
> -	unregister_pernet_device(&ipvs_proto_ops);
>  	/* unregister all the ipvs protocols */
>  	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
>  		while ((pp = ip_vs_proto_table[i]) != NULL)
> diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
> index 1aeca1d..e911f03 100644
> --- a/net/netfilter/ipvs/ip_vs_sync.c
> +++ b/net/netfilter/ipvs/ip_vs_sync.c
> @@ -1664,7 +1664,7 @@ int stop_sync_thread(struct net *net, int state)
>  /*
>   * Initialize data struct for each netns
>   */
> -static int __net_init __ip_vs_sync_init(struct net *net)
> +int __net_init __ip_vs_sync_init(struct net *net)
>  {
>  	struct netns_ipvs *ipvs = net_ipvs(net);
> 
> @@ -1678,24 +1678,17 @@ static int __net_init __ip_vs_sync_init(struct net *net)
>  	return 0;
>  }
> 
> -static void __ip_vs_sync_cleanup(struct net *net)
> +void __ip_vs_sync_cleanup(struct net *net)
>  {
>  	stop_sync_thread(net, IP_VS_STATE_MASTER);
>  	stop_sync_thread(net, IP_VS_STATE_BACKUP);
>  }
> 
> -static struct pernet_operations ipvs_sync_ops = {
> -	.init = __ip_vs_sync_init,
> -	.exit = __ip_vs_sync_cleanup,
> -};
> -
> -
>  int __init ip_vs_sync_init(void)
>  {
> -	return register_pernet_device(&ipvs_sync_ops);
> +	return 0;
>  }
> 
>  void ip_vs_sync_cleanup(void)
>  {
> -	unregister_pernet_device(&ipvs_sync_ops);
>  }
> --
> 1.7.2.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov April 19, 2011, 11:19 p.m. UTC | #2
Hello,

On Tue, 19 Apr 2011, Hans Schillstrom wrote:

> This patch tries to restore the initial init and cleanup
> sequences that was before name space patch.
> 
> The number of calls to register_pernet_device have been
> reduced to one for the ip_vs.ko
> Schedulers still have their own calls.
> 
> This patch adds a function __ip_vs_service_cleanup()
> and a throttle or actually on/off switch for
> the netfilter hooks.
> 
> The nf hooks will be enabled when the first service is loaded
> and disabled when the last service is removed or when a
> name space exit starts.

	For me using _net suffix is more clear compared
to __ prefix for the pernet methods.

	For ip_vs_in: may be we can move the check here:

+	net = skb_net(skb);
+	if (net->ipvs->throttle)
+		return NF_ACCEPT;
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
 	/* Bad... Do not break raw sockets */

	It will save such checks later in ICMP funcs. But this
throttle idea looks dangerous for cleanup. It does not use RCU.
The readers can cache the 0 in throttle for long time.
May be by using register_pernet_device we are in list with other
devices and it is still possible some device used by
our dst_cache to be unregistered before IPVS or we to be
unregistered before such device and some race with throttle
to happen. throttle is good when enabling traffic with
the first virtual service, later it can slowly stop the traffic
but we can not rely on it during netns cleanup.

	So, there are 2 problems with the devices:

- if we use _device pernet registration we can see packets
in our netns during cleanup. I assume this is possible
when IPVS is unregistered before such devices.

- dests can cache dst and to hold the device after it is
unregistered in netns, obviously for very short time until
IPVS is later unregistered from netns. And for long time
if device is unregistered but netns remains.

	Also, in most of the cases svc->refcnt is above 0 because
dests can be in trash list. You should be lucky to delete the
service without any connections, only then ip_vs_svc_unhash can
see refcnt == 0.

	So, may be we have to use register_pernet_subsys (not
_device). We need just to register notifier with
register_netdevice_notifier and to catch NETDEV_UNREGISTER,
so that if any dest uses this device we have to release the dst:

	- lock mutex
	- for every dest (also in trash):
	spin_lock_bh(&dest->dst_lock);
	if (dest->dst_cache && dest->dst_cache->dev == unregistered_dev)
		ip_vs_dst_reset(dest);
	spin_unlock_bh(&dest->dst_lock);

	There are many examples for this, eg. net/core/fib_rules.c
Then we are sure on cleanup we can not see traffic for our net
because all devices are unregistered before us. We don't have
to rely on throttle to stop the traffic during cleanup. And
we do not hold devices after NETDEV_UNREGISTER.

	I can prepare such patch but in next days. We need such
code anyways because the dests can hold such dsts when no
traffic is present and we can see again this "waiting for %s" ...
message.

	throttle still can be used but now it can not stop
the traffic if connections exist.

	For __ip_vs_service_cleanup: it still has to use mutex.
Or we can avoid it by introducing ip_vs_unlink_service_nolock:
ip_vs_flush will look like your __ip_vs_service_cleanup and
will call ip_vs_unlink_service_nolock. ip_vs_unlink_service_nolock
will be called by ip_vs_flush and by ip_vs_unlink_service.
You can try such changes, if not, I'll prepare some patches
after 2-3 days.

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans Schillstrom April 20, 2011, 9:56 a.m. UTC | #3
Thanks everyone
I will send a new patch series today based on your comments

Regards
Hans
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans Schillstrom April 20, 2011, 10:41 a.m. UTC | #4
On Wednesday, April 20, 2011 01:19:38 Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Tue, 19 Apr 2011, Hans Schillstrom wrote:
> 
> > This patch tries to restore the initial init and cleanup
> > sequences that was before name space patch.
> > 
> > The number of calls to register_pernet_device have been
> > reduced to one for the ip_vs.ko
> > Schedulers still have their own calls.
> > 
> > This patch adds a function __ip_vs_service_cleanup()
> > and a throttle or actually on/off switch for
> > the netfilter hooks.
> > 
> > The nf hooks will be enabled when the first service is loaded
> > and disabled when the last service is removed or when a
> > name space exit starts.
> 
> 	For me using _net suffix is more clear compared
> to __ prefix for the pernet methods.
> 
Sure I'll do that.

> 	For ip_vs_in: may be we can move the check here:
> 
> +	net = skb_net(skb);
> +	if (net->ipvs->throttle)
> +		return NF_ACCEPT;
>  	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
>  
>  	/* Bad... Do not break raw sockets */

Done

> 
> 	It will save such checks later in ICMP funcs. But this
> throttle idea looks dangerous for cleanup. It does not use RCU.
> The readers can cache the 0 in throttle for long time.
> May be by using register_pernet_device we are in list with other
> devices and it is still possible some device used by
> our dst_cache to be unregistered before IPVS or we to be
> unregistered before such device and some race with throttle
> to happen. throttle is good when enabling traffic with
> the first virtual service, later it can slowly stop the traffic
> but we can not rely on it during netns cleanup.

OK , I will revert back to register_pernet_subsys(),
and use one single register_pernet_device()  exit hook for :
 - the throttle to disable traffic
 - stop the kernel threads.

> 
> 	So, there are 2 problems with the devices:
> 
> - if we use _device pernet registration we can see packets
> in our netns during cleanup. I assume this is possible
> when IPVS is unregistered before such devices.
> 
> - dests can cache dst and to hold the device after it is
> unregistered in netns, obviously for very short time until
> IPVS is later unregistered from netns. And for long time
> if device is unregistered but netns remains.
> 
> 	Also, in most of the cases svc->refcnt is above 0 because
> dests can be in trash list. You should be lucky to delete the
> service without any connections, only then ip_vs_svc_unhash can
> see refcnt == 0.
> 
> 	So, may be we have to use register_pernet_subsys (not
> _device). We need just to register notifier with
> register_netdevice_notifier and to catch NETDEV_UNREGISTER,
> so that if any dest uses this device we have to release the dst:
> 
I made a quick test of the concept and it seems to work, 
(A mix of new and old connections at 1GB/s into 4 namespaces when killing them all)

> 	- lock mutex
> 	- for every dest (also in trash):
> 	spin_lock_bh(&dest->dst_lock);
> 	if (dest->dst_cache && dest->dst_cache->dev == unregistered_dev)
> 		ip_vs_dst_reset(dest);
> 	spin_unlock_bh(&dest->dst_lock);


Here is a what i did

static inline void
__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
{
	spin_lock_bh(&dest->dst_lock);
	if (dest->dst_cache && dest->dst_cache->dev == dev)
		ip_vs_dst_reset(dest);
	spin_unlock_bh(&dest->dst_lock);
}

static void ip_vs_svc_reset(struct ip_vs_service *svc, struct net_device *dev)
{
	struct ip_vs_dest *dest;

	list_for_each_entry(dest, &svc->destinations, n_list) {
		__ip_vs_dev_reset(dest, dev);
	}
}

static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
			    void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	unsigned int idx;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	EnterFunction(2);
	mutex_lock(&__ip_vs_mutex);
	for (idx = 0; idx<IP_VS_SVC_TAB_SIZE; idx++) {
		write_lock_bh(&__ip_vs_svc_lock);
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (net_eq(svc->net, net))
				ip_vs_svc_reset(svc, dev);
		}
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (net_eq(svc->net, net))
				ip_vs_svc_reset(svc, dev);
		}
		write_unlock_bh(&__ip_vs_svc_lock);
	}

	list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
		__ip_vs_dev_reset(dest, dev);
	}
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
	return NOTIFY_DONE;
}

static struct notifier_block ip_vs_dst_notifier = {
	.notifier_call = ip_vs_dst_event,
};


int __init ip_vs_control_init(void)
...
	at the end
	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
...

and an unregister_ of course.


> 
> 	There are many examples for this, eg. net/core/fib_rules.c
> Then we are sure on cleanup we can not see traffic for our net
> because all devices are unregistered before us. We don't have
> to rely on throttle to stop the traffic during cleanup. And
> we do not hold devices after NETDEV_UNREGISTER.
> 
> 	I can prepare such patch but in next days. We need such
> code anyways because the dests can hold such dsts when no
> traffic is present and we can see again this "waiting for %s" ...
> message.
> 
> 	throttle still can be used but now it can not stop
> the traffic if connections exist.
> 
> 	For __ip_vs_service_cleanup: it still has to use mutex.

OK I will try to use unlock methods, if it doesn't work use the mutex.

> Or we can avoid it by introducing ip_vs_unlink_service_nolock:
> ip_vs_flush will look like your __ip_vs_service_cleanup and
> will call ip_vs_unlink_service_nolock. ip_vs_unlink_service_nolock
> will be called by ip_vs_flush and by ip_vs_unlink_service.
> You can try such changes, if not, I'll prepare some patches
> after 2-3 days.
> 

Regards
Hans
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans Schillstrom April 20, 2011, noon UTC | #5
On Wednesday, April 20, 2011 01:12:34 Simon Horman wrote:
> On Tue, Apr 19, 2011 at 05:25:05PM +0200, Hans Schillstrom wrote:
> > This patch tries to restore the initial init and cleanup
> > sequences that was before name space patch.
[snip]
> perhaps enable or active would be names that fits better with the
> schemantics used.  Using a bool might also make things more obvious.

I'll use enable
> 
[snip]
> 
> Can we just remove ip_vs_app_init() and ip_vs_app_cleanup() as
> they no longer do anything? Likewise with other init and cleanup
> functions below.

I will add a "final" patch that removes empty functions,
(They are nice to have during the review, to keep track of the order in different contexts)

> 
> > diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> > index 36cd5ea..f8d6702 100644
> > --- a/net/netfilter/ipvs/ip_vs_conn.c
> > +++ b/net/netfilter/ipvs/ip_vs_conn.c
> > @@ -1251,30 +1251,30 @@ int __net_init __ip_vs_conn_init(struct net *net)
> >  {
> >  	struct netns_ipvs *ipvs = net_ipvs(net);
> > 
> > +	EnterFunction(2);
> >  	atomic_set(&ipvs->conn_count, 0);
> > 
> >  	proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
> >  	proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
> > +	LeaveFunction(2);
> >  	return 0;
> >  }
> 
> Does adding these EnterFunction() and LeaveFunction() calls
> restore some previous behaviour? If not, I think they should at the very
> least be in a separate patch. Likewise for similar changes below.
> 

I can remove them if you want, (but they are nice for debugging)

[snip]
> 
> While I do prefer labels to be in column 0, putting those changes
> here is rather a lot of noise. Could you put them in a separate patch?

OK it will be patch no 1 later on

Regards
Hans
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index d516f00..558e490 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -791,6 +791,7 @@  struct ip_vs_app {
 /* IPVS in network namespace */
 struct netns_ipvs {
 	int			gen;		/* Generation */
+	int			throttle;	/* Instead of nf unreg */
 	/*
 	 *	Hash table: for real service lookups
 	 */
@@ -1089,6 +1090,22 @@  ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 	atomic_inc(&ctl_cp->n_control);
 }

+/*
+ * IPVS netns init & cleanup functions
+ */
+extern int __ip_vs_estimator_init(struct net *net);
+extern int __ip_vs_control_init(struct net *net);
+extern int __ip_vs_protocol_init(struct net *net);
+extern int __ip_vs_app_init(struct net *net);
+extern int __ip_vs_conn_init(struct net *net);
+extern int __ip_vs_sync_init(struct net *net);
+extern void __ip_vs_conn_cleanup(struct net *net);
+extern void __ip_vs_app_cleanup(struct net *net);
+extern void __ip_vs_protocol_cleanup(struct net *net);
+extern void __ip_vs_control_cleanup(struct net *net);
+extern void __ip_vs_estimator_cleanup(struct net *net);
+extern void __ip_vs_sync_cleanup(struct net *net);
+extern void __ip_vs_service_cleanup(struct net *net);

 /*
  *      IPVS application functions
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 7e8e769..51f3af7 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -576,7 +576,7 @@  static const struct file_operations ip_vs_app_fops = {
 };
 #endif

-static int __net_init __ip_vs_app_init(struct net *net)
+int __net_init __ip_vs_app_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -585,26 +585,17 @@  static int __net_init __ip_vs_app_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_app_cleanup(struct net *net)
+void __net_exit __ip_vs_app_cleanup(struct net *net)
 {
 	proc_net_remove(net, "ip_vs_app");
 }

-static struct pernet_operations ip_vs_app_ops = {
-	.init = __ip_vs_app_init,
-	.exit = __ip_vs_app_cleanup,
-};
-
 int __init ip_vs_app_init(void)
 {
-	int rv;
-
-	rv = register_pernet_device(&ip_vs_app_ops);
-	return rv;
+	return 0;
 }


 void ip_vs_app_cleanup(void)
 {
-	unregister_pernet_device(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 36cd5ea..f8d6702 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1251,30 +1251,30 @@  int __net_init __ip_vs_conn_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

+	EnterFunction(2);
 	atomic_set(&ipvs->conn_count, 0);

 	proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
 	proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+	LeaveFunction(2);
 	return 0;
 }

-static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+void __net_exit __ip_vs_conn_cleanup(struct net *net)
 {
+	EnterFunction(2);
 	/* flush all the connection entries first */
 	ip_vs_conn_flush(net);
 	proc_net_remove(net, "ip_vs_conn");
 	proc_net_remove(net, "ip_vs_conn_sync");
+	LeaveFunction(2);
 }
-static struct pernet_operations ipvs_conn_ops = {
-	.init = __ip_vs_conn_init,
-	.exit = __ip_vs_conn_cleanup,
-};

 int __init ip_vs_conn_init(void)
 {
 	int idx;
-	int retc;

+	EnterFunction(2);
 	/* Compute size and mask */
 	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
 	ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
@@ -1309,18 +1309,18 @@  int __init ip_vs_conn_init(void)
 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}

-	retc = register_pernet_device(&ipvs_conn_ops);
-
 	/* calculate the random value for connection hash */
 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+	LeaveFunction(2);

-	return retc;
+	return 0;
 }

 void ip_vs_conn_cleanup(void)
 {
-	unregister_pernet_device(&ipvs_conn_ops);
+	EnterFunction(2);
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
 	vfree(ip_vs_conn_tab);
+	LeaveFunction(2);
 }
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a7bb81d..dc27fdf 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1343,6 +1343,10 @@  ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */

 	net = skb_net(skb);
+	/* Name space in use ? */
+	if (net->ipvs->throttle)
+		return NF_ACCEPT;
+
 	pd = ip_vs_proto_data_get(net, cih->protocol);
 	if (!pd)
 		return NF_ACCEPT;
@@ -1563,6 +1567,8 @@  ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 		}

 	net = skb_net(skb);
+	if (net->ipvs->throttle)
+		return NF_ACCEPT;
 	/* Protocol supported? */
 	pd = ip_vs_proto_data_get(net, iph.protocol);
 	if (unlikely(!pd))
@@ -1588,7 +1594,6 @@  ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}

 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-	net = skb_net(skb);
 	ipvs = net_ipvs(net);
 	/* Check the server status */
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -1879,24 +1884,73 @@  static int __net_init __ip_vs_init(struct net *net)
 {
 	struct netns_ipvs *ipvs;

+	EnterFunction(2);
 	ipvs = net_generic(net, ip_vs_net_id);
 	if (ipvs == NULL) {
 		pr_err("%s(): no memory.\n", __func__);
 		return -ENOMEM;
 	}
+	/* Hold the beast until a service is registerd */
+	ipvs->throttle = -1;
 	ipvs->net = net;
 	/* Counters used for creating unique names */
 	ipvs->gen = atomic_read(&ipvs_netns_cnt);
 	atomic_inc(&ipvs_netns_cnt);
 	net->ipvs = ipvs;
+
+	if ( __ip_vs_estimator_init(net) < 0)
+		goto estimator_fail;
+
+	if (__ip_vs_control_init(net) < 0)
+		goto control_fail;
+
+	if (__ip_vs_protocol_init(net) < 0)
+		goto protocol_fail;
+
+	if (__ip_vs_app_init(net) < 0)
+		goto app_fail;
+
+	if (__ip_vs_conn_init(net) < 0)
+		goto conn_fail;
+
+	if (__ip_vs_sync_init(net) < 0)
+		goto sync_fail;
+
+	LeaveFunction(2);
 	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
 			 sizeof(struct netns_ipvs), ipvs->gen);
 	return 0;
+/*
+ * Error handling
+ */
+
+sync_fail:
+	__ip_vs_conn_cleanup(net);
+conn_fail:
+	__ip_vs_app_cleanup(net);
+app_fail:
+	__ip_vs_protocol_cleanup(net);
+protocol_fail:
+	__ip_vs_control_cleanup(net);
+control_fail:
+	__ip_vs_estimator_cleanup(net);
+estimator_fail:
+	return -ENOMEM;
 }

 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
-	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+	net->ipvs->throttle = -1;
+	EnterFunction(2);
+	__ip_vs_sync_cleanup(net);
+	__ip_vs_service_cleanup(net);	/* ip_vs_flush() with locks */
+	__ip_vs_conn_cleanup(net);
+	__ip_vs_app_cleanup(net);
+	__ip_vs_protocol_cleanup(net);
+	__ip_vs_control_cleanup(net);
+	__ip_vs_estimator_cleanup(net);
+	LeaveFunction(2);
+	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }

 static struct pernet_operations ipvs_core_ops = {
@@ -1913,10 +1967,7 @@  static int __init ip_vs_init(void)
 {
 	int ret;

-	ret = register_pernet_device(&ipvs_core_ops);	/* Alloc ip_vs struct */
-	if (ret < 0)
-		return ret;
-
+	EnterFunction(2);
 	ip_vs_estimator_init();
 	ret = ip_vs_control_init();
 	if (ret < 0) {
@@ -1944,41 +1995,50 @@  static int __init ip_vs_init(void)
 		goto cleanup_conn;
 	}

+	ret = register_pernet_device(&ipvs_core_ops);	/* Alloc ip_vs struct */
+	if (ret < 0)
+		goto cleanup_sync;
+
 	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	if (ret < 0) {
 		pr_err("can't register hooks.\n");
-		goto cleanup_sync;
+		goto cleanup_net;
 	}

 	pr_info("ipvs loaded.\n");
+	LeaveFunction(2);
+
 	return ret;

+cleanup_net:
+	unregister_pernet_device(&ipvs_core_ops);       /* free ip_vs struct */
 cleanup_sync:
 	ip_vs_sync_cleanup();
-  cleanup_conn:
+cleanup_conn:
 	ip_vs_conn_cleanup();
-  cleanup_app:
+cleanup_app:
 	ip_vs_app_cleanup();
-  cleanup_protocol:
+cleanup_protocol:
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
-  cleanup_estimator:
+cleanup_estimator:
 	ip_vs_estimator_cleanup();
-	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
 }

 static void __exit ip_vs_cleanup(void)
 {
+	EnterFunction(2);
 	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
 	ip_vs_sync_cleanup();
 	ip_vs_conn_cleanup();
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
 	ip_vs_estimator_cleanup();
-	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
 	pr_info("ipvs unloaded.\n");
+	LeaveFunction(2);
 }

 module_init(ip_vs_init);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 08715d8..6534ca3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -69,6 +69,11 @@  int ip_vs_get_debug_level(void)
 }
 #endif

+
+/*  Protos */
+static void __ip_vs_del_service(struct ip_vs_service *svc);
+
+
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
 static int __ip_vs_addr_is_local_v6(struct net *net,
@@ -345,6 +350,9 @@  static int ip_vs_svc_unhash(struct ip_vs_service *svc)

 	svc->flags &= ~IP_VS_SVC_F_HASHED;
 	atomic_dec(&svc->refcnt);
+	/* No more services then no need for input */
+	if (atomic_read(&svc->refcnt) == 0)
+		svc->net->ipvs->throttle = -1;
 	return 1;
 }

@@ -480,7 +488,6 @@  __ip_vs_unbind_svc(struct ip_vs_dest *dest)
 	}
 }

-
 /*
  *	Returns hash value for real service
  */
@@ -1214,6 +1221,8 @@  ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	write_unlock_bh(&__ip_vs_svc_lock);

 	*svc_p = svc;
+	/* Now whe have a service - full throttle */
+	ipvs->throttle = 0;
 	return 0;


@@ -1472,6 +1481,41 @@  static int ip_vs_flush(struct net *net)
 	return 0;
 }

+/*
+ *	Delete service by {netns} in the service table.
+ *	Called by __ip_vs_cleanup()
+ */
+void __ip_vs_service_cleanup(struct net *net)
+{
+	unsigned hash;
+	struct ip_vs_service *svc, *tmp;
+
+	EnterFunction(2);
+	/* Check for "full" addressed entries */
+	for (hash = 0; hash<IP_VS_SVC_TAB_SIZE; hash++) {
+		write_lock_bh(&__ip_vs_svc_lock);
+		list_for_each_entry_safe(svc, tmp, &ip_vs_svc_table[hash],
+					 s_list) {
+			if (net_eq(svc->net, net)) {
+				ip_vs_svc_unhash(svc);
+				/*  Wait until all the svc users go away. */
+				IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+				__ip_vs_del_service(svc);
+			}
+		}
+		list_for_each_entry_safe(svc, tmp, &ip_vs_svc_fwm_table[hash],
+					 f_list) {
+			if (net_eq(svc->net, net)) {
+				ip_vs_svc_unhash(svc);
+				/*  Wait until all the svc users go away. */
+				IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+				__ip_vs_del_service(svc);
+			}
+		}
+		write_unlock_bh(&__ip_vs_svc_lock);
+	}
+	LeaveFunction(2);
+}

 /*
  *	Zero counters in a service or all services
@@ -3593,6 +3637,7 @@  int __net_init __ip_vs_control_init(struct net *net)
 	int idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);

+	EnterFunction(2);
 	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);

 	/* Initialize rs_table */
@@ -3619,6 +3664,7 @@  int __net_init __ip_vs_control_init(struct net *net)
 	if (__ip_vs_control_init_sysctl(net))
 		goto err;

+	LeaveFunction(2);
 	return 0;

 err:
@@ -3626,10 +3672,11 @@  err:
 	return -ENOMEM;
 }

-static void __net_exit __ip_vs_control_cleanup(struct net *net)
+void __net_exit __ip_vs_control_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

+	EnterFunction(2);
 	ip_vs_trash_cleanup(net);
 	ip_vs_stop_estimator(net, &ipvs->tot_stats);
 	__ip_vs_control_cleanup_sysctl(net);
@@ -3637,13 +3684,9 @@  static void __net_exit __ip_vs_control_cleanup(struct net *net)
 	proc_net_remove(net, "ip_vs_stats");
 	proc_net_remove(net, "ip_vs");
 	free_percpu(ipvs->tot_stats.cpustats);
+	LeaveFunction(2);
 }

-static struct pernet_operations ipvs_control_ops = {
-	.init = __ip_vs_control_init,
-	.exit = __ip_vs_control_cleanup,
-};
-
 int __init ip_vs_control_init(void)
 {
 	int idx;
@@ -3657,12 +3700,6 @@  int __init ip_vs_control_init(void)
 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
 	}

-	ret = register_pernet_device(&ipvs_control_ops);
-	if (ret) {
-		pr_err("cannot register namespace.\n");
-		goto err;
-	}
-
 	smp_wmb();	/* Do we really need it now ? */

 	ret = nf_register_sockopt(&ip_vs_sockopts);
@@ -3682,8 +3719,6 @@  int __init ip_vs_control_init(void)
 	return 0;

 err_net:
-	unregister_pernet_device(&ipvs_control_ops);
-err:
 	return ret;
 }

@@ -3691,7 +3726,6 @@  err:
 void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
-	unregister_pernet_device(&ipvs_control_ops);
 	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 759163e..508cce9 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -192,7 +192,7 @@  void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
 	dst->outbps = (e->outbps + 0xF) >> 5;
 }

-static int __net_init __ip_vs_estimator_init(struct net *net)
+int __net_init __ip_vs_estimator_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -203,24 +203,16 @@  static int __net_init __ip_vs_estimator_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_estimator_exit(struct net *net)
+void __net_exit __ip_vs_estimator_cleanup(struct net *net)
 {
 	del_timer_sync(&net_ipvs(net)->est_timer);
 }
-static struct pernet_operations ip_vs_app_ops = {
-	.init = __ip_vs_estimator_init,
-	.exit = __ip_vs_estimator_exit,
-};

 int __init ip_vs_estimator_init(void)
 {
-	int rv;
-
-	rv = register_pernet_device(&ip_vs_app_ops);
-	return rv;
+	return 0;
 }

 void ip_vs_estimator_cleanup(void)
 {
-	unregister_pernet_device(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index f7021fc..eb86028 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -316,7 +316,7 @@  ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 /*
  * per network name-space init
  */
-static int __net_init __ip_vs_protocol_init(struct net *net)
+int __net_init __ip_vs_protocol_init(struct net *net)
 {
 #ifdef CONFIG_IP_VS_PROTO_TCP
 	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
@@ -336,7 +336,7 @@  static int __net_init __ip_vs_protocol_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+void __net_exit __ip_vs_protocol_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_proto_data *pd;
@@ -349,11 +349,6 @@  static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
 	}
 }

-static struct pernet_operations ipvs_proto_ops = {
-	.init = __ip_vs_protocol_init,
-	.exit = __ip_vs_protocol_cleanup,
-};
-
 int __init ip_vs_protocol_init(void)
 {
 	char protocols[64];
@@ -382,7 +377,6 @@  int __init ip_vs_protocol_init(void)
 	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
 #endif
 	pr_info("Registered protocols (%s)\n", &protocols[2]);
-	return register_pernet_device(&ipvs_proto_ops);

 	return 0;
 }
@@ -393,7 +387,6 @@  void ip_vs_protocol_cleanup(void)
 	struct ip_vs_protocol *pp;
 	int i;

-	unregister_pernet_device(&ipvs_proto_ops);
 	/* unregister all the ipvs protocols */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 1aeca1d..e911f03 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1664,7 +1664,7 @@  int stop_sync_thread(struct net *net, int state)
 /*
  * Initialize data struct for each netns
  */
-static int __net_init __ip_vs_sync_init(struct net *net)
+int __net_init __ip_vs_sync_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -1678,24 +1678,17 @@  static int __net_init __ip_vs_sync_init(struct net *net)
 	return 0;
 }

-static void __ip_vs_sync_cleanup(struct net *net)
+void __ip_vs_sync_cleanup(struct net *net)
 {
 	stop_sync_thread(net, IP_VS_STATE_MASTER);
 	stop_sync_thread(net, IP_VS_STATE_BACKUP);
 }

-static struct pernet_operations ipvs_sync_ops = {
-	.init = __ip_vs_sync_init,
-	.exit = __ip_vs_sync_cleanup,
-};
-
-
 int __init ip_vs_sync_init(void)
 {
-	return register_pernet_device(&ipvs_sync_ops);
+	return 0;
 }

 void ip_vs_sync_cleanup(void)
 {
-	unregister_pernet_device(&ipvs_sync_ops);
 }