diff mbox

[RFC,1/9] ipvs network name space aware

Message ID 201010081316.46690.hans.schillstrom@ericsson.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Hans Schillstrom Oct. 8, 2010, 11:16 a.m. UTC
This part contains the include files
where include/net/netns/ip_vs.h is new and contains all moved vars.

SUMMARY

 include/net/ip_vs.h                     |  136 ++++---
 include/net/net_namespace.h             |    2 +
 include/net/netns/ip_vs.h               |  112 +++++

Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
---

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Daniel Lezcano Oct. 18, 2010, 8:59 a.m. UTC | #1
On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
> This part contains the include files
> where include/net/netns/ip_vs.h is new and contains all moved vars.
>
> SUMMARY
>
>   include/net/ip_vs.h                     |  136 ++++---
>   include/net/net_namespace.h             |    2 +
>   include/net/netns/ip_vs.h               |  112 +++++
>
> Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
> ---
>
>    

[ ... ]

>   #ifdef CONFIG_IP_VS_IPV6
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index bd10a79..b59cdc5 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -15,6 +15,7 @@
>   #include<net/netns/ipv4.h>
>   #include<net/netns/ipv6.h>
>   #include<net/netns/dccp.h>
> +#include<net/netns/ip_vs.h>
>   #include<net/netns/x_tables.h>
>   #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>   #include<net/netns/conntrack.h>
> @@ -91,6 +92,7 @@ struct net {
>   	struct sk_buff_head	wext_nlevents;
>   #endif
>   	struct net_generic	*gen;
> +	struct netns_ipvs       *ipvs;
>   };
>    

IMHO, it would be better to use the net_generic infra-structure instead 
of adding a new field in the netns structure.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans Schillstrom Oct. 18, 2010, 9:54 a.m. UTC | #2
On Monday 18 October 2010 10:59:25 Daniel Lezcano wrote:
> On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
> > This part contains the include files
> > where include/net/netns/ip_vs.h is new and contains all moved vars.
> >
> > SUMMARY
> >
> >   include/net/ip_vs.h                     |  136 ++++---
> >   include/net/net_namespace.h             |    2 +
> >   include/net/netns/ip_vs.h               |  112 +++++
> >
> > Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
> > ---
> >
> >    
> 
> [ ... ]
> 
> >   #ifdef CONFIG_IP_VS_IPV6
> > diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> > index bd10a79..b59cdc5 100644
> > --- a/include/net/net_namespace.h
> > +++ b/include/net/net_namespace.h
> > @@ -15,6 +15,7 @@
> >   #include<net/netns/ipv4.h>
> >   #include<net/netns/ipv6.h>
> >   #include<net/netns/dccp.h>
> > +#include<net/netns/ip_vs.h>
> >   #include<net/netns/x_tables.h>
> >   #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> >   #include<net/netns/conntrack.h>
> > @@ -91,6 +92,7 @@ struct net {
> >   	struct sk_buff_head	wext_nlevents;
> >   #endif
> >   	struct net_generic	*gen;
> > +	struct netns_ipvs       *ipvs;
> >   };
> >    
> 
> IMHO, it would be better to use the net_generic infra-structure instead 
> of adding a new field in the netns structure.
> 
> 
I realized that to, but the performance penalty is quite high with net_generic :-(
But on the other hand if you are going to backport it, (without recompiling the kernel)
you gonna need it!

My sugestion, take both with a configuration switch like:
(i.e. avoid the rcu locking)

--- net/ip_vs.h ---
...
extern int ip_vs_net_id;		/* net id for ip_vs */


static inline struct netns_ipvs * net_ipvs(struct net* net, int id) {
#ifdef CONFIG_IP_VS_FAST_NETNS
	return net->ipvs;
#else
	return (struct netns_ipvs *)net_generic(net, id);
#endif
}
...

and where you need the netns_ipvs struct ptr,
[snip]
struct ip_vs_conn *ip_vs_conn_in_get(struct net *net, ....
{
	struct netns_ipvs *ipvs = net_ipvs(net, ip_vs_net_id);
...
Daniel Lezcano Oct. 18, 2010, 11:37 a.m. UTC | #3
On 10/18/2010 11:54 AM, Hans Schillstrom wrote:
> On Monday 18 October 2010 10:59:25 Daniel Lezcano wrote:
>    
>> On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
>>      
>>> This part contains the include files
>>> where include/net/netns/ip_vs.h is new and contains all moved vars.
>>>
>>> SUMMARY
>>>
>>>    include/net/ip_vs.h                     |  136 ++++---
>>>    include/net/net_namespace.h             |    2 +
>>>    include/net/netns/ip_vs.h               |  112 +++++
>>>
>>> Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
>>> ---
>>>
>>>
>>>        
>> [ ... ]
>>
>>      
>>>    #ifdef CONFIG_IP_VS_IPV6
>>> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
>>> index bd10a79..b59cdc5 100644
>>> --- a/include/net/net_namespace.h
>>> +++ b/include/net/net_namespace.h
>>> @@ -15,6 +15,7 @@
>>>    #include<net/netns/ipv4.h>
>>>    #include<net/netns/ipv6.h>
>>>    #include<net/netns/dccp.h>
>>> +#include<net/netns/ip_vs.h>
>>>    #include<net/netns/x_tables.h>
>>>    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>>>    #include<net/netns/conntrack.h>
>>> @@ -91,6 +92,7 @@ struct net {
>>>    	struct sk_buff_head	wext_nlevents;
>>>    #endif
>>>    	struct net_generic	*gen;
>>> +	struct netns_ipvs       *ipvs;
>>>    };
>>>
>>>        
>> IMHO, it would be better to use the net_generic infra-structure instead
>> of adding a new field in the netns structure.
>>
>>
>>      
> I realized that to, but the performance penalty is quite high with net_generic :-(
> But on the other hand if you are going to backport it, (without recompiling the kernel)
> you gonna need it!
>    

Hmm, yes. We don't want to have the init_net_ns performances to be impacted.

You use here a pointer which will be dereferenced like the net_generic, 
I don't think there will be
a big difference between using net_generic and using a pointer in the 
net namespace structure.

The difference is the id usage, but this one is based on the idr which 
is quite fast.

We should experiment a bit here to compare both solutions.

IMHO, we can (1) create a non-pointer netns_ipvs field in the net 
namespace structure or (2) use a pointer [with net_generic].

(1) is the faster but with the drawback of having a bigger memory 
footprint even if the ipvs module is not loaded.
In this case we have to take care of what we store in the netns_ipvs 
structure, that is reduce the per namespace table and so. At the first 
glance, I think we can reduce this to the sysctls and a very few data, 
for example using global tables tagged with the namespace and we don't 
break the cacheline alignment optimization.

(2) is slower but as the memory is allocated and freed when the module 
is loaded/unloaded. What I don't like with this approach is we add some 
overhead even if the netns is not compiled in the kernel.

> My sugestion, take both with a configuration switch like:
> (i.e. avoid the rcu locking)
>
> --- net/ip_vs.h ---
> ...
> extern int ip_vs_net_id;		/* net id for ip_vs */
>
>
> static inline struct netns_ipvs * net_ipvs(struct net* net, int id) {
> #ifdef CONFIG_IP_VS_FAST_NETNS
> 	return net->ipvs;
> #else
> 	return (struct netns_ipvs *)net_generic(net, id);
> #endif
> }
> ...
>
> and where you need the netns_ipvs struct ptr,
> [snip]
> struct ip_vs_conn *ip_vs_conn_in_get(struct net *net, ....
> {
> 	struct netns_ipvs *ipvs = net_ipvs(net, ip_vs_net_id);
> ...
>    

It is a nice way to wrap both solutions but at this point I don't think 
it is worth to add a 3rd option to compile ipvs.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans Schillstrom Oct. 18, 2010, 1:23 p.m. UTC | #4
On Monday 18 October 2010 13:37:38 Daniel Lezcano wrote:
> On 10/18/2010 11:54 AM, Hans Schillstrom wrote:
> > On Monday 18 October 2010 10:59:25 Daniel Lezcano wrote:
> >    
> >> On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
> >>      
> >>> This part contains the include files
> >>> where include/net/netns/ip_vs.h is new and contains all moved vars.
> >>>
> >>> SUMMARY
> >>>
> >>>    include/net/ip_vs.h                     |  136 ++++---
> >>>    include/net/net_namespace.h             |    2 +
> >>>    include/net/netns/ip_vs.h               |  112 +++++
> >>>
> >>> Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
> >>> ---
> >>>
> >>>
> >>>        
> >> [ ... ]
> >>
> >>      
> >>>    #ifdef CONFIG_IP_VS_IPV6
> >>> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> >>> index bd10a79..b59cdc5 100644
> >>> --- a/include/net/net_namespace.h
> >>> +++ b/include/net/net_namespace.h
> >>> @@ -15,6 +15,7 @@
> >>>    #include<net/netns/ipv4.h>
> >>>    #include<net/netns/ipv6.h>
> >>>    #include<net/netns/dccp.h>
> >>> +#include<net/netns/ip_vs.h>
> >>>    #include<net/netns/x_tables.h>
> >>>    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> >>>    #include<net/netns/conntrack.h>
> >>> @@ -91,6 +92,7 @@ struct net {
> >>>    	struct sk_buff_head	wext_nlevents;
> >>>    #endif
> >>>    	struct net_generic	*gen;
> >>> +	struct netns_ipvs       *ipvs;
> >>>    };
> >>>
> >>>        
> >> IMHO, it would be better to use the net_generic infra-structure instead
> >> of adding a new field in the netns structure.
> >>
> >>
> >>      
> > I realized that to, but the performance penalty is quite high with net_generic :-(
> > But on the other hand if you are going to backport it, (without recompiling the kernel)
> > you gonna need it!
> >    
> 
> Hmm, yes. We don't want to have the init_net_ns performances to be impacted.
> 
> You use here a pointer which will be dereferenced like the net_generic, 
> I don't think there will be
> a big difference between using net_generic and using a pointer in the 
> net namespace structure.
> 
> The difference is the id usage, but this one is based on the idr which 
> is quite fast.
> 

I'm not so sure about that, have a look at net_generic and rcu_read_lock
and compare   
 ipvs = net->ipvs;
vs.
 ipvs = net_generic(net, id)

static inline void *net_generic(struct net *net, int id)
{
	struct net_generic *ng;
	void *ptr;

	rcu_read_lock();
	ng = rcu_dereference(net->gen);
	BUG_ON(id == 0 || id > ng->len);
	ptr = ng->ptr[id - 1];
	rcu_read_unlock();

	return ptr;
}
...
static inline void rcu_read_lock(void)
{
        __rcu_read_lock();
        __acquire(RCU);
        rcu_read_acquire();
}

Another way of doing it is to pass the ipvs ptr instead of the net ptr,
and add *net to the ipvs struct.

> We should experiment a bit here to compare both solutions.
Agre
> 
I single stepped through the rcu_read_lock() on a x86_64 
and it's quite many "stepi" that you need to enter :-(

> IMHO, we can (1) create a non-pointer netns_ipvs field in the net 
> namespace structure or (2) use a pointer [with net_generic].
> 
> (1) is the faster but with the drawback of having a bigger memory 
> footprint even if the ipvs module is not loaded.
> In this case we have to take care of what we store in the netns_ipvs 
> structure, that is reduce the per namespace table and so. At the first 
> glance, I think we can reduce this to the sysctls and a very few data, 
> for example using global tables tagged with the namespace and we don't 
> break the cacheline alignment optimization.
> 
> (2) is slower but as the memory is allocated and freed when the module 
> is loaded/unloaded. What I don't like with this approach is we add some 
> overhead even if the netns is not compiled in the kernel.
> 
or (3)
 Like (1) with data that needs to be cache aligned in "struct net" 
 and the rest in an ipvs struct.
 Global hash tables or not ? 

> > My sugestion, take both with a configuration switch like:
> > (i.e. avoid the rcu locking)
> >
> > --- net/ip_vs.h ---
> > ...
> > extern int ip_vs_net_id;		/* net id for ip_vs */
> >
> >
> > static inline struct netns_ipvs * net_ipvs(struct net* net, int id) {
> > #ifdef CONFIG_IP_VS_FAST_NETNS
> > 	return net->ipvs;
> > #else
> > 	return (struct netns_ipvs *)net_generic(net, id);
> > #endif
> > }
> > ...
> >
> > and where you need the netns_ipvs struct ptr,
> > [snip]
> > struct ip_vs_conn *ip_vs_conn_in_get(struct net *net, ....
> > {
> > 	struct netns_ipvs *ipvs = net_ipvs(net, ip_vs_net_id);
> > ...
> >    
> 
> It is a nice way to wrap both solutions but at this point I don't think 
> it is worth to add a 3rd option to compile ipvs.
> 
>
Daniel Lezcano Oct. 18, 2010, 2:26 p.m. UTC | #5
On 10/18/2010 03:23 PM, Hans Schillstrom wrote:
> On Monday 18 October 2010 13:37:38 Daniel Lezcano wrote:
>    
>> On 10/18/2010 11:54 AM, Hans Schillstrom wrote:
>>      
>>> On Monday 18 October 2010 10:59:25 Daniel Lezcano wrote:
>>>
>>>        
>>>> On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
>>>>
>>>>          
>>>>> This part contains the include files
>>>>> where include/net/netns/ip_vs.h is new and contains all moved vars.
>>>>>
>>>>> SUMMARY
>>>>>
>>>>>     include/net/ip_vs.h                     |  136 ++++---
>>>>>     include/net/net_namespace.h             |    2 +
>>>>>     include/net/netns/ip_vs.h               |  112 +++++
>>>>>
>>>>> Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
>>>>> ---
>>>>>
>>>>>
>>>>>
>>>>>            
>>>> [ ... ]
>>>>
>>>>
>>>>          
>>>>>     #ifdef CONFIG_IP_VS_IPV6
>>>>> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
>>>>> index bd10a79..b59cdc5 100644
>>>>> --- a/include/net/net_namespace.h
>>>>> +++ b/include/net/net_namespace.h
>>>>> @@ -15,6 +15,7 @@
>>>>>     #include<net/netns/ipv4.h>
>>>>>     #include<net/netns/ipv6.h>
>>>>>     #include<net/netns/dccp.h>
>>>>> +#include<net/netns/ip_vs.h>
>>>>>     #include<net/netns/x_tables.h>
>>>>>     #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>>>>>     #include<net/netns/conntrack.h>
>>>>> @@ -91,6 +92,7 @@ struct net {
>>>>>     	struct sk_buff_head	wext_nlevents;
>>>>>     #endif
>>>>>     	struct net_generic	*gen;
>>>>> +	struct netns_ipvs       *ipvs;
>>>>>     };
>>>>>
>>>>>
>>>>>            
>>>> IMHO, it would be better to use the net_generic infra-structure instead
>>>> of adding a new field in the netns structure.
>>>>
>>>>
>>>>
>>>>          
>>> I realized that to, but the performance penalty is quite high with net_generic :-(
>>> But on the other hand if you are going to backport it, (without recompiling the kernel)
>>> you gonna need it!
>>>
>>>        
>> Hmm, yes. We don't want to have the init_net_ns performances to be impacted.
>>
>> You use here a pointer which will be dereferenced like the net_generic,
>> I don't think there will be
>> a big difference between using net_generic and using a pointer in the
>> net namespace structure.
>>
>> The difference is the id usage, but this one is based on the idr which
>> is quite fast.
>>
>>      
> I'm not so sure about that, have a look at net_generic and rcu_read_lock
> and compare
>   ipvs = net->ipvs;
> vs.
>   ipvs = net_generic(net, id)
>
> static inline void *net_generic(struct net *net, int id)
> {
> 	struct net_generic *ng;
> 	void *ptr;
>
> 	rcu_read_lock();
> 	ng = rcu_dereference(net->gen);
> 	BUG_ON(id == 0 || id>  ng->len);
> 	ptr = ng->ptr[id - 1];
> 	rcu_read_unlock();
>
> 	return ptr;
> }
> ...
> static inline void rcu_read_lock(void)
> {
>          __rcu_read_lock();
>          __acquire(RCU);
>          rcu_read_acquire();
> }
>    

Yep, right. In fact I don't really like the net_generic and an ipvs ptr. 
I am not sure it is adequate for this component.

> Another way of doing it is to pass the ipvs ptr instead of the net ptr,
> and add *net to the ipvs struct.
>
>    
>> We should experiment a bit here to compare both solutions.
>>      
> Agre
>    
>>      
> I single stepped through the rcu_read_lock() on a x86_64
> and it's quite many "stepi" that you need to enter :-(
>
>    
>> IMHO, we can (1) create a non-pointer netns_ipvs field in the net
>> namespace structure or (2) use a pointer [with net_generic].
>>
>> (1) is the faster but with the drawback of having a bigger memory
>> footprint even if the ipvs module is not loaded.
>> In this case we have to take care of what we store in the netns_ipvs
>> structure, that is reduce the per namespace table and so. At the first
>> glance, I think we can reduce this to the sysctls and a very few data,
>> for example using global tables tagged with the namespace and we don't
>> break the cacheline alignment optimization.
>>
>> (2) is slower but as the memory is allocated and freed when the module
>> is loaded/unloaded. What I don't like with this approach is we add some
>> overhead even if the netns is not compiled in the kernel.
>>
>>      
> or (3)
>   Like (1) with data that needs to be cache aligned in "struct net"
>   and the rest in an ipvs struct.
>    

Ah, right. That could be a nice solution.


>   Global hash tables or not ?
>    

In the past, we used global hash tables because of the cacheline miss.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul E. McKenney Oct. 19, 2010, 6:44 p.m. UTC | #6
On Mon, Oct 18, 2010 at 03:23:48PM +0200, Hans Schillstrom wrote:
> On Monday 18 October 2010 13:37:38 Daniel Lezcano wrote:
> > On 10/18/2010 11:54 AM, Hans Schillstrom wrote:
> > > On Monday 18 October 2010 10:59:25 Daniel Lezcano wrote:
> > >    
> > >> On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
> > >>      
> > >>> This part contains the include files
> > >>> where include/net/netns/ip_vs.h is new and contains all moved vars.
> > >>>
> > >>> SUMMARY
> > >>>
> > >>>    include/net/ip_vs.h                     |  136 ++++---
> > >>>    include/net/net_namespace.h             |    2 +
> > >>>    include/net/netns/ip_vs.h               |  112 +++++
> > >>>
> > >>> Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
> > >>> ---
> > >>>
> > >>>
> > >>>        
> > >> [ ... ]
> > >>
> > >>      
> > >>>    #ifdef CONFIG_IP_VS_IPV6
> > >>> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> > >>> index bd10a79..b59cdc5 100644
> > >>> --- a/include/net/net_namespace.h
> > >>> +++ b/include/net/net_namespace.h
> > >>> @@ -15,6 +15,7 @@
> > >>>    #include<net/netns/ipv4.h>
> > >>>    #include<net/netns/ipv6.h>
> > >>>    #include<net/netns/dccp.h>
> > >>> +#include<net/netns/ip_vs.h>
> > >>>    #include<net/netns/x_tables.h>
> > >>>    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> > >>>    #include<net/netns/conntrack.h>
> > >>> @@ -91,6 +92,7 @@ struct net {
> > >>>    	struct sk_buff_head	wext_nlevents;
> > >>>    #endif
> > >>>    	struct net_generic	*gen;
> > >>> +	struct netns_ipvs       *ipvs;
> > >>>    };
> > >>>
> > >>>        
> > >> IMHO, it would be better to use the net_generic infra-structure instead
> > >> of adding a new field in the netns structure.
> > >>
> > >>
> > >>      
> > > I realized that to, but the performance penalty is quite high with net_generic :-(
> > > But on the other hand if you are going to backport it, (without recompiling the kernel)
> > > you gonna need it!
> > >    
> > 
> > Hmm, yes. We don't want to have the init_net_ns performances to be impacted.
> > 
> > You use here a pointer which will be dereferenced like the net_generic, 
> > I don't think there will be
> > a big difference between using net_generic and using a pointer in the 
> > net namespace structure.
> > 
> > The difference is the id usage, but this one is based on the idr which 
> > is quite fast.
> > 
> 
> I'm not so sure about that, have a look at net_generic and rcu_read_lock
> and compare   
>  ipvs = net->ipvs;
> vs.
>  ipvs = net_generic(net, id)
> 
> static inline void *net_generic(struct net *net, int id)
> {
> 	struct net_generic *ng;
> 	void *ptr;
> 
> 	rcu_read_lock();
> 	ng = rcu_dereference(net->gen);
> 	BUG_ON(id == 0 || id > ng->len);
> 	ptr = ng->ptr[id - 1];
> 	rcu_read_unlock();
> 
> 	return ptr;
> }
> ...
> static inline void rcu_read_lock(void)
> {
>         __rcu_read_lock();
>         __acquire(RCU);
>         rcu_read_acquire();
> }
> 
> Another way of doing it is to pass the ipvs ptr instead of the net ptr,
> and add *net to the ipvs struct.
> 
> > We should experiment a bit here to compare both solutions.
> Agre
> > 
> I single stepped through the rcu_read_lock() on a x86_64 
> and it's quite many "stepi" that you need to enter :-(

Was this by chance with lockdep enabled?  If not, could you please send
your .config?

							Thanx, Paul

> > IMHO, we can (1) create a non-pointer netns_ipvs field in the net 
> > namespace structure or (2) use a pointer [with net_generic].
> > 
> > (1) is the faster but with the drawback of having a bigger memory 
> > footprint even if the ipvs module is not loaded.
> > In this case we have to take care of what we store in the netns_ipvs 
> > structure, that is reduce the per namespace table and so. At the first 
> > glance, I think we can reduce this to the sysctls and a very few data, 
> > for example using global tables tagged with the namespace and we don't 
> > break the cacheline alignment optimization.
> > 
> > (2) is slower but as the memory is allocated and freed when the module 
> > is loaded/unloaded. What I don't like with this approach is we add some 
> > overhead even if the netns is not compiled in the kernel.
> > 
> or (3)
>  Like (1) with data that needs to be cache aligned in "struct net" 
>  and the rest in an ipvs struct.
>  Global hash tables or not ? 
> 
> > > My sugestion, take both with a configuration switch like:
> > > (i.e. avoid the rcu locking)
> > >
> > > --- net/ip_vs.h ---
> > > ...
> > > extern int ip_vs_net_id;		/* net id for ip_vs */
> > >
> > >
> > > static inline struct netns_ipvs * net_ipvs(struct net* net, int id) {
> > > #ifdef CONFIG_IP_VS_FAST_NETNS
> > > 	return net->ipvs;
> > > #else
> > > 	return (struct netns_ipvs *)net_generic(net, id);
> > > #endif
> > > }
> > > ...
> > >
> > > and where you need the netns_ipvs struct ptr,
> > > [snip]
> > > struct ip_vs_conn *ip_vs_conn_in_get(struct net *net, ....
> > > {
> > > 	struct netns_ipvs *ipvs = net_ipvs(net, ip_vs_net_id);
> > > ...
> > >    
> > 
> > It is a nice way to wrap both solutions but at this point I don't think 
> > it is worth to add a 3rd option to compile ipvs.
> > 
> > 
> 
> -- 
> Regards
> Hans Schillstrom <hans.schillstrom@ericsson.com>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans Schillstrom Oct. 20, 2010, 8:25 a.m. UTC | #7
On Tuesday 19 October 2010 20:44:36 Paul E. McKenney wrote:
> On Mon, Oct 18, 2010 at 03:23:48PM +0200, Hans Schillstrom wrote:
> > On Monday 18 October 2010 13:37:38 Daniel Lezcano wrote:
> > > On 10/18/2010 11:54 AM, Hans Schillstrom wrote:
> > > > On Monday 18 October 2010 10:59:25 Daniel Lezcano wrote:
> > > >
> > > >> On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
> > > >>
> > > >>> This part contains the include files
> > > >>> where include/net/netns/ip_vs.h is new and contains all moved vars.
> > > >>>
> > > >>> SUMMARY
> > > >>>
> > > >>>    include/net/ip_vs.h                     |  136 ++++---
> > > >>>    include/net/net_namespace.h             |    2 +
> > > >>>    include/net/netns/ip_vs.h               |  112 +++++
> > > >>>
> > > >>> Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
> > > >>> ---
> > > >>>
> > > >>>
> > > >>>
> > > >> [ ... ]
> > > >>
> > > >>
> > > >>>    #ifdef CONFIG_IP_VS_IPV6
> > > >>> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> > > >>> index bd10a79..b59cdc5 100644
> > > >>> --- a/include/net/net_namespace.h
> > > >>> +++ b/include/net/net_namespace.h
> > > >>> @@ -15,6 +15,7 @@
> > > >>>    #include<net/netns/ipv4.h>
> > > >>>    #include<net/netns/ipv6.h>
> > > >>>    #include<net/netns/dccp.h>
> > > >>> +#include<net/netns/ip_vs.h>
> > > >>>    #include<net/netns/x_tables.h>
> > > >>>    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> > > >>>    #include<net/netns/conntrack.h>
> > > >>> @@ -91,6 +92,7 @@ struct net {
> > > >>>    	struct sk_buff_head	wext_nlevents;
> > > >>>    #endif
> > > >>>    	struct net_generic	*gen;
> > > >>> +	struct netns_ipvs       *ipvs;
> > > >>>    };
> > > >>>
> > > >>>
> > > >> IMHO, it would be better to use the net_generic infra-structure instead
> > > >> of adding a new field in the netns structure.
> > > >>
> > > >>
> > > >>
> > > > I realized that to, but the performance penalty is quite high with net_generic :-(
> > > > But on the other hand if you are going to backport it, (without recompiling the kernel)
> > > > you gonna need it!
> > > >
> > >
> > > Hmm, yes. We don't want to have the init_net_ns performances to be impacted.
> > >
> > > You use here a pointer which will be dereferenced like the net_generic,
> > > I don't think there will be
> > > a big difference between using net_generic and using a pointer in the
> > > net namespace structure.
> > >
> > > The difference is the id usage, but this one is based on the idr which
> > > is quite fast.
> > >
> >
> > I'm not so sure about that, have a look at net_generic and rcu_read_lock
> > and compare
> >  ipvs = net->ipvs;
> > vs.
> >  ipvs = net_generic(net, id)
> >
> > static inline void *net_generic(struct net *net, int id)
> > {
> > 	struct net_generic *ng;
> > 	void *ptr;
> >
> > 	rcu_read_lock();
> > 	ng = rcu_dereference(net->gen);
> > 	BUG_ON(id == 0 || id > ng->len);
> > 	ptr = ng->ptr[id - 1];
> > 	rcu_read_unlock();
> >
> > 	return ptr;
> > }
> > ...
> > static inline void rcu_read_lock(void)
> > {
> >         __rcu_read_lock();
> >         __acquire(RCU);
> >         rcu_read_acquire();
> > }
> >
> > Another way of doing it is to pass the ipvs ptr instead of the net ptr,
> > and add *net to the ipvs struct.
> >
> > > We should experiment a bit here to compare both solutions.
> > Agre
> > >
> > I single stepped through the rcu_read_lock() on a x86_64
> > and it's quite many "stepi" that you need to enter :-(
>
> Was this by chance with lockdep enabled?  If not, could you please send
> your .config?
>
> 							Thanx, Paul

No lockdep, but what I ment is that net_generic is not as fast as a plain ptr->xxx.
IPVS has hooks in the netfilter chain, and gets a huge amount of packets .

I don't think IPVS is a candidate for net_generic, it should have its own part in "struct net"
That was my point.
( No critic to locking or net_generic)

--
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul E. McKenney Oct. 20, 2010, 4:02 p.m. UTC | #8
On Wed, Oct 20, 2010 at 10:25:19AM +0200, Hans Schillstrom wrote:
> On Tuesday 19 October 2010 20:44:36 Paul E. McKenney wrote:
> > On Mon, Oct 18, 2010 at 03:23:48PM +0200, Hans Schillstrom wrote:
> > > On Monday 18 October 2010 13:37:38 Daniel Lezcano wrote:
> > > > On 10/18/2010 11:54 AM, Hans Schillstrom wrote:
> > > > > On Monday 18 October 2010 10:59:25 Daniel Lezcano wrote:
> > > > >
> > > > >> On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
> > > > >>
> > > > >>> This part contains the include files
> > > > >>> where include/net/netns/ip_vs.h is new and contains all moved vars.
> > > > >>>
> > > > >>> SUMMARY
> > > > >>>
> > > > >>>    include/net/ip_vs.h                     |  136 ++++---
> > > > >>>    include/net/net_namespace.h             |    2 +
> > > > >>>    include/net/netns/ip_vs.h               |  112 +++++
> > > > >>>
> > > > >>> Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
> > > > >>> ---
> > > > >>>
> > > > >>>
> > > > >>>
> > > > >> [ ... ]
> > > > >>
> > > > >>
> > > > >>>    #ifdef CONFIG_IP_VS_IPV6
> > > > >>> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> > > > >>> index bd10a79..b59cdc5 100644
> > > > >>> --- a/include/net/net_namespace.h
> > > > >>> +++ b/include/net/net_namespace.h
> > > > >>> @@ -15,6 +15,7 @@
> > > > >>>    #include<net/netns/ipv4.h>
> > > > >>>    #include<net/netns/ipv6.h>
> > > > >>>    #include<net/netns/dccp.h>
> > > > >>> +#include<net/netns/ip_vs.h>
> > > > >>>    #include<net/netns/x_tables.h>
> > > > >>>    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> > > > >>>    #include<net/netns/conntrack.h>
> > > > >>> @@ -91,6 +92,7 @@ struct net {
> > > > >>>    	struct sk_buff_head	wext_nlevents;
> > > > >>>    #endif
> > > > >>>    	struct net_generic	*gen;
> > > > >>> +	struct netns_ipvs       *ipvs;
> > > > >>>    };
> > > > >>>
> > > > >>>
> > > > >> IMHO, it would be better to use the net_generic infra-structure instead
> > > > >> of adding a new field in the netns structure.
> > > > >>
> > > > >>
> > > > >>
> > > > > I realized that to, but the performance penalty is quite high with net_generic :-(
> > > > > But on the other hand if you are going to backport it, (without recompiling the kernel)
> > > > > you gonna need it!
> > > > >
> > > >
> > > > Hmm, yes. We don't want to have the init_net_ns performances to be impacted.
> > > >
> > > > You use here a pointer which will be dereferenced like the net_generic,
> > > > I don't think there will be
> > > > a big difference between using net_generic and using a pointer in the
> > > > net namespace structure.
> > > >
> > > > The difference is the id usage, but this one is based on the idr which
> > > > is quite fast.
> > > >
> > >
> > > I'm not so sure about that, have a look at net_generic and rcu_read_lock
> > > and compare
> > >  ipvs = net->ipvs;
> > > vs.
> > >  ipvs = net_generic(net, id)
> > >
> > > static inline void *net_generic(struct net *net, int id)
> > > {
> > > 	struct net_generic *ng;
> > > 	void *ptr;
> > >
> > > 	rcu_read_lock();
> > > 	ng = rcu_dereference(net->gen);
> > > 	BUG_ON(id == 0 || id > ng->len);
> > > 	ptr = ng->ptr[id - 1];
> > > 	rcu_read_unlock();
> > >
> > > 	return ptr;
> > > }
> > > ...
> > > static inline void rcu_read_lock(void)
> > > {
> > >         __rcu_read_lock();
> > >         __acquire(RCU);
> > >         rcu_read_acquire();
> > > }
> > >
> > > Another way of doing it is to pass the ipvs ptr instead of the net ptr,
> > > and add *net to the ipvs struct.
> > >
> > > > We should experiment a bit here to compare both solutions.
> > > Agre
> > > >
> > > I single stepped through the rcu_read_lock() on a x86_64
> > > and it's quite many "stepi" that you need to enter :-(
> >
> > Was this by chance with lockdep enabled?  If not, could you please send
> > your .config?
> >
> > 							Thanx, Paul
> 
> No lockdep, but what I ment is that net_generic is not as fast as a plain ptr->xxx.
> IPVS has hooks in the netfilter chain, and gets a huge amount of packets .
> 
> I don't think IPVS is a candidate for net_generic, it should have its own part in "struct net"
> That was my point.
> ( No critic to locking or net_generic)

You said that there were a lot of "stepi" commands to get through
rcu_read_lock() on x86_64.  This is quite surprising, especially if you
built with CONFIG_RCU_TREE.  Even if you built with CONFIG_PREEMPT_RCU_TREE,
you should only see something like the following from rcu_read_lock():

000000b7 <__rcu_read_lock>:
      b7:	55                   	push   %ebp
      b8:	64 a1 00 00 00 00    	mov    %fs:0x0,%eax
      be:	ff 80 80 01 00 00    	incl   0x180(%eax)
      c4:	89 e5                	mov    %esp,%ebp
      c6:	5d                   	pop    %ebp
      c7:	c3                   	ret    

Unless you have some sort of debugging options turned on.  Or unless
six instructions counts for "quite many" stepi commands.  ;-)

So I am quite curious, independent of whether or not IPVS is a candidate
for net_generic.  That choice for IPVS is not mine to make, and I will
trust the relevant developers and maintainers to make the right choice,
whether that be RCU or something else.  Even I do not claim that RCU
is the right tool for all jobs!  ;-)

							Thanx, Paul
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans Schillstrom Oct. 21, 2010, 7:45 a.m. UTC | #9
On Wednesday 20 October 2010 18:02:06 Paul E. McKenney wrote:
> On Wed, Oct 20, 2010 at 10:25:19AM +0200, Hans Schillstrom wrote:
> > On Tuesday 19 October 2010 20:44:36 Paul E. McKenney wrote:
> > > On Mon, Oct 18, 2010 at 03:23:48PM +0200, Hans Schillstrom wrote:
> > > > On Monday 18 October 2010 13:37:38 Daniel Lezcano wrote:
> > > > > On 10/18/2010 11:54 AM, Hans Schillstrom wrote:
> > > > > > On Monday 18 October 2010 10:59:25 Daniel Lezcano wrote:
> > > > > >
> > > > > >> On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
> > > > > >>
> > > > > >>> This part contains the include files
> > > > > >>> where include/net/netns/ip_vs.h is new and contains all moved vars.
> > > > > >>>
> > > > > >>> SUMMARY
> > > > > >>>
> > > > > >>>    include/net/ip_vs.h                     |  136 ++++---
> > > > > >>>    include/net/net_namespace.h             |    2 +
> > > > > >>>    include/net/netns/ip_vs.h               |  112 +++++
> > > > > >>>
> > > > > >>> Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
> > > > > >>> ---
> > > > > >>>
> > > > > >>>
> > > > > >>>
> > > > > >> [ ... ]
> > > > > >>
> > > > > >>
> > > > > >>>    #ifdef CONFIG_IP_VS_IPV6
> > > > > >>> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> > > > > >>> index bd10a79..b59cdc5 100644
> > > > > >>> --- a/include/net/net_namespace.h
> > > > > >>> +++ b/include/net/net_namespace.h
> > > > > >>> @@ -15,6 +15,7 @@
> > > > > >>>    #include<net/netns/ipv4.h>
> > > > > >>>    #include<net/netns/ipv6.h>
> > > > > >>>    #include<net/netns/dccp.h>
> > > > > >>> +#include<net/netns/ip_vs.h>
> > > > > >>>    #include<net/netns/x_tables.h>
> > > > > >>>    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> > > > > >>>    #include<net/netns/conntrack.h>
> > > > > >>> @@ -91,6 +92,7 @@ struct net {
> > > > > >>>    	struct sk_buff_head	wext_nlevents;
> > > > > >>>    #endif
> > > > > >>>    	struct net_generic	*gen;
> > > > > >>> +	struct netns_ipvs       *ipvs;
> > > > > >>>    };
> > > > > >>>
> > > > > >>>
> > > > > >> IMHO, it would be better to use the net_generic infra-structure instead
> > > > > >> of adding a new field in the netns structure.
> > > > > >>
> > > > > >>
> > > > > >>
> > > > > > I realized that to, but the performance penalty is quite high with net_generic :-(
> > > > > > But on the other hand if you are going to backport it, (without recompiling the kernel)
> > > > > > you gonna need it!
> > > > > >
> > > > >
> > > > > Hmm, yes. We don't want to have the init_net_ns performances to be impacted.
> > > > >
> > > > > You use here a pointer which will be dereferenced like the net_generic,
> > > > > I don't think there will be
> > > > > a big difference between using net_generic and using a pointer in the
> > > > > net namespace structure.
> > > > >
> > > > > The difference is the id usage, but this one is based on the idr which
> > > > > is quite fast.
> > > > >
> > > >
> > > > I'm not so sure about that, have a look at net_generic and rcu_read_lock
> > > > and compare
> > > >  ipvs = net->ipvs;
> > > > vs.
> > > >  ipvs = net_generic(net, id)
> > > >
> > > > static inline void *net_generic(struct net *net, int id)
> > > > {
> > > > 	struct net_generic *ng;
> > > > 	void *ptr;
> > > >
> > > > 	rcu_read_lock();
> > > > 	ng = rcu_dereference(net->gen);
> > > > 	BUG_ON(id == 0 || id > ng->len);
> > > > 	ptr = ng->ptr[id - 1];
> > > > 	rcu_read_unlock();
> > > >
> > > > 	return ptr;
> > > > }
> > > > ...
> > > > static inline void rcu_read_lock(void)
> > > > {
> > > >         __rcu_read_lock();
> > > >         __acquire(RCU);
> > > >         rcu_read_acquire();
> > > > }
> > > >
> > > > Another way of doing it is to pass the ipvs ptr instead of the net ptr,
> > > > and add *net to the ipvs struct.
> > > >
> > > > > We should experiment a bit here to compare both solutions.
> > > > Agre
> > > > >
> > > > I single stepped through the rcu_read_lock() on a x86_64
> > > > and it's quite many "stepi" that you need to enter :-(
> > >
> > > Was this by chance with lockdep enabled?  If not, could you please send
> > > your .config?
> > >
> > > 							Thanx, Paul
> >
> > No lockdep, but what I ment is that net_generic is not as fast as a plain ptr->xxx.
> > IPVS has hooks in the netfilter chain, and gets a huge amount of packets .
> >
> > I don't think IPVS is a candidate for net_generic, it should have its own part in "struct net"
> > That was my point.
> > ( No critic to locking or net_generic)
>
> You said that there were a lot of "stepi" commands to get through
> rcu_read_lock() on x86_64.  This is quite surprising, especially if you
> built with CONFIG_RCU_TREE.  Even if you built with CONFIG_PREEMPT_RCU_TREE,
> you should only see something like the following from rcu_read_lock():
>
> 000000b7 <__rcu_read_lock>:
>       b7:	55                   	push   %ebp
>       b8:	64 a1 00 00 00 00    	mov    %fs:0x0,%eax
>       be:	ff 80 80 01 00 00    	incl   0x180(%eax)
>       c4:	89 e5                	mov    %esp,%ebp
>       c6:	5d                   	pop    %ebp
>       c7:	c3                   	ret
>
> Unless you have some sort of debugging options turned on.  Or unless
> six instructions counts for "quite many" stepi commands.  ;-)
>
I do have this (and some debuging)
__rcu_read_lock()
=> 0xffffffff8108bcf3 <+0>:	push   %rbp
   0xffffffff8108bcf4 <+1>:	mov    %rsp,%rbp
   0xffffffff8108bcf7 <+4>:	nopl   0x0(%rax,%rax,1)
   0xffffffff8108bcfc <+9>:	mov    %gs:0xb540,%rax
   0xffffffff8108bd05 <+18>:	mov    0x108(%rax),%edx
   0xffffffff8108bd0b <+24>:	inc    %edx
   0xffffffff8108bd0d <+26>:	mov    %edx,0x108(%rax)
   0xffffffff8108bd13 <+32>:	leaveq
   0xffffffff8108bd14 <+33>:	retq

which is not that many, actually imprerssing few instructions :-)

Thanks
	Hans

> So I am quite curious, independent of whether or not IPVS is a candidate
> for net_generic.  That choice for IPVS is not mine to make, and I will
> trust the relevant developers and maintainers to make the right choice,
> whether that be RCU or something else.  Even I do not claim that RCU
> is the right tool for all jobs!  ;-)
>
> 							Thanx, Paul
> --
> To unsubscribe from this list: send the line "unsubscribe lvs-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 21, 2010, 8:01 a.m. UTC | #10
Le jeudi 21 octobre 2010 à 09:45 +0200, Hans Schillstrom a écrit :
> I do have this (and some debuging)
> __rcu_read_lock()
> => 0xffffffff8108bcf3 <+0>:	push   %rbp
>    0xffffffff8108bcf4 <+1>:	mov    %rsp,%rbp
>    0xffffffff8108bcf7 <+4>:	nopl   0x0(%rax,%rax,1)
>    0xffffffff8108bcfc <+9>:	mov    %gs:0xb540,%rax
>    0xffffffff8108bd05 <+18>:	mov    0x108(%rax),%edx
>    0xffffffff8108bd0b <+24>:	inc    %edx
>    0xffffffff8108bd0d <+26>:	mov    %edx,0x108(%rax)
>    0xffffffff8108bd13 <+32>:	leaveq
>    0xffffffff8108bd14 <+33>:	retq
> 
> which is not that many, actually imprerssing few instructions :-)

nopl   0x0(%rax,%rax,1) is a filler because of extra instrumentation in
your kernel.

Maybe you could find out why your compiler dont use

	incl 0x108(%rax)

instead of

	mov    0x108(%rax),%edx
	inc    %edx
	mov    %edx,0x108(%rax)


So rcu_read_lock() is really _two_ instructions.

I agree with Paul with the "few" qualification... :-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 21, 2010, 8:58 a.m. UTC | #11
> You said that there were a lot of "stepi" commands to get through
> rcu_read_lock() on x86_64.  This is quite surprising, especially if you
> built with CONFIG_RCU_TREE.  Even if you built with CONFIG_PREEMPT_RCU_TREE,
> you should only see something like the following from rcu_read_lock():
> 
> 000000b7 <__rcu_read_lock>:
>       b7:	55                   	push   %ebp
>       b8:	64 a1 00 00 00 00    	mov    %fs:0x0,%eax
>       be:	ff 80 80 01 00 00    	incl   0x180(%eax)
>       c4:	89 e5                	mov    %esp,%ebp
>       c6:	5d                   	pop    %ebp
>       c7:	c3                   	ret    
> 
> Unless you have some sort of debugging options turned on.  Or unless
> six instructions counts for "quite many" stepi commands.  ;-)
> 

Paul, this should be inlined, dont you think ?

Also, I dont understand why we use ACCESS_ONCE() in rcu_read_lock()

ACCESS_ONCE(current->rcu_read_lock_nesting)++;

Apparently, some compilers are a bit noisy here.

mov    0x1b0(%rdx),%eax
inc    %eax
mov    %eax,0x1b0(%rdx)

instead of :

incl   0x1b0(%rax)

So if the ACCESS_ONCE() is needed, we might add a comment, because it's
not obvious ;)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul E. McKenney Oct. 21, 2010, 3:16 p.m. UTC | #12
On Thu, Oct 21, 2010 at 10:58:24AM +0200, Eric Dumazet wrote:
> 
> > You said that there were a lot of "stepi" commands to get through
> > rcu_read_lock() on x86_64.  This is quite surprising, especially if you
> > built with CONFIG_RCU_TREE.  Even if you built with CONFIG_PREEMPT_RCU_TREE,
> > you should only see something like the following from rcu_read_lock():
> > 
> > 000000b7 <__rcu_read_lock>:
> >       b7:	55                   	push   %ebp
> >       b8:	64 a1 00 00 00 00    	mov    %fs:0x0,%eax
> >       be:	ff 80 80 01 00 00    	incl   0x180(%eax)
> >       c4:	89 e5                	mov    %esp,%ebp
> >       c6:	5d                   	pop    %ebp
> >       c7:	c3                   	ret    
> > 
> > Unless you have some sort of debugging options turned on.  Or unless
> > six instructions counts for "quite many" stepi commands.  ;-)
> 
> Paul, this should be inlined, dont you think ?

Indeed it should!!!  It is out-of-line due to header-file issues.
Lai Jiangshan proposed a change to kbuild to allow it to be inlined as
part of his ring-RCU patch, and I have asked him to submit a version
of that for Tree and Tiny preemptible RCU.  This is the usual trick of
having the build system compile the data structure and emit offsets,
which are then used in the main kernel build.  (Yes, I did something
similar in DYNIX/ptx, but never managed to work up the courage to attempt
the equivalent in Linux's kbuild, so props to Lai!)

> Also, I dont understand why we use ACCESS_ONCE() in rcu_read_lock()
> 
> ACCESS_ONCE(current->rcu_read_lock_nesting)++;
> 
> Apparently, some compilers are a bit noisy here.
> 
> mov    0x1b0(%rdx),%eax
> inc    %eax
> mov    %eax,0x1b0(%rdx)
> 
> instead of :
> 
> incl   0x1b0(%rax)
> 
> So if the ACCESS_ONCE() is needed, we might add a comment, because it's
> not obvious ;)

Here is what it looks like in my -rcu tree:

void __rcu_read_lock(void)
{
	current->rcu_read_lock_nesting++;
	barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
}

So yes, I finally did convince myself that the ACCESS_ONCE was not
needed.  ;-)

This is not yet in mainline, but Ingo sent the series containing this
commit (80dcf60e) to Linus earlier today, so there is hope!

							Thanx, Paul
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul E. McKenney Oct. 21, 2010, 3:18 p.m. UTC | #13
On Thu, Oct 21, 2010 at 10:01:12AM +0200, Eric Dumazet wrote:
> Le jeudi 21 octobre 2010 à 09:45 +0200, Hans Schillstrom a écrit :
> > I do have this (and some debuging)
> > __rcu_read_lock()
> > => 0xffffffff8108bcf3 <+0>:	push   %rbp
> >    0xffffffff8108bcf4 <+1>:	mov    %rsp,%rbp
> >    0xffffffff8108bcf7 <+4>:	nopl   0x0(%rax,%rax,1)
> >    0xffffffff8108bcfc <+9>:	mov    %gs:0xb540,%rax
> >    0xffffffff8108bd05 <+18>:	mov    0x108(%rax),%edx
> >    0xffffffff8108bd0b <+24>:	inc    %edx
> >    0xffffffff8108bd0d <+26>:	mov    %edx,0x108(%rax)
> >    0xffffffff8108bd13 <+32>:	leaveq
> >    0xffffffff8108bd14 <+33>:	retq
> > 
> > which is not that many, actually imprerssing few instructions :-)
> 
> nopl   0x0(%rax,%rax,1) is a filler because of extra instrumentation in
> your kernel.
> 
> Maybe you could find out why your compiler dont use
> 
> 	incl 0x108(%rax)
> 
> instead of
> 
> 	mov    0x108(%rax),%edx
> 	inc    %edx
> 	mov    %edx,0x108(%rax)
> 
> 
> So rcu_read_lock() is really _two_ instructions.
> 
> I agree with Paul with the "few" qualification... :-)

Thank you!  ;-)

And the reason for the three-instruction shuffle is that Hans's kernel
does not yet contain commit 80dcf60e.  That commit is on its way upstream,
and will hopefully make the current merge window.

							Thanx, Paul
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Oct. 21, 2010, 3:24 p.m. UTC | #14
Le jeudi 21 octobre 2010 à 08:16 -0700, Paul E. McKenney a écrit :

> Here is what it looks like in my -rcu tree:
> 
> void __rcu_read_lock(void)
> {
> 	current->rcu_read_lock_nesting++;
> 	barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
> }
> 
> So yes, I finally did convince myself that the ACCESS_ONCE was not
> needed.  ;-)
> 
> This is not yet in mainline, but Ingo sent the series containing this
> commit (80dcf60e) to Linus earlier today, so there is hope!
> 

Excellent, thanks for taking time to clarify this Paul.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index b17f863..b40a0fb 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -288,6 +288,7 @@  struct iphdr;
 struct ip_vs_conn;
 struct ip_vs_app;
 struct sk_buff;
+struct ip_vs_proto_data;

 struct ip_vs_protocol {
 	struct ip_vs_protocol	*next;
@@ -302,6 +303,10 @@  struct ip_vs_protocol {

 	void (*exit)(struct ip_vs_protocol *pp);

+	void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
+	void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
 	int (*conn_schedule)(int af, struct sk_buff *skb,
 			     struct ip_vs_protocol *pp,
 			     int *verdict, struct ip_vs_conn **cpp);
@@ -337,11 +342,11 @@  struct ip_vs_protocol {
 				const struct sk_buff *skb,
 				struct ip_vs_protocol *pp);

-	int (*register_app)(struct ip_vs_app *inc);
+	int (*register_app)(struct net *net, struct ip_vs_app *inc);

-	void (*unregister_app)(struct ip_vs_app *inc);
+	void (*unregister_app)(struct net *net, struct ip_vs_app *inc);

-	int (*app_conn_bind)(struct ip_vs_conn *cp);
+	int (*app_conn_bind)(struct net *net, struct ip_vs_conn *cp);

 	void (*debug_packet)(struct ip_vs_protocol *pp,
 			     const struct sk_buff *skb,
@@ -350,10 +355,24 @@  struct ip_vs_protocol {

 	void (*timeout_change)(struct ip_vs_protocol *pp, int flags);

-	int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+	/*
+	 int (*set_state_timeout)(struct ip_vs_protocol *pp,
+	                          char *sname,
+	                          int to);    Not used  -Hans S */
+};
+/*
+ * protocol data per netns
+ */
+struct ip_vs_proto_data {
+	struct ip_vs_proto_data	*next;
+	struct ip_vs_protocol   *pp;
+	int			*timeout_table;	/* protocol timeout table */
+	atomic_t		appcnt;		/* counter of proto app incs. */
 };

-extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_protocol   * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_proto_data * ip_vs_proto_data_get(struct net *net,
+						      unsigned short proto);

 /*
  *	IP_VS structure allocated for each dynamically scheduled connection
@@ -398,6 +417,8 @@  struct ip_vs_conn {
 	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
 			   struct ip_vs_protocol *pp);

+	struct net		*net;		/* netns ptr needed in timer */
+
 	/* Note: we can group the following members into a structure,
 	   in order to save more space, and the following members are
 	   only used in VS/NAT anyway */
@@ -628,29 +649,32 @@  enum {
 	IP_VS_DIR_LAST,
 };

-extern struct ip_vs_conn *ip_vs_conn_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+extern struct ip_vs_conn *
+ip_vs_conn_in_get(struct net *net, int af, int protocol,
+		  const union nf_inet_addr *s_addr, __be16 s_port,
+		  const union nf_inet_addr *d_addr, __be16 d_port);

-extern struct ip_vs_conn *ip_vs_ct_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+extern struct ip_vs_conn *
+ip_vs_ct_in_get(struct net *net, int af, int protocol,
+		const union nf_inet_addr *s_addr, __be16 s_port,
+		const union nf_inet_addr *d_addr, __be16 d_port);

-struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-					    struct ip_vs_protocol *pp,
-					    const struct ip_vs_iphdr *iph,
-					    unsigned int proto_off,
-					    int inverse);
+struct ip_vs_conn *
+ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
+			struct ip_vs_protocol *pp,
+			const struct ip_vs_iphdr *iph,
+			unsigned int proto_off, int inverse);

-extern struct ip_vs_conn *ip_vs_conn_out_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+extern struct ip_vs_conn *
+ip_vs_conn_out_get(struct net *net,int af, int protocol,
+		   const union nf_inet_addr *s_addr, __be16 s_port,
+		   const union nf_inet_addr *d_addr, __be16 d_port);

-struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-					     struct ip_vs_protocol *pp,
-					     const struct ip_vs_iphdr *iph,
-					     unsigned int proto_off,
-					     int inverse);
+struct ip_vs_conn *
+ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
+			 struct ip_vs_protocol *pp,
+			 const struct ip_vs_iphdr *iph,
+			 unsigned int proto_off, int inverse);

 /* put back the conn without restarting its timer */
 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
@@ -658,20 +682,22 @@  static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
 	atomic_dec(&cp->refcnt);
 }
 extern void ip_vs_conn_put(struct ip_vs_conn *cp);
-extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
+extern void
+ip_vs_conn_fill_cport(struct net *net, struct ip_vs_conn *cp, __be16 cport);

 extern struct ip_vs_conn *
-ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+ip_vs_conn_new(struct net *net, int af, int proto,
+	       const union nf_inet_addr *caddr, __be16 cport,
 	       const union nf_inet_addr *vaddr, __be16 vport,
-	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-	       struct ip_vs_dest *dest);
+	       const union nf_inet_addr *daddr, __be16 dport,
+	       unsigned flags, struct ip_vs_dest *dest);
 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);

 extern const char * ip_vs_state_name(__u16 proto, int state);

-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
-extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_check_template(struct net *net, struct ip_vs_conn *ct);
+extern void ip_vs_random_dropentry(struct net *net);
 extern int ip_vs_conn_init(void);
 extern void ip_vs_conn_cleanup(void);

@@ -741,12 +767,15 @@  ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
  *      (from ip_vs_app.c)
  */
 #define IP_VS_APP_MAX_PORTS  8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
-extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern int ip_vs_bind_app(struct net *net, struct ip_vs_conn *cp,
+		          struct ip_vs_protocol *pp);
 extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net,
+				  struct ip_vs_app *app,
+				  __u16 proto,
+				  __u16 port);
 extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
 extern void ip_vs_app_inc_put(struct ip_vs_app *inc);

@@ -762,7 +791,7 @@  extern void ip_vs_app_cleanup(void);
 extern int ip_vs_protocol_init(void);
 extern void ip_vs_protocol_cleanup(void);
 extern void ip_vs_protocol_timeout_change(int flags);
-extern int *ip_vs_create_timeout_table(int *table, int size);
+extern int *ip_vs_create_timeout_table(const int *table, int size);
 extern int
 ip_vs_set_state_timeout(int *table, int num, const char *const *names,
 			const char *name, int to);
@@ -806,7 +835,7 @@  extern struct ip_vs_stats ip_vs_stats;
 extern const struct ctl_path net_vs_ctl_path[];

 extern struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		  const union nf_inet_addr *vaddr, __be16 vport);

 static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -815,7 +844,7 @@  static inline void ip_vs_service_put(struct ip_vs_service *svc)
 }

 extern struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
 			  const union nf_inet_addr *daddr, __be16 dport);

 extern int ip_vs_use_count_inc(void);
@@ -823,23 +852,22 @@  extern void ip_vs_use_count_dec(void);
 extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
 extern struct ip_vs_dest *
-ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
+ip_vs_find_dest(struct net *net, int af,
+		const union nf_inet_addr *daddr, __be16 dport,
 		const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
-extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
-
+extern struct ip_vs_dest *ip_vs_try_bind_dest(struct net *net,
+		                              struct ip_vs_conn *cp);

 /*
  *      IPVS sync daemon data and function prototypes
  *      (from ip_vs_sync.c)
  */
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+		             __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_sync_init(void);
+extern void ip_vs_sync_cleanup(void);


 /*
@@ -847,8 +875,8 @@  extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
  */
 extern int ip_vs_estimator_init(void);
 extern void ip_vs_estimator_cleanup(void);
-extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
-extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);

 /*
@@ -864,8 +892,8 @@  extern int ip_vs_tunnel_xmit
 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 extern int ip_vs_dr_xmit
 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
-extern int ip_vs_icmp_xmit
-(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
+extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		           struct ip_vs_protocol *pp, int offset);
 extern void ip_vs_dst_reset(struct ip_vs_dest *dest);

 #ifdef CONFIG_IP_VS_IPV6
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index bd10a79..b59cdc5 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -15,6 +15,7 @@ 
 #include <net/netns/ipv4.h>
 #include <net/netns/ipv6.h>
 #include <net/netns/dccp.h>
+#include <net/netns/ip_vs.h>
 #include <net/netns/x_tables.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netns/conntrack.h>
@@ -91,6 +92,7 @@  struct net {
 	struct sk_buff_head	wext_nlevents;
 #endif
 	struct net_generic	*gen;
+	struct netns_ipvs       *ipvs;
 };


diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
new file mode 100644
index 0000000..540ac90
--- /dev/null
+++ b/include/net/netns/ip_vs.h
@@ -0,0 +1,112 @@ 
+#ifndef __NETNS_IP_VS_H_
+#define __NETNS_IP_VS_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/list_nulls.h>
+#include <linux/ip_vs.h>
+#include <asm/atomic.h>
+#include <linux/in.h>
+
+struct ip_vs_stats;
+struct ip_vs_sync_buff;
+struct ctl_table_header;
+
+struct netns_ipvs {
+	int			inc;		/* incarnation */
+	/* ip_vs_app */
+	struct list_head 	app_list;
+	struct mutex		app_mutex;
+	struct lock_class_key 	app_key;	/* Grrr, for mutex debuging */
+	/* ip_vs_conn */
+	unsigned char           conn_cname[20];	/* Connection hash name */
+	struct list_head 	*conn_tab;	/* Connection hash: for in and output packets */
+	struct kmem_cache 	*conn_cachep;	/* SLAB cache for IPVS connections */
+	atomic_t 		conn_count;	/* counter for current IPVS connections */
+	atomic_t 		conn_no_cport_cnt; /* counter for no client port connections */
+	unsigned int 		conn_rnd;	/* random value for IPVS connection hash */
+	/* ip_vs_ctl */
+	struct ip_vs_stats 	*ctl_stats;	/* Statistics & estimator */
+	/*	Hash table: for virtual service lookups */
+	#define IP_VS_SVC_TAB_BITS 8
+	#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+	#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+	/* the service table hashed by <protocol, addr, port> */
+	struct list_head 	ctl_svc_table[IP_VS_SVC_TAB_SIZE];
+	/* the service table hashed by fwmark */
+	struct list_head 	ctl_fwm_table[IP_VS_SVC_TAB_SIZE];
+	/* Hash table: for real service lookups */
+	#define IP_VS_RTAB_BITS 4
+	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+	struct list_head 	ctl_rtable[IP_VS_RTAB_SIZE]; /* Hash table: for real service  */
+	struct list_head	ctl_dest_trash;	    /* Trash for destinations */
+	atomic_t 		ctl_ftpsvc_counter;
+	atomic_t 		ctl_nullsvc_counter;
+	/* sys-ctl struct */
+	struct ctl_table_header	*sysctl_hdr;
+	struct ctl_table	*sysctl_tbl;
+	/* sysctl variables */
+	int 			sysctl_amemthresh;
+	int 			sysctl_am_droprate;
+	int 			sysctl_drop_entry;
+	int 			sysctl_drop_packet;
+	int 			sysctl_secure_tcp;
+	int 			sysctl_cache_bypass;
+	int 			sysctl_expire_nodest_conn;
+	int 			sysctl_expire_quiescent_template;
+	int 			sysctl_sync_threshold[2];
+	int 			sysctl_nat_icmp_send;
+	/* ip_vs_proto */
+	#define IP_VS_PROTO_TAB_SIZE		32	/* must be power of 2 */
+	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+	/* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	#define	TCP_APP_TAB_BITS	4
+	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
+	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
+	struct list_head 	tcp_apps[TCP_APP_TAB_SIZE];
+	spinlock_t		tcp_app_lock;
+#endif
+	/* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	#define	UDP_APP_TAB_BITS	4
+	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
+	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
+	struct list_head 	udp_apps[UDP_APP_TAB_SIZE];
+	spinlock_t		udp_app_lock;
+#endif
+	/* ip_vs_proto_sctp */
+	#define SCTP_APP_TAB_BITS        4
+	#define SCTP_APP_TAB_SIZE        (1 << SCTP_APP_TAB_BITS)
+	#define SCTP_APP_TAB_MASK        (SCTP_APP_TAB_SIZE - 1)
+	/* Hash table for SCTP application incarnations	 */
+	struct list_head 	sctp_apps[SCTP_APP_TAB_SIZE];
+	spinlock_t		sctp_app_lock;
+
+	/* ip_vs_est */
+	struct list_head 	est_list;	/* estimator list */
+	spinlock_t		est_lock;
+	/* ip_vs_sync */
+	struct list_head	sync_queue;
+	spinlock_t		sync_lock;
+	struct ip_vs_sync_buff  *sync_buff;
+	spinlock_t		sync_buff_lock;
+	struct sockaddr_in 	sync_mcast_addr;
+	/* sync daemon tasks */
+	struct task_struct 	*sync_master_thread;
+	struct task_struct 	*sync_backup_thread;
+	/* the maximum length of sync (sending/receiving) message */
+	int 			sync_send_mesg_maxlen;
+	int 			sync_recv_mesg_maxlen;
+
+	volatile int 		sync_state;
+	volatile int 		master_syncid;
+	volatile int 		backup_syncid;
+	/* multicast interface name */
+	char 			master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+	char 			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+};
+
+#endif /*__NETNS_IP_VS_H_*/