diff mbox series

[net-next,v2] ipv4: Support multipath hashing on inner IP pkts for GRE tunnel

Message ID 20190613170330.8783-1-ssuryaextr@gmail.com
State Not Applicable
Delegated to: David Miller
Headers show
Series [net-next,v2] ipv4: Support multipath hashing on inner IP pkts for GRE tunnel | expand

Commit Message

Stephen Suryaputra June 13, 2019, 5:03 p.m. UTC
Multipath hash policy value of 0 isn't distributing since the outer IP
dest and src aren't varied eventhough the inner ones are. Since the flow
is on the inner ones in the case of tunneled traffic, hashing on them is
desired.

This is done mainly for IP over GRE, hence only tested for that. But
anything else supported by flow dissection should work.

v2: Use skb_flow_dissect_flow_keys() directly so that other tunneling
    can be supported through flow dissection (per Nikolay Aleksandrov).
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
---
 Documentation/networking/ip-sysctl.txt |  1 +
 net/ipv4/route.c                       | 20 ++++++++++++++++++++
 net/ipv4/sysctl_net_ipv4.c             |  2 +-
 3 files changed, 22 insertions(+), 1 deletion(-)

Comments

Nikolay Aleksandrov June 13, 2019, 5:10 p.m. UTC | #1
On 13/06/2019 20:03, Stephen Suryaputra wrote:
> Multipath hash policy value of 0 isn't distributing since the outer IP
> dest and src aren't varied eventhough the inner ones are. Since the flow
> is on the inner ones in the case of tunneled traffic, hashing on them is
> desired.
> 
> This is done mainly for IP over GRE, hence only tested for that. But
> anything else supported by flow dissection should work.
> 
> v2: Use skb_flow_dissect_flow_keys() directly so that other tunneling
>     can be supported through flow dissection (per Nikolay Aleksandrov).
> Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
> ---
>  Documentation/networking/ip-sysctl.txt |  1 +
>  net/ipv4/route.c                       | 20 ++++++++++++++++++++
>  net/ipv4/sysctl_net_ipv4.c             |  2 +-
>  3 files changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
> index 5eedc6941ce5..2f3bc910895a 100644
> --- a/Documentation/networking/ip-sysctl.txt
> +++ b/Documentation/networking/ip-sysctl.txt
> @@ -80,6 +80,7 @@ fib_multipath_hash_policy - INTEGER
>  	Possible values:
>  	0 - Layer 3
>  	1 - Layer 4
> +	2 - Inner Layer 3 for tunneled IP packets.

Hmm, but you're using the ports below, so it's not L3 ?

>  
>  fib_sync_mem - UNSIGNED INTEGER
>  	Amount of dirty memory from fib entries that can be backlogged before
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 14c7fdacaa72..c3e03bce0a3a 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -1872,6 +1872,26 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
>  			hash_keys.basic.ip_proto = fl4->flowi4_proto;
>  		}
>  		break;
> +	case 2:
> +		memset(&hash_keys, 0, sizeof(hash_keys));
> +		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> +		/* skb is currently provided only when forwarding */
> +		if (skb) {
> +			struct flow_keys keys;
> +
> +			skb_flow_dissect_flow_keys(skb, &keys, 0);
> +
> +			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
> +			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
> +			hash_keys.ports.src = keys.ports.src;
> +			hash_keys.ports.dst = keys.ports.dst;
> +			hash_keys.basic.ip_proto = keys.basic.ip_proto;

This is inconsistent with the code below, you're using ports when we have skb and
using only addresses when we don't (host traffic). So either make it L3-only and
use addresses or make sure you're using the correct ports as well.

> +		} else {
> +			/* Same as case 0 */
> +			hash_keys.addrs.v4addrs.src = fl4->saddr;
> +			hash_keys.addrs.v4addrs.dst = fl4->daddr;
> +		}
> +		break;
>  	}
>  	mhash = flow_hash_from_keys(&hash_keys);
>  
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 2316c08e9591..e1efc2e62d21 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -960,7 +960,7 @@ static struct ctl_table ipv4_net_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_fib_multipath_hash_policy,
>  		.extra1		= &zero,
> -		.extra2		= &one,
> +		.extra2		= &two,
>  	},
>  #endif
>  	{
>
diff mbox series

Patch

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 5eedc6941ce5..2f3bc910895a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -80,6 +80,7 @@  fib_multipath_hash_policy - INTEGER
 	Possible values:
 	0 - Layer 3
 	1 - Layer 4
+	2 - Inner Layer 3 for tunneled IP packets.
 
 fib_sync_mem - UNSIGNED INTEGER
 	Amount of dirty memory from fib entries that can be backlogged before
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 14c7fdacaa72..c3e03bce0a3a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1872,6 +1872,26 @@  int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 			hash_keys.basic.ip_proto = fl4->flowi4_proto;
 		}
 		break;
+	case 2:
+		memset(&hash_keys, 0, sizeof(hash_keys));
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+		/* skb is currently provided only when forwarding */
+		if (skb) {
+			struct flow_keys keys;
+
+			skb_flow_dissect_flow_keys(skb, &keys, 0);
+
+			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+			hash_keys.ports.src = keys.ports.src;
+			hash_keys.ports.dst = keys.ports.dst;
+			hash_keys.basic.ip_proto = keys.basic.ip_proto;
+		} else {
+			/* Same as case 0 */
+			hash_keys.addrs.v4addrs.src = fl4->saddr;
+			hash_keys.addrs.v4addrs.dst = fl4->daddr;
+		}
+		break;
 	}
 	mhash = flow_hash_from_keys(&hash_keys);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2316c08e9591..e1efc2e62d21 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -960,7 +960,7 @@  static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_fib_multipath_hash_policy,
 		.extra1		= &zero,
-		.extra2		= &one,
+		.extra2		= &two,
 	},
 #endif
 	{