diff mbox

[net-next] store complete hash type information in socket buffer...

Message ID 1455525128-27795-1-git-send-email-paul.durrant@citrix.com
State Deferred, archived
Delegated to: David Miller
Headers show

Commit Message

Paul Durrant Feb. 15, 2016, 8:32 a.m. UTC
...rather than a boolean merely indicating a canonical L4 hash.

skb_set_hash() takes a hash type (from enum pkt_hash_types) as an
argument but information is lost since only a single bit in the skb
stores whether that hash type is PKT_HASH_TYPE_L4 or not. By using
two bits it's possible to store the complete hash type information.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Veaceslav Falico <vfalico@gmail.com>
Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
---
 drivers/net/bonding/bond_main.c |  2 +-
 include/linux/skbuff.h          | 53 ++++++++++++++++++++++++++++-------------
 include/net/flow_dissector.h    |  5 ++++
 include/net/sock.h              |  2 +-
 include/trace/events/net.h      |  2 +-
 net/core/flow_dissector.c       | 27 ++++++++++++++++-----
 6 files changed, 65 insertions(+), 26 deletions(-)

Comments

Paul Durrant Feb. 17, 2016, 9:01 a.m. UTC | #1
Ping?

> -----Original Message-----
> From: Paul Durrant [mailto:paul.durrant@citrix.com]
> Sent: 15 February 2016 08:32
> To: netdev@vger.kernel.org
> Cc: Paul Durrant; David S. Miller; Jay Vosburgh; Veaceslav Falico; Andy
> Gospodarek
> Subject: [PATCH net-next] store complete hash type information in socket
> buffer...
> 
> ...rather than a boolean merely indicating a canonical L4 hash.
> 
> skb_set_hash() takes a hash type (from enum pkt_hash_types) as an
> argument but information is lost since only a single bit in the skb
> stores whether that hash type is PKT_HASH_TYPE_L4 or not. By using
> two bits it's possible to store the complete hash type information.
> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Jay Vosburgh <j.vosburgh@gmail.com>
> Cc: Veaceslav Falico <vfalico@gmail.com>
> Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
> ---
>  drivers/net/bonding/bond_main.c |  2 +-
>  include/linux/skbuff.h          | 53 ++++++++++++++++++++++++++++----------
> ---
>  include/net/flow_dissector.h    |  5 ++++
>  include/net/sock.h              |  2 +-
>  include/trace/events/net.h      |  2 +-
>  net/core/flow_dissector.c       | 27 ++++++++++++++++-----
>  6 files changed, 65 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c
> b/drivers/net/bonding/bond_main.c
> index 45bdd87..ad0ee00 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -3173,7 +3173,7 @@ u32 bond_xmit_hash(struct bonding *bond, struct
> sk_buff *skb)
>  	u32 hash;
> 
>  	if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
> -	    skb->l4_hash)
> +	    skb_has_l4_hash(skb))
>  		return skb->hash;
> 
>  	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 3920675..b991ca2 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -595,8 +595,7 @@ static inline bool skb_mstamp_after(const struct
> skb_mstamp *t1,
>   *	@xmit_more: More SKBs are pending for this queue
>   *	@ndisc_nodetype: router type (from link layer)
>   *	@ooo_okay: allow the mapping of a socket to a queue to be changed
> - *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
> - *		ports.
> + *	@hash_type: indicates type of hash (see enum pkt_hash_types
> below)
>   *	@sw_hash: indicates hash was computed in software stack
>   *	@wifi_acked_valid: wifi_acked was set
>   *	@wifi_acked: whether frame was acked on wifi or not
> @@ -701,10 +700,10 @@ struct sk_buff {
>  	__u8			nf_trace:1;
>  	__u8			ip_summed:2;
>  	__u8			ooo_okay:1;
> -	__u8			l4_hash:1;
>  	__u8			sw_hash:1;
>  	__u8			wifi_acked_valid:1;
>  	__u8			wifi_acked:1;
> +	/* 1 bit hole */
> 
>  	__u8			no_fcs:1;
>  	/* Indicates the inner headers are valid in the skbuff. */
> @@ -721,7 +720,8 @@ struct sk_buff {
>  	__u8			ipvs_property:1;
>  	__u8			inner_protocol_type:1;
>  	__u8			remcsum_offload:1;
> -	/* 3 or 5 bit hole */
> +	__u8			hash_type:2;
> +	/* 1 or 3 bit hole */
> 
>  #ifdef CONFIG_NET_SCHED
>  	__u16			tc_index;	/* traffic control index */
> @@ -1030,19 +1030,35 @@ static inline void skb_clear_hash(struct sk_buff
> *skb)
>  {
>  	skb->hash = 0;
>  	skb->sw_hash = 0;
> -	skb->l4_hash = 0;
> +	skb->hash_type = 0;
> +}
> +
> +static inline enum pkt_hash_types skb_hash_type(struct sk_buff *skb)
> +{
> +	return skb->hash_type;
> +}
> +
> +static inline bool skb_has_l4_hash(struct sk_buff *skb)
> +{
> +	return skb_hash_type(skb) == PKT_HASH_TYPE_L4;
> +}
> +
> +static inline bool skb_has_sw_hash(struct sk_buff *skb)
> +{
> +	return !!skb->sw_hash;
>  }
> 
>  static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
>  {
> -	if (!skb->l4_hash)
> +	if (!skb_has_l4_hash(skb))
>  		skb_clear_hash(skb);
>  }
> 
>  static inline void
> -__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4)
> +__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw,
> +	       enum pkt_hash_types type)
>  {
> -	skb->l4_hash = is_l4;
> +	skb->hash_type = type;
>  	skb->sw_hash = is_sw;
>  	skb->hash = hash;
>  }
> @@ -1051,13 +1067,13 @@ static inline void
>  skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
>  {
>  	/* Used by drivers to set hash from HW */
> -	__skb_set_hash(skb, hash, false, type == PKT_HASH_TYPE_L4);
> +	__skb_set_hash(skb, hash, false, type);
>  }
> 
>  static inline void
> -__skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
> +__skb_set_sw_hash(struct sk_buff *skb, __u32 hash, enum
> pkt_hash_types type)
>  {
> -	__skb_set_hash(skb, hash, true, is_l4);
> +	__skb_set_hash(skb, hash, true, type);
>  }
> 
>  void __skb_get_hash(struct sk_buff *skb);
> @@ -1110,9 +1126,10 @@ static inline bool
> skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
>  				  data, proto, nhoff, hlen, flags);
>  }
> 
> +
>  static inline __u32 skb_get_hash(struct sk_buff *skb)
>  {
> -	if (!skb->l4_hash && !skb->sw_hash)
> +	if (!skb_has_l4_hash(skb) && !skb_has_sw_hash(skb))
>  		__skb_get_hash(skb);
> 
>  	return skb->hash;
> @@ -1122,11 +1139,12 @@ __u32 __skb_get_hash_flowi6(struct sk_buff
> *skb, const struct flowi6 *fl6);
> 
>  static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct
> flowi6 *fl6)
>  {
> -	if (!skb->l4_hash && !skb->sw_hash) {
> +	if (!skb_has_l4_hash(skb) && !skb_has_sw_hash(skb)) {
>  		struct flow_keys keys;
>  		__u32 hash = __get_hash_from_flowi6(fl6, &keys);
> 
> -		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
> +		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys) ?
> +				  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>  	}
> 
>  	return skb->hash;
> @@ -1136,11 +1154,12 @@ __u32 __skb_get_hash_flowi4(struct sk_buff
> *skb, const struct flowi4 *fl);
> 
>  static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct
> flowi4 *fl4)
>  {
> -	if (!skb->l4_hash && !skb->sw_hash) {
> +	if (!skb_has_l4_hash(skb) && !skb_has_sw_hash(skb)) {
>  		struct flow_keys keys;
>  		__u32 hash = __get_hash_from_flowi4(fl4, &keys);
> 
> -		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
> +		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys) ?
> +				  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>  	}
> 
>  	return skb->hash;
> @@ -1157,7 +1176,7 @@ static inline void skb_copy_hash(struct sk_buff *to,
> const struct sk_buff *from)
>  {
>  	to->hash = from->hash;
>  	to->sw_hash = from->sw_hash;
> -	to->l4_hash = from->l4_hash;
> +	to->hash_type = from->hash_type;
>  };
> 
>  static inline void skb_sender_cpu_clear(struct sk_buff *skb)
> diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> index 8c8548c..418b8c5 100644
> --- a/include/net/flow_dissector.h
> +++ b/include/net/flow_dissector.h
> @@ -182,6 +182,11 @@ static inline bool flow_keys_have_l4(struct
> flow_keys *keys)
>  	return (keys->ports.ports || keys->tags.flow_label);
>  }
> 
> +static inline bool flow_keys_have_l3(struct flow_keys *keys)
> +{
> +	return !!keys->control.addr_type;
> +}
> +
>  u32 flow_hash_from_keys(struct flow_keys *keys);
> 
>  #endif
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 255d3e0..21b8cdc 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1828,7 +1828,7 @@ static inline void sock_poll_wait(struct file *filp,
>  static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
>  {
>  	if (sk->sk_txhash) {
> -		skb->l4_hash = 1;
> +		skb->hash_type = PKT_HASH_TYPE_L4;
>  		skb->hash = sk->sk_txhash;
>  	}
>  }
> diff --git a/include/trace/events/net.h b/include/trace/events/net.h
> index 49cc7c3..25e7979 100644
> --- a/include/trace/events/net.h
> +++ b/include/trace/events/net.h
> @@ -180,7 +180,7 @@
> DECLARE_EVENT_CLASS(net_dev_rx_verbose_template,
>  		__entry->protocol = ntohs(skb->protocol);
>  		__entry->ip_summed = skb->ip_summed;
>  		__entry->hash = skb->hash;
> -		__entry->l4_hash = skb->l4_hash;
> +		__entry->l4_hash = skb->hash_type == PKT_HASH_TYPE_L4;
>  		__entry->len = skb->len;
>  		__entry->data_len = skb->data_len;
>  		__entry->truesize = skb->truesize;
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index d79699c..956208b 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -658,17 +658,30 @@ EXPORT_SYMBOL(make_flow_keys_digest);
>   *
>   * This function calculates a flow hash based on src/dst addresses
>   * and src/dst port numbers.  Sets hash in skb to non-zero hash value
> - * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
> - * if hash is a canonical 4-tuple hash over transport ports.
> + * on success, zero indicates no valid hash in which case the hash type
> + * is set to NONE. If the hash is a canonical 4-tuple hash over transport
> + * ports then type is set to L4. If the hash did not include transport
> + * then type is set to L3, otherwise it is assumed to be L2 only.
>   */
>  void __skb_get_hash(struct sk_buff *skb)
>  {
>  	struct flow_keys keys;
> +	u32 hash;
> +	enum pkt_hash_types type;
> 
>  	__flow_hash_secret_init();
> 
> -	__skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd),
> -			  flow_keys_have_l4(&keys));
> +	hash = ___skb_get_hash(skb, &keys, hashrnd);
> +	if (hash == 0)
> +		type = PKT_HASH_TYPE_NONE;
> +	else if (flow_keys_have_l4(&keys))
> +		type = PKT_HASH_TYPE_L4;
> +	else if (flow_keys_have_l3(&keys))
> +		type = PKT_HASH_TYPE_L3;
> +	else
> +		type = PKT_HASH_TYPE_L2;
> +
> +	__skb_set_sw_hash(skb, hash, type);
>  }
>  EXPORT_SYMBOL(__skb_get_hash);
> 
> @@ -698,7 +711,8 @@ __u32 __skb_get_hash_flowi6(struct sk_buff *skb,
> const struct flowi6 *fl6)
>  	keys.basic.ip_proto = fl6->flowi6_proto;
> 
>  	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
> -			  flow_keys_have_l4(&keys));
> +			  flow_keys_have_l4(&keys) ?
> +			  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
> 
>  	return skb->hash;
>  }
> @@ -719,7 +733,8 @@ __u32 __skb_get_hash_flowi4(struct sk_buff *skb,
> const struct flowi4 *fl4)
>  	keys.basic.ip_proto = fl4->flowi4_proto;
> 
>  	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
> -			  flow_keys_have_l4(&keys));
> +			  flow_keys_have_l4(&keys) ?
> +			  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
> 
>  	return skb->hash;
>  }
> --
> 2.1.4
David Miller Feb. 17, 2016, 8:44 p.m. UTC | #2
From: Paul Durrant <paul.durrant@citrix.com>
Date: Mon, 15 Feb 2016 08:32:08 +0000

> ...rather than a boolean merely indicating a canonical L4 hash.
> 
> skb_set_hash() takes a hash type (from enum pkt_hash_types) as an
> argument but information is lost since only a single bit in the skb
> stores whether that hash type is PKT_HASH_TYPE_L4 or not. By using
> two bits it's possible to store the complete hash type information.
> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>

Tom and/or Eric, please have a look at this.

> ---
>  drivers/net/bonding/bond_main.c |  2 +-
>  include/linux/skbuff.h          | 53 ++++++++++++++++++++++++++++-------------
>  include/net/flow_dissector.h    |  5 ++++
>  include/net/sock.h              |  2 +-
>  include/trace/events/net.h      |  2 +-
>  net/core/flow_dissector.c       | 27 ++++++++++++++++-----
>  6 files changed, 65 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 45bdd87..ad0ee00 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -3173,7 +3173,7 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
>  	u32 hash;
>  
>  	if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
> -	    skb->l4_hash)
> +	    skb_has_l4_hash(skb))
>  		return skb->hash;
>  
>  	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 3920675..b991ca2 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -595,8 +595,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
>   *	@xmit_more: More SKBs are pending for this queue
>   *	@ndisc_nodetype: router type (from link layer)
>   *	@ooo_okay: allow the mapping of a socket to a queue to be changed
> - *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
> - *		ports.
> + *	@hash_type: indicates type of hash (see enum pkt_hash_types below)
>   *	@sw_hash: indicates hash was computed in software stack
>   *	@wifi_acked_valid: wifi_acked was set
>   *	@wifi_acked: whether frame was acked on wifi or not
> @@ -701,10 +700,10 @@ struct sk_buff {
>  	__u8			nf_trace:1;
>  	__u8			ip_summed:2;
>  	__u8			ooo_okay:1;
> -	__u8			l4_hash:1;
>  	__u8			sw_hash:1;
>  	__u8			wifi_acked_valid:1;
>  	__u8			wifi_acked:1;
> +	/* 1 bit hole */
>  
>  	__u8			no_fcs:1;
>  	/* Indicates the inner headers are valid in the skbuff. */
> @@ -721,7 +720,8 @@ struct sk_buff {
>  	__u8			ipvs_property:1;
>  	__u8			inner_protocol_type:1;
>  	__u8			remcsum_offload:1;
> -	/* 3 or 5 bit hole */
> +	__u8			hash_type:2;
> +	/* 1 or 3 bit hole */
>  
>  #ifdef CONFIG_NET_SCHED
>  	__u16			tc_index;	/* traffic control index */
> @@ -1030,19 +1030,35 @@ static inline void skb_clear_hash(struct sk_buff *skb)
>  {
>  	skb->hash = 0;
>  	skb->sw_hash = 0;
> -	skb->l4_hash = 0;
> +	skb->hash_type = 0;
> +}
> +
> +static inline enum pkt_hash_types skb_hash_type(struct sk_buff *skb)
> +{
> +	return skb->hash_type;
> +}
> +
> +static inline bool skb_has_l4_hash(struct sk_buff *skb)
> +{
> +	return skb_hash_type(skb) == PKT_HASH_TYPE_L4;
> +}
> +
> +static inline bool skb_has_sw_hash(struct sk_buff *skb)
> +{
> +	return !!skb->sw_hash;
>  }
>  
>  static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
>  {
> -	if (!skb->l4_hash)
> +	if (!skb_has_l4_hash(skb))
>  		skb_clear_hash(skb);
>  }
>  
>  static inline void
> -__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4)
> +__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw,
> +	       enum pkt_hash_types type)
>  {
> -	skb->l4_hash = is_l4;
> +	skb->hash_type = type;
>  	skb->sw_hash = is_sw;
>  	skb->hash = hash;
>  }
> @@ -1051,13 +1067,13 @@ static inline void
>  skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
>  {
>  	/* Used by drivers to set hash from HW */
> -	__skb_set_hash(skb, hash, false, type == PKT_HASH_TYPE_L4);
> +	__skb_set_hash(skb, hash, false, type);
>  }
>  
>  static inline void
> -__skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
> +__skb_set_sw_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
>  {
> -	__skb_set_hash(skb, hash, true, is_l4);
> +	__skb_set_hash(skb, hash, true, type);
>  }
>  
>  void __skb_get_hash(struct sk_buff *skb);
> @@ -1110,9 +1126,10 @@ static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
>  				  data, proto, nhoff, hlen, flags);
>  }
>  
> +
>  static inline __u32 skb_get_hash(struct sk_buff *skb)
>  {
> -	if (!skb->l4_hash && !skb->sw_hash)
> +	if (!skb_has_l4_hash(skb) && !skb_has_sw_hash(skb))
>  		__skb_get_hash(skb);
>  
>  	return skb->hash;
> @@ -1122,11 +1139,12 @@ __u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6);
>  
>  static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
>  {
> -	if (!skb->l4_hash && !skb->sw_hash) {
> +	if (!skb_has_l4_hash(skb) && !skb_has_sw_hash(skb)) {
>  		struct flow_keys keys;
>  		__u32 hash = __get_hash_from_flowi6(fl6, &keys);
>  
> -		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
> +		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys) ?
> +				  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>  	}
>  
>  	return skb->hash;
> @@ -1136,11 +1154,12 @@ __u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl);
>  
>  static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
>  {
> -	if (!skb->l4_hash && !skb->sw_hash) {
> +	if (!skb_has_l4_hash(skb) && !skb_has_sw_hash(skb)) {
>  		struct flow_keys keys;
>  		__u32 hash = __get_hash_from_flowi4(fl4, &keys);
>  
> -		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
> +		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys) ?
> +				  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>  	}
>  
>  	return skb->hash;
> @@ -1157,7 +1176,7 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
>  {
>  	to->hash = from->hash;
>  	to->sw_hash = from->sw_hash;
> -	to->l4_hash = from->l4_hash;
> +	to->hash_type = from->hash_type;
>  };
>  
>  static inline void skb_sender_cpu_clear(struct sk_buff *skb)
> diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> index 8c8548c..418b8c5 100644
> --- a/include/net/flow_dissector.h
> +++ b/include/net/flow_dissector.h
> @@ -182,6 +182,11 @@ static inline bool flow_keys_have_l4(struct flow_keys *keys)
>  	return (keys->ports.ports || keys->tags.flow_label);
>  }
>  
> +static inline bool flow_keys_have_l3(struct flow_keys *keys)
> +{
> +	return !!keys->control.addr_type;
> +}
> +
>  u32 flow_hash_from_keys(struct flow_keys *keys);
>  
>  #endif
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 255d3e0..21b8cdc 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1828,7 +1828,7 @@ static inline void sock_poll_wait(struct file *filp,
>  static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
>  {
>  	if (sk->sk_txhash) {
> -		skb->l4_hash = 1;
> +		skb->hash_type = PKT_HASH_TYPE_L4;
>  		skb->hash = sk->sk_txhash;
>  	}
>  }
> diff --git a/include/trace/events/net.h b/include/trace/events/net.h
> index 49cc7c3..25e7979 100644
> --- a/include/trace/events/net.h
> +++ b/include/trace/events/net.h
> @@ -180,7 +180,7 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template,
>  		__entry->protocol = ntohs(skb->protocol);
>  		__entry->ip_summed = skb->ip_summed;
>  		__entry->hash = skb->hash;
> -		__entry->l4_hash = skb->l4_hash;
> +		__entry->l4_hash = skb->hash_type == PKT_HASH_TYPE_L4;
>  		__entry->len = skb->len;
>  		__entry->data_len = skb->data_len;
>  		__entry->truesize = skb->truesize;
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index d79699c..956208b 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -658,17 +658,30 @@ EXPORT_SYMBOL(make_flow_keys_digest);
>   *
>   * This function calculates a flow hash based on src/dst addresses
>   * and src/dst port numbers.  Sets hash in skb to non-zero hash value
> - * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
> - * if hash is a canonical 4-tuple hash over transport ports.
> + * on success, zero indicates no valid hash in which case the hash type
> + * is set to NONE. If the hash is a canonical 4-tuple hash over transport
> + * ports then type is set to L4. If the hash did not include transport
> + * then type is set to L3, otherwise it is assumed to be L2 only.
>   */
>  void __skb_get_hash(struct sk_buff *skb)
>  {
>  	struct flow_keys keys;
> +	u32 hash;
> +	enum pkt_hash_types type;
>  
>  	__flow_hash_secret_init();
>  
> -	__skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd),
> -			  flow_keys_have_l4(&keys));
> +	hash = ___skb_get_hash(skb, &keys, hashrnd);
> +	if (hash == 0)
> +		type = PKT_HASH_TYPE_NONE;
> +	else if (flow_keys_have_l4(&keys))
> +		type = PKT_HASH_TYPE_L4;
> +	else if (flow_keys_have_l3(&keys))
> +		type = PKT_HASH_TYPE_L3;
> +	else
> +		type = PKT_HASH_TYPE_L2;
> +
> +	__skb_set_sw_hash(skb, hash, type);
>  }
>  EXPORT_SYMBOL(__skb_get_hash);
>  
> @@ -698,7 +711,8 @@ __u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
>  	keys.basic.ip_proto = fl6->flowi6_proto;
>  
>  	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
> -			  flow_keys_have_l4(&keys));
> +			  flow_keys_have_l4(&keys) ?
> +			  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>  
>  	return skb->hash;
>  }
> @@ -719,7 +733,8 @@ __u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
>  	keys.basic.ip_proto = fl4->flowi4_proto;
>  
>  	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
> -			  flow_keys_have_l4(&keys));
> +			  flow_keys_have_l4(&keys) ?
> +			  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>  
>  	return skb->hash;
>  }
> -- 
> 2.1.4
>
Eric Dumazet Feb. 17, 2016, 9:30 p.m. UTC | #3
On mer., 2016-02-17 at 15:44 -0500, David Miller wrote:
> From: Paul Durrant <paul.durrant@citrix.com>
> Date: Mon, 15 Feb 2016 08:32:08 +0000
> 
> > ...rather than a boolean merely indicating a canonical L4 hash.
> > 
> > skb_set_hash() takes a hash type (from enum pkt_hash_types) as an
> > argument but information is lost since only a single bit in the skb
> > stores whether that hash type is PKT_HASH_TYPE_L4 or not. By using
> > two bits it's possible to store the complete hash type information.
> > 
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> 
> Tom and/or Eric, please have a look at this.

I guess my question is simply 'why do we need this' ?

Consuming a bit in our precious sk_buff is not something we want for
some obscure feature.
Tom Herbert Feb. 19, 2016, 2:22 a.m. UTC | #4
On Wed, Feb 17, 2016 at 1:30 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On mer., 2016-02-17 at 15:44 -0500, David Miller wrote:
>> From: Paul Durrant <paul.durrant@citrix.com>
>> Date: Mon, 15 Feb 2016 08:32:08 +0000
>>
>> > ...rather than a boolean merely indicating a canonical L4 hash.
>> >
>> > skb_set_hash() takes a hash type (from enum pkt_hash_types) as an
>> > argument but information is lost since only a single bit in the skb
>> > stores whether that hash type is PKT_HASH_TYPE_L4 or not. By using
>> > two bits it's possible to store the complete hash type information.
>> >
>> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
>>
>> Tom and/or Eric, please have a look at this.
>
> I guess my question is simply 'why do we need this' ?
>
> Consuming a bit in our precious sk_buff is not something we want for
> some obscure feature.
>
Right. I think the reason Paul wants this is be able to pass the hash
to a Windows guest. As I pointed out though, we'd also need an
indication that the hash is Toeplitz to be really correct with Windows
interface. The Linux driver interface does allow indicating L2, L3, or
L4 hash with the assumption that differentiation might be useful some
day, but so far it only appears that distinguishing L4 from others has
any value. It would be interesting to know if Windows actually does
anything useful in differentiating L2 and L3 hashes.

Tom

>
>
Paul Durrant Feb. 19, 2016, 8:55 a.m. UTC | #5
> -----Original Message-----

> From: Tom Herbert [mailto:tom@herbertland.com]

> Sent: 19 February 2016 02:22

> To: Eric Dumazet

> Cc: David Miller; Paul Durrant; Linux Kernel Network Developers; Jay

> Vosburgh; Veaceslav Falico; Andy Gospodarek

> Subject: Re: [PATCH net-next] store complete hash type information in

> socket buffer...

> 

> On Wed, Feb 17, 2016 at 1:30 PM, Eric Dumazet <eric.dumazet@gmail.com>

> wrote:

> > On mer., 2016-02-17 at 15:44 -0500, David Miller wrote:

> >> From: Paul Durrant <paul.durrant@citrix.com>

> >> Date: Mon, 15 Feb 2016 08:32:08 +0000

> >>

> >> > ...rather than a boolean merely indicating a canonical L4 hash.

> >> >

> >> > skb_set_hash() takes a hash type (from enum pkt_hash_types) as an

> >> > argument but information is lost since only a single bit in the skb

> >> > stores whether that hash type is PKT_HASH_TYPE_L4 or not. By using

> >> > two bits it's possible to store the complete hash type information.

> >> >

> >> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>

> >>

> >> Tom and/or Eric, please have a look at this.

> >

> > I guess my question is simply 'why do we need this' ?

> >

> > Consuming a bit in our precious sk_buff is not something we want for

> > some obscure feature.

> >

> Right. I think the reason Paul wants this is be able to pass the hash

> to a Windows guest. As I pointed out though, we'd also need an

> indication that the hash is Toeplitz to be really correct with Windows

> interface.


Tom,

  Actually xen-netback can live without this change. Windows (at the moment at least) only cares about L3 and L4 hashes. The motivation for this change was just to fix the apparent mismatch where some functions deal with the hash type enum and some the single 'is l4' bit. I agree it would also be useful and sensible for sk_buff to also encode a hash algorithm as most NIC h/w out there will probably use Toeplitz (since it's a hard requirement for Windows) but may also be able use alternative hashes.

> The Linux driver interface does allow indicating L2, L3, or

> L4 hash with the assumption that differentiation might be useful some

> day, but so far it only appears that distinguishing L4 from others has

> any value. It would be interesting to know if Windows actually does

> anything useful in differentiating L2 and L3 hashes.

> 


  As I said above, it does not distinguish L2 and L3 at the moment so this patch is not necessary. I still think it’s a step in the right direction though.

  Cheers,

    Paul

> Tom

> 

> >

> >
diff mbox

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 45bdd87..ad0ee00 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3173,7 +3173,7 @@  u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
 	u32 hash;
 
 	if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
-	    skb->l4_hash)
+	    skb_has_l4_hash(skb))
 		return skb->hash;
 
 	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3920675..b991ca2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -595,8 +595,7 @@  static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@xmit_more: More SKBs are pending for this queue
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
- *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
- *		ports.
+ *	@hash_type: indicates type of hash (see enum pkt_hash_types below)
  *	@sw_hash: indicates hash was computed in software stack
  *	@wifi_acked_valid: wifi_acked was set
  *	@wifi_acked: whether frame was acked on wifi or not
@@ -701,10 +700,10 @@  struct sk_buff {
 	__u8			nf_trace:1;
 	__u8			ip_summed:2;
 	__u8			ooo_okay:1;
-	__u8			l4_hash:1;
 	__u8			sw_hash:1;
 	__u8			wifi_acked_valid:1;
 	__u8			wifi_acked:1;
+	/* 1 bit hole */
 
 	__u8			no_fcs:1;
 	/* Indicates the inner headers are valid in the skbuff. */
@@ -721,7 +720,8 @@  struct sk_buff {
 	__u8			ipvs_property:1;
 	__u8			inner_protocol_type:1;
 	__u8			remcsum_offload:1;
-	/* 3 or 5 bit hole */
+	__u8			hash_type:2;
+	/* 1 or 3 bit hole */
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
@@ -1030,19 +1030,35 @@  static inline void skb_clear_hash(struct sk_buff *skb)
 {
 	skb->hash = 0;
 	skb->sw_hash = 0;
-	skb->l4_hash = 0;
+	skb->hash_type = 0;
+}
+
+static inline enum pkt_hash_types skb_hash_type(struct sk_buff *skb)
+{
+	return skb->hash_type;
+}
+
+static inline bool skb_has_l4_hash(struct sk_buff *skb)
+{
+	return skb_hash_type(skb) == PKT_HASH_TYPE_L4;
+}
+
+static inline bool skb_has_sw_hash(struct sk_buff *skb)
+{
+	return !!skb->sw_hash;
 }
 
 static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
 {
-	if (!skb->l4_hash)
+	if (!skb_has_l4_hash(skb))
 		skb_clear_hash(skb);
 }
 
 static inline void
-__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4)
+__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw,
+	       enum pkt_hash_types type)
 {
-	skb->l4_hash = is_l4;
+	skb->hash_type = type;
 	skb->sw_hash = is_sw;
 	skb->hash = hash;
 }
@@ -1051,13 +1067,13 @@  static inline void
 skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
 {
 	/* Used by drivers to set hash from HW */
-	__skb_set_hash(skb, hash, false, type == PKT_HASH_TYPE_L4);
+	__skb_set_hash(skb, hash, false, type);
 }
 
 static inline void
-__skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
+__skb_set_sw_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
 {
-	__skb_set_hash(skb, hash, true, is_l4);
+	__skb_set_hash(skb, hash, true, type);
 }
 
 void __skb_get_hash(struct sk_buff *skb);
@@ -1110,9 +1126,10 @@  static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
 				  data, proto, nhoff, hlen, flags);
 }
 
+
 static inline __u32 skb_get_hash(struct sk_buff *skb)
 {
-	if (!skb->l4_hash && !skb->sw_hash)
+	if (!skb_has_l4_hash(skb) && !skb_has_sw_hash(skb))
 		__skb_get_hash(skb);
 
 	return skb->hash;
@@ -1122,11 +1139,12 @@  __u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6);
 
 static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
 {
-	if (!skb->l4_hash && !skb->sw_hash) {
+	if (!skb_has_l4_hash(skb) && !skb_has_sw_hash(skb)) {
 		struct flow_keys keys;
 		__u32 hash = __get_hash_from_flowi6(fl6, &keys);
 
-		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
+		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys) ?
+				  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
 	}
 
 	return skb->hash;
@@ -1136,11 +1154,12 @@  __u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl);
 
 static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
 {
-	if (!skb->l4_hash && !skb->sw_hash) {
+	if (!skb_has_l4_hash(skb) && !skb_has_sw_hash(skb)) {
 		struct flow_keys keys;
 		__u32 hash = __get_hash_from_flowi4(fl4, &keys);
 
-		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
+		__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys) ?
+				  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
 	}
 
 	return skb->hash;
@@ -1157,7 +1176,7 @@  static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
 {
 	to->hash = from->hash;
 	to->sw_hash = from->sw_hash;
-	to->l4_hash = from->l4_hash;
+	to->hash_type = from->hash_type;
 };
 
 static inline void skb_sender_cpu_clear(struct sk_buff *skb)
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8c8548c..418b8c5 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -182,6 +182,11 @@  static inline bool flow_keys_have_l4(struct flow_keys *keys)
 	return (keys->ports.ports || keys->tags.flow_label);
 }
 
+static inline bool flow_keys_have_l3(struct flow_keys *keys)
+{
+	return !!keys->control.addr_type;
+}
+
 u32 flow_hash_from_keys(struct flow_keys *keys);
 
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 255d3e0..21b8cdc 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1828,7 +1828,7 @@  static inline void sock_poll_wait(struct file *filp,
 static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
 {
 	if (sk->sk_txhash) {
-		skb->l4_hash = 1;
+		skb->hash_type = PKT_HASH_TYPE_L4;
 		skb->hash = sk->sk_txhash;
 	}
 }
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 49cc7c3..25e7979 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -180,7 +180,7 @@  DECLARE_EVENT_CLASS(net_dev_rx_verbose_template,
 		__entry->protocol = ntohs(skb->protocol);
 		__entry->ip_summed = skb->ip_summed;
 		__entry->hash = skb->hash;
-		__entry->l4_hash = skb->l4_hash;
+		__entry->l4_hash = skb->hash_type == PKT_HASH_TYPE_L4;
 		__entry->len = skb->len;
 		__entry->data_len = skb->data_len;
 		__entry->truesize = skb->truesize;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d79699c..956208b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -658,17 +658,30 @@  EXPORT_SYMBOL(make_flow_keys_digest);
  *
  * This function calculates a flow hash based on src/dst addresses
  * and src/dst port numbers.  Sets hash in skb to non-zero hash value
- * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
- * if hash is a canonical 4-tuple hash over transport ports.
+ * on success, zero indicates no valid hash in which case the hash type
+ * is set to NONE. If the hash is a canonical 4-tuple hash over transport
+ * ports then type is set to L4. If the hash did not include transport
+ * then type is set to L3, otherwise it is assumed to be L2 only.
  */
 void __skb_get_hash(struct sk_buff *skb)
 {
 	struct flow_keys keys;
+	u32 hash;
+	enum pkt_hash_types type;
 
 	__flow_hash_secret_init();
 
-	__skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd),
-			  flow_keys_have_l4(&keys));
+	hash = ___skb_get_hash(skb, &keys, hashrnd);
+	if (hash == 0)
+		type = PKT_HASH_TYPE_NONE;
+	else if (flow_keys_have_l4(&keys))
+		type = PKT_HASH_TYPE_L4;
+	else if (flow_keys_have_l3(&keys))
+		type = PKT_HASH_TYPE_L3;
+	else
+		type = PKT_HASH_TYPE_L2;
+
+	__skb_set_sw_hash(skb, hash, type);
 }
 EXPORT_SYMBOL(__skb_get_hash);
 
@@ -698,7 +711,8 @@  __u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
 	keys.basic.ip_proto = fl6->flowi6_proto;
 
 	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
-			  flow_keys_have_l4(&keys));
+			  flow_keys_have_l4(&keys) ?
+			  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
 
 	return skb->hash;
 }
@@ -719,7 +733,8 @@  __u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
 	keys.basic.ip_proto = fl4->flowi4_proto;
 
 	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
-			  flow_keys_have_l4(&keys));
+			  flow_keys_have_l4(&keys) ?
+			  PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
 
 	return skb->hash;
 }