diff mbox series

[bpf-next,V2,1/5] igc: enable and fix RX hash usage by netstack

Message ID 168182464270.616355.11391652654430626584.stgit@firesoul
State Handled Elsewhere
Headers show
Series XDP-hints: XDP kfunc metadata for driver igc | expand

Commit Message

Jesper Dangaard Brouer April 18, 2023, 1:30 p.m. UTC
When function igc_rx_hash() was introduced in v4.20 via commit 0507ef8a0372
("igc: Add transmit and receive fastpath and interrupt handlers"), the
hardware wasn't configured to provide RSS hash, thus it made sense to not
enable net_device NETIF_F_RXHASH feature bit.

The NIC hardware was configured to enable RSS hash info in v5.2 via commit
2121c2712f82 ("igc: Add multiple receive queues control supporting"), but
forgot to set the NETIF_F_RXHASH feature bit.

The original implementation of igc_rx_hash() didn't extract the associated
pkt_hash_type, but statically set PKT_HASH_TYPE_L3. The largest portions of
this patch are about extracting the RSS Type from the hardware and mapping
this to enum pkt_hash_types. This was based on Foxville i225 software user
manual rev-1.3.1 and tested on Intel Ethernet Controller I225-LM (rev 03).

For UDP it's worth noting that RSS (type) hashing have been disabled both for
IPv4 and IPv6 (see IGC_MRQC_RSS_FIELD_IPV4_UDP + IGC_MRQC_RSS_FIELD_IPV6_UDP)
because hardware RSS doesn't handle fragmented pkts well when enabled (can
cause out-of-order). This results in PKT_HASH_TYPE_L3 for UDP packets, and
hash value doesn't include UDP port numbers. Not being PKT_HASH_TYPE_L4, have
the effect that netstack will do a software based hash calc calling into
flow_dissect, but only when code calls skb_get_hash(), which doesn't
necessary happen for local delivery.

For QA verification testing I wrote a small bpftrace prog:
 [0] https://github.com/xdp-project/xdp-project/blob/master/areas/hints/monitor_skb_hash_on_dev.bt

Fixes: 2121c2712f82 ("igc: Add multiple receive queues control supporting")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/intel/igc/igc.h      |   28 ++++++++++++++++++++++++++
 drivers/net/ethernet/intel/igc/igc_main.c |   31 +++++++++++++++++++++++++----
 2 files changed, 55 insertions(+), 4 deletions(-)

Comments

John Fastabend April 23, 2023, 2:46 p.m. UTC | #1
Jesper Dangaard Brouer wrote:
> When function igc_rx_hash() was introduced in v4.20 via commit 0507ef8a0372
> ("igc: Add transmit and receive fastpath and interrupt handlers"), the
> hardware wasn't configured to provide RSS hash, thus it made sense to not
> enable net_device NETIF_F_RXHASH feature bit.
> 
> The NIC hardware was configured to enable RSS hash info in v5.2 via commit
> 2121c2712f82 ("igc: Add multiple receive queues control supporting"), but
> forgot to set the NETIF_F_RXHASH feature bit.
> 
> The original implementation of igc_rx_hash() didn't extract the associated
> pkt_hash_type, but statically set PKT_HASH_TYPE_L3. The largest portions of
> this patch are about extracting the RSS Type from the hardware and mapping
> this to enum pkt_hash_types. This was based on Foxville i225 software user
> manual rev-1.3.1 and tested on Intel Ethernet Controller I225-LM (rev 03).
> 
> For UDP it's worth noting that RSS (type) hashing have been disabled both for
> IPv4 and IPv6 (see IGC_MRQC_RSS_FIELD_IPV4_UDP + IGC_MRQC_RSS_FIELD_IPV6_UDP)
> because hardware RSS doesn't handle fragmented pkts well when enabled (can
> cause out-of-order). This results in PKT_HASH_TYPE_L3 for UDP packets, and
> hash value doesn't include UDP port numbers. Not being PKT_HASH_TYPE_L4, have
> the effect that netstack will do a software based hash calc calling into
> flow_dissect, but only when code calls skb_get_hash(), which doesn't
> necessary happen for local delivery.
> 
> For QA verification testing I wrote a small bpftrace prog:
>  [0] https://github.com/xdp-project/xdp-project/blob/master/areas/hints/monitor_skb_hash_on_dev.bt
> 
> Fixes: 2121c2712f82 ("igc: Add multiple receive queues control supporting")
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ---
>  drivers/net/ethernet/intel/igc/igc.h      |   28 ++++++++++++++++++++++++++
>  drivers/net/ethernet/intel/igc/igc_main.c |   31 +++++++++++++++++++++++++----
>  2 files changed, 55 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
> index 34aebf00a512..f7f9e217e7b4 100644
> --- a/drivers/net/ethernet/intel/igc/igc.h
> +++ b/drivers/net/ethernet/intel/igc/igc.h
> @@ -13,6 +13,7 @@
>  #include <linux/ptp_clock_kernel.h>
>  #include <linux/timecounter.h>
>  #include <linux/net_tstamp.h>
> +#include <linux/bitfield.h>
>  
>  #include "igc_hw.h"
>  
> @@ -311,6 +312,33 @@ extern char igc_driver_name[];
>  #define IGC_MRQC_RSS_FIELD_IPV4_UDP	0x00400000
>  #define IGC_MRQC_RSS_FIELD_IPV6_UDP	0x00800000
>  
> +/* RX-desc Write-Back format RSS Type's */
> +enum igc_rss_type_num {
> +	IGC_RSS_TYPE_NO_HASH		= 0,
> +	IGC_RSS_TYPE_HASH_TCP_IPV4	= 1,
> +	IGC_RSS_TYPE_HASH_IPV4		= 2,
> +	IGC_RSS_TYPE_HASH_TCP_IPV6	= 3,
> +	IGC_RSS_TYPE_HASH_IPV6_EX	= 4,
> +	IGC_RSS_TYPE_HASH_IPV6		= 5,
> +	IGC_RSS_TYPE_HASH_TCP_IPV6_EX	= 6,
> +	IGC_RSS_TYPE_HASH_UDP_IPV4	= 7,
> +	IGC_RSS_TYPE_HASH_UDP_IPV6	= 8,
> +	IGC_RSS_TYPE_HASH_UDP_IPV6_EX	= 9,
> +	IGC_RSS_TYPE_MAX		= 10,
> +};
> +#define IGC_RSS_TYPE_MAX_TABLE		16
> +#define IGC_RSS_TYPE_MASK		GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */
> +
> +/* igc_rss_type - Rx descriptor RSS type field */
> +static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
> +{
> +	/* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved)
> +	 * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info)
> +	 * is slightly slower than via u32 (wb.lower.lo_dword.data)
> +	 */
> +	return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK);
> +}
> +
>  /* Interrupt defines */
>  #define IGC_START_ITR			648 /* ~6000 ints/sec */
>  #define IGC_4K_ITR			980
> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
> index 1c4676882082..bfa9768d447f 100644
> --- a/drivers/net/ethernet/intel/igc/igc_main.c
> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
> @@ -1690,14 +1690,36 @@ static void igc_rx_checksum(struct igc_ring *ring,
>  		   le32_to_cpu(rx_desc->wb.upper.status_error));
>  }
>  
> +/* Mapping HW RSS Type to enum pkt_hash_types */
> +static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
> +	[IGC_RSS_TYPE_NO_HASH]		= PKT_HASH_TYPE_L2,
> +	[IGC_RSS_TYPE_HASH_TCP_IPV4]	= PKT_HASH_TYPE_L4,
> +	[IGC_RSS_TYPE_HASH_IPV4]	= PKT_HASH_TYPE_L3,
> +	[IGC_RSS_TYPE_HASH_TCP_IPV6]	= PKT_HASH_TYPE_L4,
> +	[IGC_RSS_TYPE_HASH_IPV6_EX]	= PKT_HASH_TYPE_L3,
> +	[IGC_RSS_TYPE_HASH_IPV6]	= PKT_HASH_TYPE_L3,
> +	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
> +	[IGC_RSS_TYPE_HASH_UDP_IPV4]	= PKT_HASH_TYPE_L4,
> +	[IGC_RSS_TYPE_HASH_UDP_IPV6]	= PKT_HASH_TYPE_L4,
> +	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
> +	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
> +	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask   */
> +	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons       */
> +	[13] = PKT_HASH_TYPE_NONE,
> +	[14] = PKT_HASH_TYPE_NONE,
> +	[15] = PKT_HASH_TYPE_NONE,
> +};
> +
>  static inline void igc_rx_hash(struct igc_ring *ring,
>  			       union igc_adv_rx_desc *rx_desc,
>  			       struct sk_buff *skb)
>  {
> -	if (ring->netdev->features & NETIF_F_RXHASH)
> -		skb_set_hash(skb,
> -			     le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
> -			     PKT_HASH_TYPE_L3);
> +	if (ring->netdev->features & NETIF_F_RXHASH) {
> +		u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
> +		u32 rss_type = igc_rss_type(rx_desc);
> +
> +		skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);

Just curious why not copy the logic from the other driver fms10k, ice, ect.

	skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
		     (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);

avoiding the table logic. Do the driver folks care?
Jesper Dangaard Brouer April 24, 2023, 2:20 p.m. UTC | #2
On 23/04/2023 16.46, John Fastabend wrote:
> Jesper Dangaard Brouer wrote:
>> When function igc_rx_hash() was introduced in v4.20 via commit 0507ef8a0372
>> ("igc: Add transmit and receive fastpath and interrupt handlers"), the
>> hardware wasn't configured to provide RSS hash, thus it made sense to not
>> enable net_device NETIF_F_RXHASH feature bit.
>>
>> The NIC hardware was configured to enable RSS hash info in v5.2 via commit
>> 2121c2712f82 ("igc: Add multiple receive queues control supporting"), but
>> forgot to set the NETIF_F_RXHASH feature bit.
>>
>> The original implementation of igc_rx_hash() didn't extract the associated
>> pkt_hash_type, but statically set PKT_HASH_TYPE_L3. The largest portions of
>> this patch are about extracting the RSS Type from the hardware and mapping
>> this to enum pkt_hash_types. This was based on Foxville i225 software user
>> manual rev-1.3.1 and tested on Intel Ethernet Controller I225-LM (rev 03).
>>
>> For UDP it's worth noting that RSS (type) hashing have been disabled both for
>> IPv4 and IPv6 (see IGC_MRQC_RSS_FIELD_IPV4_UDP + IGC_MRQC_RSS_FIELD_IPV6_UDP)
>> because hardware RSS doesn't handle fragmented pkts well when enabled (can
>> cause out-of-order). This results in PKT_HASH_TYPE_L3 for UDP packets, and
>> hash value doesn't include UDP port numbers. Not being PKT_HASH_TYPE_L4, have
>> the effect that netstack will do a software based hash calc calling into
>> flow_dissect, but only when code calls skb_get_hash(), which doesn't
>> necessary happen for local delivery.
>>
>> For QA verification testing I wrote a small bpftrace prog:
>>   [0] https://github.com/xdp-project/xdp-project/blob/master/areas/hints/monitor_skb_hash_on_dev.bt
>>
>> Fixes: 2121c2712f82 ("igc: Add multiple receive queues control supporting")
>> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
>> ---
>>   drivers/net/ethernet/intel/igc/igc.h      |   28 ++++++++++++++++++++++++++
>>   drivers/net/ethernet/intel/igc/igc_main.c |   31 +++++++++++++++++++++++++----
>>   2 files changed, 55 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
>> index 34aebf00a512..f7f9e217e7b4 100644
>> --- a/drivers/net/ethernet/intel/igc/igc.h
>> +++ b/drivers/net/ethernet/intel/igc/igc.h
>> @@ -13,6 +13,7 @@
>>   #include <linux/ptp_clock_kernel.h>
>>   #include <linux/timecounter.h>
>>   #include <linux/net_tstamp.h>
>> +#include <linux/bitfield.h>
>>   
>>   #include "igc_hw.h"
>>   
>> @@ -311,6 +312,33 @@ extern char igc_driver_name[];
>>   #define IGC_MRQC_RSS_FIELD_IPV4_UDP	0x00400000
>>   #define IGC_MRQC_RSS_FIELD_IPV6_UDP	0x00800000
>>   
>> +/* RX-desc Write-Back format RSS Type's */
>> +enum igc_rss_type_num {
>> +	IGC_RSS_TYPE_NO_HASH		= 0,
>> +	IGC_RSS_TYPE_HASH_TCP_IPV4	= 1,
>> +	IGC_RSS_TYPE_HASH_IPV4		= 2,
>> +	IGC_RSS_TYPE_HASH_TCP_IPV6	= 3,
>> +	IGC_RSS_TYPE_HASH_IPV6_EX	= 4,
>> +	IGC_RSS_TYPE_HASH_IPV6		= 5,
>> +	IGC_RSS_TYPE_HASH_TCP_IPV6_EX	= 6,
>> +	IGC_RSS_TYPE_HASH_UDP_IPV4	= 7,
>> +	IGC_RSS_TYPE_HASH_UDP_IPV6	= 8,
>> +	IGC_RSS_TYPE_HASH_UDP_IPV6_EX	= 9,
>> +	IGC_RSS_TYPE_MAX		= 10,
>> +};
>> +#define IGC_RSS_TYPE_MAX_TABLE		16
>> +#define IGC_RSS_TYPE_MASK		GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */
>> +
>> +/* igc_rss_type - Rx descriptor RSS type field */
>> +static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
>> +{
>> +	/* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved)
>> +	 * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info)
>> +	 * is slightly slower than via u32 (wb.lower.lo_dword.data)
>> +	 */
>> +	return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK);
>> +}
>> +
>>   /* Interrupt defines */
>>   #define IGC_START_ITR			648 /* ~6000 ints/sec */
>>   #define IGC_4K_ITR			980
>> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
>> index 1c4676882082..bfa9768d447f 100644
>> --- a/drivers/net/ethernet/intel/igc/igc_main.c
>> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
>> @@ -1690,14 +1690,36 @@ static void igc_rx_checksum(struct igc_ring *ring,
>>   		   le32_to_cpu(rx_desc->wb.upper.status_error));
>>   }
>>   
>> +/* Mapping HW RSS Type to enum pkt_hash_types */
>> +static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
>> +	[IGC_RSS_TYPE_NO_HASH]		= PKT_HASH_TYPE_L2,
>> +	[IGC_RSS_TYPE_HASH_TCP_IPV4]	= PKT_HASH_TYPE_L4,
>> +	[IGC_RSS_TYPE_HASH_IPV4]	= PKT_HASH_TYPE_L3,
>> +	[IGC_RSS_TYPE_HASH_TCP_IPV6]	= PKT_HASH_TYPE_L4,
>> +	[IGC_RSS_TYPE_HASH_IPV6_EX]	= PKT_HASH_TYPE_L3,
>> +	[IGC_RSS_TYPE_HASH_IPV6]	= PKT_HASH_TYPE_L3,
>> +	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
>> +	[IGC_RSS_TYPE_HASH_UDP_IPV4]	= PKT_HASH_TYPE_L4,
>> +	[IGC_RSS_TYPE_HASH_UDP_IPV6]	= PKT_HASH_TYPE_L4,
>> +	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
>> +	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
>> +	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask   */
>> +	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons       */
>> +	[13] = PKT_HASH_TYPE_NONE,
>> +	[14] = PKT_HASH_TYPE_NONE,
>> +	[15] = PKT_HASH_TYPE_NONE,
>> +};
>> +
>>   static inline void igc_rx_hash(struct igc_ring *ring,
>>   			       union igc_adv_rx_desc *rx_desc,
>>   			       struct sk_buff *skb)
>>   {
>> -	if (ring->netdev->features & NETIF_F_RXHASH)
>> -		skb_set_hash(skb,
>> -			     le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
>> -			     PKT_HASH_TYPE_L3);
>> +	if (ring->netdev->features & NETIF_F_RXHASH) {
>> +		u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
>> +		u32 rss_type = igc_rss_type(rx_desc);
>> +
>> +		skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
> 
> Just curious why not copy the logic from the other driver fms10k, ice, ect.
> 
> 	skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
> 		     (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
> 		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);

Detail: This code mis-categorize (e.g. ARP) PKT_HASH_TYPE_L2 as
PKT_HASH_TYPE_L3, but as core reduces this further to one SKB bit, it
doesn't really matter.

> avoiding the table logic. Do the driver folks care?

The define IXGBE_RSS_L4_TYPES_MASK becomes the "table" logic as a 1-bit
true/false table.  It is a more compact table, let me know if this is
preferred.

Yes, it is really upto driver maintainer people to decide, what code is
preferred ?

--Jesper
John Fastabend April 24, 2023, 7:17 p.m. UTC | #3
Jesper Dangaard Brouer wrote:
> 
> 
> On 23/04/2023 16.46, John Fastabend wrote:
> > Jesper Dangaard Brouer wrote:
> >> When function igc_rx_hash() was introduced in v4.20 via commit 0507ef8a0372
> >> ("igc: Add transmit and receive fastpath and interrupt handlers"), the
> >> hardware wasn't configured to provide RSS hash, thus it made sense to not
> >> enable net_device NETIF_F_RXHASH feature bit.
> >>
> >> The NIC hardware was configured to enable RSS hash info in v5.2 via commit
> >> 2121c2712f82 ("igc: Add multiple receive queues control supporting"), but
> >> forgot to set the NETIF_F_RXHASH feature bit.
> >>
> >> The original implementation of igc_rx_hash() didn't extract the associated
> >> pkt_hash_type, but statically set PKT_HASH_TYPE_L3. The largest portions of
> >> this patch are about extracting the RSS Type from the hardware and mapping
> >> this to enum pkt_hash_types. This was based on Foxville i225 software user
> >> manual rev-1.3.1 and tested on Intel Ethernet Controller I225-LM (rev 03).
> >>
> >> For UDP it's worth noting that RSS (type) hashing have been disabled both for
> >> IPv4 and IPv6 (see IGC_MRQC_RSS_FIELD_IPV4_UDP + IGC_MRQC_RSS_FIELD_IPV6_UDP)
> >> because hardware RSS doesn't handle fragmented pkts well when enabled (can
> >> cause out-of-order). This results in PKT_HASH_TYPE_L3 for UDP packets, and
> >> hash value doesn't include UDP port numbers. Not being PKT_HASH_TYPE_L4, have
> >> the effect that netstack will do a software based hash calc calling into
> >> flow_dissect, but only when code calls skb_get_hash(), which doesn't
> >> necessary happen for local delivery.
> >>
> >> For QA verification testing I wrote a small bpftrace prog:
> >>   [0] https://github.com/xdp-project/xdp-project/blob/master/areas/hints/monitor_skb_hash_on_dev.bt
> >>
> >> Fixes: 2121c2712f82 ("igc: Add multiple receive queues control supporting")
> >> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> >> ---
> >>   drivers/net/ethernet/intel/igc/igc.h      |   28 ++++++++++++++++++++++++++
> >>   drivers/net/ethernet/intel/igc/igc_main.c |   31 +++++++++++++++++++++++++----
> >>   2 files changed, 55 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
> >> index 34aebf00a512..f7f9e217e7b4 100644
> >> --- a/drivers/net/ethernet/intel/igc/igc.h
> >> +++ b/drivers/net/ethernet/intel/igc/igc.h
> >> @@ -13,6 +13,7 @@
> >>   #include <linux/ptp_clock_kernel.h>
> >>   #include <linux/timecounter.h>
> >>   #include <linux/net_tstamp.h>
> >> +#include <linux/bitfield.h>
> >>   
> >>   #include "igc_hw.h"
> >>   
> >> @@ -311,6 +312,33 @@ extern char igc_driver_name[];
> >>   #define IGC_MRQC_RSS_FIELD_IPV4_UDP	0x00400000
> >>   #define IGC_MRQC_RSS_FIELD_IPV6_UDP	0x00800000
> >>   
> >> +/* RX-desc Write-Back format RSS Type's */
> >> +enum igc_rss_type_num {
> >> +	IGC_RSS_TYPE_NO_HASH		= 0,
> >> +	IGC_RSS_TYPE_HASH_TCP_IPV4	= 1,
> >> +	IGC_RSS_TYPE_HASH_IPV4		= 2,
> >> +	IGC_RSS_TYPE_HASH_TCP_IPV6	= 3,
> >> +	IGC_RSS_TYPE_HASH_IPV6_EX	= 4,
> >> +	IGC_RSS_TYPE_HASH_IPV6		= 5,
> >> +	IGC_RSS_TYPE_HASH_TCP_IPV6_EX	= 6,
> >> +	IGC_RSS_TYPE_HASH_UDP_IPV4	= 7,
> >> +	IGC_RSS_TYPE_HASH_UDP_IPV6	= 8,
> >> +	IGC_RSS_TYPE_HASH_UDP_IPV6_EX	= 9,
> >> +	IGC_RSS_TYPE_MAX		= 10,
> >> +};
> >> +#define IGC_RSS_TYPE_MAX_TABLE		16
> >> +#define IGC_RSS_TYPE_MASK		GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */
> >> +
> >> +/* igc_rss_type - Rx descriptor RSS type field */
> >> +static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
> >> +{
> >> +	/* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved)
> >> +	 * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info)
> >> +	 * is slightly slower than via u32 (wb.lower.lo_dword.data)
> >> +	 */
> >> +	return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK);
> >> +}
> >> +
> >>   /* Interrupt defines */
> >>   #define IGC_START_ITR			648 /* ~6000 ints/sec */
> >>   #define IGC_4K_ITR			980
> >> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
> >> index 1c4676882082..bfa9768d447f 100644
> >> --- a/drivers/net/ethernet/intel/igc/igc_main.c
> >> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
> >> @@ -1690,14 +1690,36 @@ static void igc_rx_checksum(struct igc_ring *ring,
> >>   		   le32_to_cpu(rx_desc->wb.upper.status_error));
> >>   }
> >>   
> >> +/* Mapping HW RSS Type to enum pkt_hash_types */
> >> +static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
> >> +	[IGC_RSS_TYPE_NO_HASH]		= PKT_HASH_TYPE_L2,
> >> +	[IGC_RSS_TYPE_HASH_TCP_IPV4]	= PKT_HASH_TYPE_L4,
> >> +	[IGC_RSS_TYPE_HASH_IPV4]	= PKT_HASH_TYPE_L3,
> >> +	[IGC_RSS_TYPE_HASH_TCP_IPV6]	= PKT_HASH_TYPE_L4,
> >> +	[IGC_RSS_TYPE_HASH_IPV6_EX]	= PKT_HASH_TYPE_L3,
> >> +	[IGC_RSS_TYPE_HASH_IPV6]	= PKT_HASH_TYPE_L3,
> >> +	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
> >> +	[IGC_RSS_TYPE_HASH_UDP_IPV4]	= PKT_HASH_TYPE_L4,
> >> +	[IGC_RSS_TYPE_HASH_UDP_IPV6]	= PKT_HASH_TYPE_L4,
> >> +	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
> >> +	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
> >> +	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask   */
> >> +	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons       */
> >> +	[13] = PKT_HASH_TYPE_NONE,
> >> +	[14] = PKT_HASH_TYPE_NONE,
> >> +	[15] = PKT_HASH_TYPE_NONE,
> >> +};
> >> +
> >>   static inline void igc_rx_hash(struct igc_ring *ring,
> >>   			       union igc_adv_rx_desc *rx_desc,
> >>   			       struct sk_buff *skb)
> >>   {
> >> -	if (ring->netdev->features & NETIF_F_RXHASH)
> >> -		skb_set_hash(skb,
> >> -			     le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
> >> -			     PKT_HASH_TYPE_L3);
> >> +	if (ring->netdev->features & NETIF_F_RXHASH) {
> >> +		u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
> >> +		u32 rss_type = igc_rss_type(rx_desc);
> >> +
> >> +		skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
> > 
> > Just curious why not copy the logic from the other driver fms10k, ice, ect.
> > 
> > 	skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
> > 		     (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
> > 		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
> 
> Detail: This code mis-categorize (e.g. ARP) PKT_HASH_TYPE_L2 as
> PKT_HASH_TYPE_L3, but as core reduces this further to one SKB bit, it
> doesn't really matter.
> 
> > avoiding the table logic. Do the driver folks care?
> 
> The define IXGBE_RSS_L4_TYPES_MASK becomes the "table" logic as a 1-bit
> true/false table.  It is a more compact table, let me know if this is
> preferred.
> 
> Yes, it is really upto driver maintainer people to decide, what code is
> preferred ?

Yeah doesn't matter much to me either way. I was just looking at code
compared to ice driver while reviewing.

> 
> --Jesper
>
Jesper Dangaard Brouer April 25, 2023, 8:43 a.m. UTC | #4
On 24/04/2023 21.17, John Fastabend wrote:
>>> Just curious why not copy the logic from the other driver fms10k, ice, ect.
>>>
>>> 	skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
>>> 		     (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
>>> 		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>> Detail: This code mis-categorize (e.g. ARP) PKT_HASH_TYPE_L2 as
>> PKT_HASH_TYPE_L3, but as core reduces this further to one SKB bit, it
>> doesn't really matter.
>>
>>> avoiding the table logic. Do the driver folks care?
>> The define IXGBE_RSS_L4_TYPES_MASK becomes the "table" logic as a 1-bit
>> true/false table.  It is a more compact table, let me know if this is
>> preferred.
>>
>> Yes, it is really upto driver maintainer people to decide, what code is
>> preferred ?
 >
> Yeah doesn't matter much to me either way. I was just looking at code
> compared to ice driver while reviewing.

My preference is to apply this patchset. We/I can easily followup and
change this to use the more compact approach later (if someone prefers).

I know net-next is "closed", but this patchset was posted prior to the
close.  Plus, a number of companies are waiting for the XDP-hint for HW
RX timestamp.  The support for driver stmmac is already in net-next
(commit e3f9c3e34840 ("net: stmmac: add Rx HWTS metadata to XDP receive
pkt")). Thus, it would be a help if both igc+stmmac changes land in same
kernel version, as both drivers are being evaluated by these companies.

Pretty please,
--Jesper
Sasha Neftin April 25, 2023, 9:40 a.m. UTC | #5
On 4/25/2023 11:43, Jesper Dangaard Brouer wrote:
> 
> On 24/04/2023 21.17, John Fastabend wrote:
>>>> Just curious why not copy the logic from the other driver fms10k, 
>>>> ice, ect.
>>>>
>>>>     skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
>>>>              (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
>>>>              PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>>> Detail: This code mis-categorize (e.g. ARP) PKT_HASH_TYPE_L2 as
>>> PKT_HASH_TYPE_L3, but as core reduces this further to one SKB bit, it
>>> doesn't really matter.
>>>
>>>> avoiding the table logic. Do the driver folks care?
>>> The define IXGBE_RSS_L4_TYPES_MASK becomes the "table" logic as a 1-bit
>>> true/false table.  It is a more compact table, let me know if this is
>>> preferred.
>>>
>>> Yes, it is really upto driver maintainer people to decide, what code is
>>> preferred ?
>  >
>> Yeah doesn't matter much to me either way. I was just looking at code
>> compared to ice driver while reviewing.
> 
> My preference is to apply this patchset.
I have no objection.
  We/I can easily followup and
> change this to use the more compact approach later (if someone prefers).
> 
> I know net-next is "closed", but this patchset was posted prior to the
> close.  Plus, a number of companies are waiting for the XDP-hint for HW
> RX timestamp.  The support for driver stmmac is already in net-next
> (commit e3f9c3e34840 ("net: stmmac: add Rx HWTS metadata to XDP receive
> pkt")). Thus, it would be a help if both igc+stmmac changes land in same
> kernel version, as both drivers are being evaluated by these companies.
> 
> Pretty please,
> --Jesper
> 
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
Daniel Borkmann April 27, 2023, 5 p.m. UTC | #6
On 4/25/23 10:43 AM, Jesper Dangaard Brouer wrote:
> On 24/04/2023 21.17, John Fastabend wrote:
>>>> Just curious why not copy the logic from the other driver fms10k, ice, ect.
>>>>
>>>>     skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
>>>>              (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
>>>>              PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>>> Detail: This code mis-categorize (e.g. ARP) PKT_HASH_TYPE_L2 as
>>> PKT_HASH_TYPE_L3, but as core reduces this further to one SKB bit, it
>>> doesn't really matter.
>>>
>>>> avoiding the table logic. Do the driver folks care?
>>> The define IXGBE_RSS_L4_TYPES_MASK becomes the "table" logic as a 1-bit
>>> true/false table.  It is a more compact table, let me know if this is
>>> preferred.
>>>
>>> Yes, it is really upto driver maintainer people to decide, what code is
>>> preferred ?
>  >
>> Yeah doesn't matter much to me either way. I was just looking at code
>> compared to ice driver while reviewing.
> 
> My preference is to apply this patchset. We/I can easily followup and
> change this to use the more compact approach later (if someone prefers).

Consistency might help imo and would avoid questions/confusion on /why/
doing it differently for igc vs some of the others.

> I know net-next is "closed", but this patchset was posted prior to the
> close.  Plus, a number of companies are waiting for the XDP-hint for HW
> RX timestamp.  The support for driver stmmac is already in net-next
> (commit e3f9c3e34840 ("net: stmmac: add Rx HWTS metadata to XDP receive
> pkt")). Thus, it would be a help if both igc+stmmac changes land in same
> kernel version, as both drivers are being evaluated by these companies.

Given merge window is open now and net-next closed, it's too late to land
(unless Dave/Jakub thinks otherwise given it touches also driver bits).
I've applied the series to bpf-next right now.

Thanks,
Daniel
Jesper Dangaard Brouer April 28, 2023, 10:13 a.m. UTC | #7
On 27/04/2023 19.00, Daniel Borkmann wrote:
> On 4/25/23 10:43 AM, Jesper Dangaard Brouer wrote:
>> On 24/04/2023 21.17, John Fastabend wrote:
>>>>> Just curious why not copy the logic from the other driver fms10k, 
>>>>> ice, ect.
>>>>>
>>>>>     skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
>>>>>              (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
>>>>>              PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
>>>> Detail: This code mis-categorize (e.g. ARP) PKT_HASH_TYPE_L2 as
>>>> PKT_HASH_TYPE_L3, but as core reduces this further to one SKB bit, it
>>>> doesn't really matter.
>>>>
>>>>> avoiding the table logic. Do the driver folks care?
>>>> The define IXGBE_RSS_L4_TYPES_MASK becomes the "table" logic as a 1-bit
>>>> true/false table.  It is a more compact table, let me know if this is
>>>> preferred.
>>>>
>>>> Yes, it is really upto driver maintainer people to decide, what code is
>>>> preferred ?
>>  >
>>> Yeah doesn't matter much to me either way. I was just looking at code
>>> compared to ice driver while reviewing.
>>
>> My preference is to apply this patchset. We/I can easily followup and
>> change this to use the more compact approach later (if someone prefers).
> 
> Consistency might help imo and would avoid questions/confusion on /why/
> doing it differently for igc vs some of the others.
>

Well, drivers often do things differently, so that not something new. I
found the other approach less readable (and theoretically wrong for the
L2 case).  For igc this approach makes it easier to read (IMHO I'm
biased of cause) and easier to compare with kfunc metadata hint type
(that doesn't have RSS type information loss).

>> I know net-next is "closed", but this patchset was posted prior to the
>> close.  Plus, a number of companies are waiting for the XDP-hint for HW
>> RX timestamp.  The support for driver stmmac is already in net-next
>> (commit e3f9c3e34840 ("net: stmmac: add Rx HWTS metadata to XDP receive
>> pkt")). Thus, it would be a help if both igc+stmmac changes land in same
>> kernel version, as both drivers are being evaluated by these companies.
> 
> Given merge window is open now and net-next closed, it's too late to land
> (unless Dave/Jakub thinks otherwise given it touches also driver bits).
> I've applied the series to bpf-next right now.

It's not a big deal that it didn't reached net-next, end-users will just
have to wait for another kernel release to see this feature, or backport
the feature themselves.

Thanks for applying it.

--Jesper
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 34aebf00a512..f7f9e217e7b4 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -13,6 +13,7 @@ 
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
 #include <linux/net_tstamp.h>
+#include <linux/bitfield.h>
 
 #include "igc_hw.h"
 
@@ -311,6 +312,33 @@  extern char igc_driver_name[];
 #define IGC_MRQC_RSS_FIELD_IPV4_UDP	0x00400000
 #define IGC_MRQC_RSS_FIELD_IPV6_UDP	0x00800000
 
+/* RX-desc Write-Back format RSS Type's */
+enum igc_rss_type_num {
+	IGC_RSS_TYPE_NO_HASH		= 0,
+	IGC_RSS_TYPE_HASH_TCP_IPV4	= 1,
+	IGC_RSS_TYPE_HASH_IPV4		= 2,
+	IGC_RSS_TYPE_HASH_TCP_IPV6	= 3,
+	IGC_RSS_TYPE_HASH_IPV6_EX	= 4,
+	IGC_RSS_TYPE_HASH_IPV6		= 5,
+	IGC_RSS_TYPE_HASH_TCP_IPV6_EX	= 6,
+	IGC_RSS_TYPE_HASH_UDP_IPV4	= 7,
+	IGC_RSS_TYPE_HASH_UDP_IPV6	= 8,
+	IGC_RSS_TYPE_HASH_UDP_IPV6_EX	= 9,
+	IGC_RSS_TYPE_MAX		= 10,
+};
+#define IGC_RSS_TYPE_MAX_TABLE		16
+#define IGC_RSS_TYPE_MASK		GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */
+
+/* igc_rss_type - Rx descriptor RSS type field */
+static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
+{
+	/* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved)
+	 * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info)
+	 * is slightly slower than via u32 (wb.lower.lo_dword.data)
+	 */
+	return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK);
+}
+
 /* Interrupt defines */
 #define IGC_START_ITR			648 /* ~6000 ints/sec */
 #define IGC_4K_ITR			980
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 1c4676882082..bfa9768d447f 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1690,14 +1690,36 @@  static void igc_rx_checksum(struct igc_ring *ring,
 		   le32_to_cpu(rx_desc->wb.upper.status_error));
 }
 
+/* Mapping HW RSS Type to enum pkt_hash_types */
+static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
+	[IGC_RSS_TYPE_NO_HASH]		= PKT_HASH_TYPE_L2,
+	[IGC_RSS_TYPE_HASH_TCP_IPV4]	= PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_IPV4]	= PKT_HASH_TYPE_L3,
+	[IGC_RSS_TYPE_HASH_TCP_IPV6]	= PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_IPV6_EX]	= PKT_HASH_TYPE_L3,
+	[IGC_RSS_TYPE_HASH_IPV6]	= PKT_HASH_TYPE_L3,
+	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_UDP_IPV4]	= PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_UDP_IPV6]	= PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
+	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
+	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask   */
+	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons       */
+	[13] = PKT_HASH_TYPE_NONE,
+	[14] = PKT_HASH_TYPE_NONE,
+	[15] = PKT_HASH_TYPE_NONE,
+};
+
 static inline void igc_rx_hash(struct igc_ring *ring,
 			       union igc_adv_rx_desc *rx_desc,
 			       struct sk_buff *skb)
 {
-	if (ring->netdev->features & NETIF_F_RXHASH)
-		skb_set_hash(skb,
-			     le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
-			     PKT_HASH_TYPE_L3);
+	if (ring->netdev->features & NETIF_F_RXHASH) {
+		u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+		u32 rss_type = igc_rss_type(rx_desc);
+
+		skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
+	}
 }
 
 static void igc_rx_vlan(struct igc_ring *rx_ring,
@@ -6554,6 +6576,7 @@  static int igc_probe(struct pci_dev *pdev,
 	netdev->features |= NETIF_F_TSO;
 	netdev->features |= NETIF_F_TSO6;
 	netdev->features |= NETIF_F_TSO_ECN;
+	netdev->features |= NETIF_F_RXHASH;
 	netdev->features |= NETIF_F_RXCSUM;
 	netdev->features |= NETIF_F_HW_CSUM;
 	netdev->features |= NETIF_F_SCTP_CRC;