diff mbox series

[2/4] usbnet: smsc95xx: align tx-buffer to word

Message ID 20181002092645.1115-3-ben.dooks@codethink.co.uk
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [1/4] usbnet: smsc95xx: add kconfig for turbo mode | expand

Commit Message

Ben Dooks Oct. 2, 2018, 9:26 a.m. UTC
The tegra driver requires alignment of the buffer, so try and
make this better by pushing the buffer start back to an word
aligned address. At the worst this makes memcpy() easier as
it is word aligned, at best it makes sure the usb can directly
map the buffer.

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
[todo - make this configurable]
---
 drivers/net/usb/Kconfig    | 12 ++++++++++++
 drivers/net/usb/smsc95xx.c | 22 ++++++++++++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)

Comments

David Laight Oct. 2, 2018, 1:19 p.m. UTC | #1
From: Ben Dooks
> Sent: 02 October 2018 10:27
> 
> The tegra driver requires alignment of the buffer, so try and
> make this better by pushing the buffer start back to an word
> aligned address. At the worst this makes memcpy() easier as
> it is word aligned, at best it makes sure the usb can directly
> map the buffer.
> 
> Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
> [todo - make this configurable]
> ---
>  drivers/net/usb/Kconfig    | 12 ++++++++++++
>  drivers/net/usb/smsc95xx.c | 22 ++++++++++++++++++++--
>  2 files changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
...
> +static bool align_tx = IS_ENABLED(CONFIG_USB_NET_SMSC95XX_TXALIGN);
> +module_param(align_tx, bool, 0644);
> +MODULE_PARM_DESC(align_tx, "Align TX buffers to word boundaries");

DM doesn't like module parameters.

>  static bool turbo_mode = IS_ENABLED(CONFIG_USB_NET_SMSC95XX_TURBO);
>  module_param(turbo_mode, bool, 0644);
>  MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
> @@ -2005,10 +2009,18 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
>  	bool csum = skb->ip_summed == CHECKSUM_PARTIAL;
>  	int overhead = csum ? SMSC95XX_TX_OVERHEAD_CSUM : SMSC95XX_TX_OVERHEAD;
>  	u32 tx_cmd_a, tx_cmd_b;
> +	u32 data_len;
> +	uintptr_t align = 0;
> 
>  	/* We do not advertise SG, so skbs should be already linearized */
>  	BUG_ON(skb_shinfo(skb)->nr_frags);
> 
> +	if (IS_ENABLED(CONFIG_USB_NET_SMSC95XX_TXALIGN) && align_tx) {
> +		align = (uintptr_t)skb->data & 3;
> +		if (align)
> +			overhead += 4 - align;

Better to calculate the pad size once:
		align = (-(long)skb->data) & 3;
should do it - and you can unconditionally add it in.

> +	}
> +
>  	/* Make writable and expand header space by overhead if required */
>  	if (skb_cow_head(skb, overhead)) {
>  		/* Must deallocate here as returning NULL to indicate error
> @@ -2037,16 +2049,22 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
>  		}
>  	}
> 
> +	data_len = skb->len;
> +	if (align)
> +		skb_push(skb, 4 - align);
> +
>  	skb_push(skb, 4);

You don't want to call skb_push() twice.
IIRC really horrid things happen if the data has to be copied.
(Actually what happens to the alignment in that case??)
And there is another skb_push() below....

> -	tx_cmd_b = (u32)(skb->len - 4);
> +	tx_cmd_b = (u32)(data_len);

You don't need the cast here at all (if it was ever needed).
Actually you don't need the new 'data_len' variable.
Just set tx_cmd_b earlier.

...

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Ben Dooks Oct. 2, 2018, 1:35 p.m. UTC | #2
On 02/10/18 14:19, David Laight wrote:
> From: Ben Dooks
>> Sent: 02 October 2018 10:27
>>
>> The tegra driver requires alignment of the buffer, so try and
>> make this better by pushing the buffer start back to an word
>> aligned address. At the worst this makes memcpy() easier as
>> it is word aligned, at best it makes sure the usb can directly
>> map the buffer.
>>
>> Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
>> [todo - make this configurable]
>> ---
>>   drivers/net/usb/Kconfig    | 12 ++++++++++++
>>   drivers/net/usb/smsc95xx.c | 22 ++++++++++++++++++++--
>>   2 files changed, 32 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
> ...
>> +static bool align_tx = IS_ENABLED(CONFIG_USB_NET_SMSC95XX_TXALIGN);
>> +module_param(align_tx, bool, 0644);
>> +MODULE_PARM_DESC(align_tx, "Align TX buffers to word boundaries");
> 
> DM doesn't like module parameters.
> 
>>   static bool turbo_mode = IS_ENABLED(CONFIG_USB_NET_SMSC95XX_TURBO);
>>   module_param(turbo_mode, bool, 0644);
>>   MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
>> @@ -2005,10 +2009,18 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
>>   	bool csum = skb->ip_summed == CHECKSUM_PARTIAL;
>>   	int overhead = csum ? SMSC95XX_TX_OVERHEAD_CSUM : SMSC95XX_TX_OVERHEAD;
>>   	u32 tx_cmd_a, tx_cmd_b;
>> +	u32 data_len;
>> +	uintptr_t align = 0;
>>
>>   	/* We do not advertise SG, so skbs should be already linearized */
>>   	BUG_ON(skb_shinfo(skb)->nr_frags);
>>
>> +	if (IS_ENABLED(CONFIG_USB_NET_SMSC95XX_TXALIGN) && align_tx) {
>> +		align = (uintptr_t)skb->data & 3;
>> +		if (align)
>> +			overhead += 4 - align;
> 
> Better to calculate the pad size once:
> 		align = (-(long)skb->data) & 3;
> should do it - and you can unconditionally add it in.
> 
>> +	}
>> +
>>   	/* Make writable and expand header space by overhead if required */
>>   	if (skb_cow_head(skb, overhead)) {
>>   		/* Must deallocate here as returning NULL to indicate error
>> @@ -2037,16 +2049,22 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
>>   		}
>>   	}
>>
>> +	data_len = skb->len;
>> +	if (align)
>> +		skb_push(skb, 4 - align);
>> +
>>   	skb_push(skb, 4);
> 
> You don't want to call skb_push() twice.
> IIRC really horrid things happen if the data has to be copied.
> (Actually what happens to the alignment in that case??)
> And there is another skb_push() below....

The driver does it /multiple/ times depending on the path used.
Is it wise to try and make a separate patch to skb_push() once
and also move the tx_cmd_a and tx_cmd_b bit to a single point?

>> -	tx_cmd_b = (u32)(skb->len - 4);
>> +	tx_cmd_b = (u32)(data_len);
> 
> You don't need the cast here at all (if it was ever needed).
> Actually you don't need the new 'data_len' variable.
> Just set tx_cmd_b earlier.
> 
> ...
> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
> 
>
Ben Hutchings Oct. 4, 2018, 4:53 p.m. UTC | #3
On Tue, 2018-10-02 at 10:26 +0100, Ben Dooks wrote:
> The tegra driver requires alignment of the buffer, so try and
> make this better by pushing the buffer start back to an word
> aligned address. At the worst this makes memcpy() easier as
> it is word aligned, at best it makes sure the usb can directly
> map the buffer.
> 
> Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
> [todo - make this configurable]
[...]

I don't think you need a separate kconfig symbol for this.  Aligning RX
buffers to words (or better, cache lines) is almost always a win, so
long as the CPU can handle misaligned fields in the network/transport
headers.  You can use #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS to
check that.

It seems like NET_IP_ALIGN should be defined to 0 or 2 depending on
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, but NET_IP_ALIGN predates the
latter.

Ben.
diff mbox series

Patch

diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index a32f1a446ce9..35bad8bd2e2a 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -360,6 +360,18 @@  config USB_NET_SMSC95XX_TURBO
 	  driver's receive path. These can also be altered by the
 	  turbo_mode module parameter.
 
+config USB_NET_SMSC95XX_TXALIGN
+	bool "Add bytes to align transmit buffers"
+	depends on USB_NET_SMSC95XX
+	default n
+	help
+	  This option makes the tx buffers 32 bit aligned which might
+	  help with systems that want tx data aligned to a 32 bit
+	  boundary.
+
+	  Using this option will mean there may be up to 3 bytes of
+	  data per packet sent.
+
 config USB_NET_GL620A
 	tristate "GeneSys GL620USB-A based cables"
 	depends on USB_USBNET
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index fe13bef9579e..d244357bf1ad 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -78,6 +78,10 @@  struct smsc95xx_priv {
 	struct usbnet *dev;
 };
 
+static bool align_tx = IS_ENABLED(CONFIG_USB_NET_SMSC95XX_TXALIGN);
+module_param(align_tx, bool, 0644);
+MODULE_PARM_DESC(align_tx, "Align TX buffers to word boundaries");
+
 static bool turbo_mode = IS_ENABLED(CONFIG_USB_NET_SMSC95XX_TURBO);
 module_param(turbo_mode, bool, 0644);
 MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
@@ -2005,10 +2009,18 @@  static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
 	bool csum = skb->ip_summed == CHECKSUM_PARTIAL;
 	int overhead = csum ? SMSC95XX_TX_OVERHEAD_CSUM : SMSC95XX_TX_OVERHEAD;
 	u32 tx_cmd_a, tx_cmd_b;
+	u32 data_len;
+	uintptr_t align = 0;
 
 	/* We do not advertise SG, so skbs should be already linearized */
 	BUG_ON(skb_shinfo(skb)->nr_frags);
 
+	if (IS_ENABLED(CONFIG_USB_NET_SMSC95XX_TXALIGN) && align_tx) {
+		align = (uintptr_t)skb->data & 3;
+		if (align)
+			overhead += 4 - align;
+	}
+
 	/* Make writable and expand header space by overhead if required */
 	if (skb_cow_head(skb, overhead)) {
 		/* Must deallocate here as returning NULL to indicate error
@@ -2037,16 +2049,22 @@  static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
 		}
 	}
 
+	data_len = skb->len;
+	if (align)
+		skb_push(skb, 4 - align);
+
 	skb_push(skb, 4);
-	tx_cmd_b = (u32)(skb->len - 4);
+	tx_cmd_b = (u32)(data_len);
 	if (csum)
 		tx_cmd_b |= TX_CMD_B_CSUM_ENABLE;
 	cpu_to_le32s(&tx_cmd_b);
 	memcpy(skb->data, &tx_cmd_b, 4);
 
 	skb_push(skb, 4);
-	tx_cmd_a = (u32)(skb->len - 8) | TX_CMD_A_FIRST_SEG_ |
+	tx_cmd_a = (u32)(data_len) | TX_CMD_A_FIRST_SEG_ |
 		TX_CMD_A_LAST_SEG_;
+	if (align)
+		tx_cmd_a |= (4 - align) << 16;
 	cpu_to_le32s(&tx_cmd_a);
 	memcpy(skb->data, &tx_cmd_a, 4);