diff mbox series

ath79/ag71xx: rearrange ag71xx structs to remove holes

Message ID 20210919103839.2503-1-rsalvaterra@gmail.com
State Superseded
Delegated to: Rui Salvaterra
Headers show
Series ath79/ag71xx: rearrange ag71xx structs to remove holes | expand

Commit Message

Rui Salvaterra Sept. 19, 2021, 10:38 a.m. UTC
From: Rosen Penev <rosenp@gmail.com>

Remove ____cacheline_aligned attribute to ring structs. This actually causes
holes in the ag71xx struct as well as lower performance.

Rearrange struct members to fall within respective cachelines. The RX ring
struct now does not share a cacheline with the TX ring. The NAPI struct now
takes up its own cachelines and does not share.

According to pahole -C ag71xx -c 32

Before:

struct ag71xx {
	/* size: 384, cachelines: 12, members: 22 */
	/* sum members: 375, holes: 2, sum holes: 9 */

After:

struct ag71xx {
	/* size: 376, cachelines: 12, members: 22 */
	/* last cacheline: 24 bytes */

Signed-off-by: Rosen Penev <rosenp@gmail.com>
[Fix typos in the patch description]
Signed-off-by: Rui Salvaterra <rsalvaterra@gmail.com>
---
The ag71xx_ring changes are already part of the upstream driver. However, since
we're not using it at all yet, let's apply this to our driver for the time
being, as David explicitly requests performance numbers before applying it
upstream [1] (and rightly so, in my opinion).

[1] https://lore.kernel.org/netdev/20190725.112108.2287417619951369896.davem@davemloft.net/

 .../net/ethernet/atheros/ag71xx/ag71xx.h      | 32 ++++++++++---------
 1 file changed, 17 insertions(+), 15 deletions(-)

Comments

Adrian Schmutzler Sept. 24, 2021, 9:28 p.m. UTC | #1
> -----Original Message-----
> From: openwrt-devel [mailto:openwrt-devel-bounces@lists.openwrt.org]
> On Behalf Of Rui Salvaterra
> Sent: Sonntag, 19. September 2021 12:39
> To: openwrt-devel@lists.openwrt.org
> Cc: rosenp@gmail.com; Rui Salvaterra <rsalvaterra@gmail.com>
> Subject: [PATCH] ath79/ag71xx: rearrange ag71xx structs to remove holes

Commit title prefix should be "ath79: ag71xx:" or just "ath79:".

Apart from that, I can contribute nothing useful :-(

Best

Adrian

> 
> From: Rosen Penev <rosenp@gmail.com>
> 
> Remove ____cacheline_aligned attribute to ring structs. This actually
causes
> holes in the ag71xx struct as well as lower performance.
> 
> Rearrange struct members to fall within respective cachelines. The RX ring
> struct now does not share a cacheline with the TX ring. The NAPI struct
now
> takes up its own cachelines and does not share.
> 
> According to pahole -C ag71xx -c 32
> 
> Before:
> 
> struct ag71xx {
> 	/* size: 384, cachelines: 12, members: 22 */
> 	/* sum members: 375, holes: 2, sum holes: 9 */
> 
> After:
> 
> struct ag71xx {
> 	/* size: 376, cachelines: 12, members: 22 */
> 	/* last cacheline: 24 bytes */
> 
> Signed-off-by: Rosen Penev <rosenp@gmail.com> [Fix typos in the patch
> description]
> Signed-off-by: Rui Salvaterra <rsalvaterra@gmail.com>
> ---
> The ag71xx_ring changes are already part of the upstream driver. However,
> since we're not using it at all yet, let's apply this to our driver for
the time
> being, as David explicitly requests performance numbers before applying it
> upstream [1] (and rightly so, in my opinion).
> 
> [1]
> https://lore.kernel.org/netdev/20190725.112108.2287417619951369896.dave
> m@davemloft.net/
> 
>  .../net/ethernet/atheros/ag71xx/ag71xx.h      | 32 ++++++++++---------
>  1 file changed, 17 insertions(+), 15 deletions(-)
> 
> diff --git
> a/target/linux/ath79/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
> b/target/linux/ath79/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
> index 5ff9439f0d..6ed1f78459 100644
> ---
> a/target/linux/ath79/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
> +++ b/target/linux/ath79/files/drivers/net/ethernet/atheros/ag71xx/ag71x
> +++ x.h
> @@ -107,13 +107,16 @@ struct ag71xx_buf {  };
> 
>  struct ag71xx_ring {
> -	struct ag71xx_buf	*buf;
> -	u8			*descs_cpu;
> -	dma_addr_t		descs_dma;
> -	u16			desc_split;
> -	u16			order;
> +	/* "Hot" fields in the data path. */
>  	unsigned int		curr;
>  	unsigned int		dirty;
> +
> +	/* "Cold" fields - not used in the data path. */
> +	struct ag71xx_buf	*buf;
> +	u16			order;
> +	u16			desc_split;
> +	dma_addr_t		descs_dma;
> +	u8			*descs_cpu;
>  };
> 
>  struct ag71xx_int_stats {
> @@ -151,21 +154,20 @@ struct ag71xx {
>  	 * Critical data related to the per-packet data path are clustered
>  	 * early in this structure to help improve the D-cache footprint.
>  	 */
> -	struct ag71xx_ring	rx_ring ____cacheline_aligned;
> -	struct ag71xx_ring	tx_ring ____cacheline_aligned;
> +	struct ag71xx_ring	rx_ring;
> +	u16			rx_buf_size;
> +	u16			rx_buf_offset;
> +	u32			msg_enable;
> 
> +	struct ag71xx_ring	tx_ring;
>  	int			mac_idx;
> -
>  	u16			desc_pktlen_mask;
> -	u16			rx_buf_size;
> -	u8			rx_buf_offset;
>  	u8			tx_hang_workaround:1;
> 
> +	struct napi_struct	napi;
>  	struct net_device	*dev;
>  	struct platform_device  *pdev;
>  	spinlock_t		lock;
> -	struct napi_struct	napi;
> -	u32			msg_enable;
> 
>  	/*
>  	 * From this point onwards we're not looking at per-packet fields.
> @@ -188,12 +190,12 @@ struct ag71xx {
>  	unsigned int		speed;
>  	int			duplex;
> 
> -	struct delayed_work	restart_work;
> -	struct timer_list	oom_timer;
> -
>  	struct reset_control *mac_reset;
>  	struct reset_control *mdio_reset;
> 
> +	struct delayed_work	restart_work;
> +	struct timer_list	oom_timer;
> +
>  	u32			fifodata[3];
>  	u32			plldata[3];
>  	u32			pllreg[3];
> --
> 2.33.0
> 
> 
> _______________________________________________
> openwrt-devel mailing list
> openwrt-devel@lists.openwrt.org
> https://lists.openwrt.org/mailman/listinfo/openwrt-devel
Rui Salvaterra Sept. 25, 2021, 6:32 p.m. UTC | #2
Hi, Adrian,

On Fri, 24 Sept 2021 at 22:28, Adrian Schmutzler
<mail@adrianschmutzler.de> wrote:
>
> Commit title prefix should be "ath79: ag71xx:" or just "ath79:".

To be honest, it makes more sense to me to have "

ath79/ag71xx:", as it conveys the same information requiring one
character less (and this format has often been used before). However,
"ath79:" works for me too. Shall I respin?


> Apart from that, I can contribute nothing useful :-(

Thanks,
Rui
Adrian Schmutzler Sept. 25, 2021, 6:38 p.m. UTC | #3
Hi,

> -----Original Message-----
> From: openwrt-devel [mailto:openwrt-devel-bounces@lists.openwrt.org]
> On Behalf Of Rui Salvaterra
> Sent: Samstag, 25. September 2021 20:32
> To: Adrian Schmutzler <mail@adrianschmutzler.de>
> Cc: openwrt-devel@lists.openwrt.org; rosenp@gmail.com
> Subject: Re: [PATCH] ath79/ag71xx: rearrange ag71xx structs to remove
> holes
> 
> Hi, Adrian,
> 
> On Fri, 24 Sept 2021 at 22:28, Adrian Schmutzler <mail@adrianschmutzler.de>
> wrote:
> >
> > Commit title prefix should be "ath79: ag71xx:" or just "ath79:".
> 
> To be honest, it makes more sense to me to have "
> 
> ath79/ag71xx:", as it conveys the same information requiring one character
> less (and this format has often been used before). However, "ath79:" works
> for me too. Shall I respin?

Well, rule is that prefix has to be the target. I don't see why we should deviate from this rule now just to save a character.

Best

Adrian

> 
> 
> > Apart from that, I can contribute nothing useful :-(
> 
> Thanks,
> Rui
> 
> _______________________________________________
> openwrt-devel mailing list
> openwrt-devel@lists.openwrt.org
> https://lists.openwrt.org/mailman/listinfo/openwrt-devel
David Bauer Sept. 26, 2021, 11:37 a.m. UTC | #4
Hi Rui,
Hi Rosen,

On 9/19/21 12:38, Rui Salvaterra wrote:
> The ag71xx_ring changes are already part of the upstream driver. However, since
> we're not using it at all yet, let's apply this to our driver for the time
> being, as David explicitly requests performance numbers before applying it
> upstream [1] (and rightly so, in my opinion).

Does this mean parts of this patch are already applied upstream?

Also are there already performance numbers available?

Best
David
Rui Salvaterra Sept. 26, 2021, 6:31 p.m. UTC | #5
Hi, David,

On Sun, 26 Sept 2021 at 12:37, David Bauer <mail@david-bauer.net> wrote:
>
> Does this mean parts of this patch are already applied upstream?

Yes, the ag71xx_ring section.

> Also are there already performance numbers available?

Rosen has performance numbers comparing ar71xx to ath79 DSA including
this patch (with huge improvements [1]), but not for this specific
patch in isolation, I think. Rosen?

[1] https://gist.github.com/neheb/5f3636eb8008431239f73613cbf543b6

Thanks,
Rui
Rosen Penev Sept. 26, 2021, 7 p.m. UTC | #6
On Sun, Sep 26, 2021 at 11:31 AM Rui Salvaterra <rsalvaterra@gmail.com> wrote:
>
> Hi, David,
>
> On Sun, 26 Sept 2021 at 12:37, David Bauer <mail@david-bauer.net> wrote:
> >
> > Does this mean parts of this patch are already applied upstream?
>
> Yes, the ag71xx_ring section.
>
> > Also are there already performance numbers available?
>
> Rosen has performance numbers comparing ar71xx to ath79 DSA including
> this patch (with huge improvements [1]), but not for this specific
> patch in isolation, I think. Rosen?

Those benchmarks had this patch. I only changed the switch driver between them.
>
> [1] https://gist.github.com/neheb/5f3636eb8008431239f73613cbf543b6
>
> Thanks,
> Rui
diff mbox series

Patch

diff --git a/target/linux/ath79/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h b/target/linux/ath79/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
index 5ff9439f0d..6ed1f78459 100644
--- a/target/linux/ath79/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
+++ b/target/linux/ath79/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
@@ -107,13 +107,16 @@  struct ag71xx_buf {
 };
 
 struct ag71xx_ring {
-	struct ag71xx_buf	*buf;
-	u8			*descs_cpu;
-	dma_addr_t		descs_dma;
-	u16			desc_split;
-	u16			order;
+	/* "Hot" fields in the data path. */
 	unsigned int		curr;
 	unsigned int		dirty;
+
+	/* "Cold" fields - not used in the data path. */
+	struct ag71xx_buf	*buf;
+	u16			order;
+	u16			desc_split;
+	dma_addr_t		descs_dma;
+	u8			*descs_cpu;
 };
 
 struct ag71xx_int_stats {
@@ -151,21 +154,20 @@  struct ag71xx {
 	 * Critical data related to the per-packet data path are clustered
 	 * early in this structure to help improve the D-cache footprint.
 	 */
-	struct ag71xx_ring	rx_ring ____cacheline_aligned;
-	struct ag71xx_ring	tx_ring ____cacheline_aligned;
+	struct ag71xx_ring	rx_ring;
+	u16			rx_buf_size;
+	u16			rx_buf_offset;
+	u32			msg_enable;
 
+	struct ag71xx_ring	tx_ring;
 	int			mac_idx;
-
 	u16			desc_pktlen_mask;
-	u16			rx_buf_size;
-	u8			rx_buf_offset;
 	u8			tx_hang_workaround:1;
 
+	struct napi_struct	napi;
 	struct net_device	*dev;
 	struct platform_device  *pdev;
 	spinlock_t		lock;
-	struct napi_struct	napi;
-	u32			msg_enable;
 
 	/*
 	 * From this point onwards we're not looking at per-packet fields.
@@ -188,12 +190,12 @@  struct ag71xx {
 	unsigned int		speed;
 	int			duplex;
 
-	struct delayed_work	restart_work;
-	struct timer_list	oom_timer;
-
 	struct reset_control *mac_reset;
 	struct reset_control *mdio_reset;
 
+	struct delayed_work	restart_work;
+	struct timer_list	oom_timer;
+
 	u32			fifodata[3];
 	u32			plldata[3];
 	u32			pllreg[3];