diff mbox series

[LEDE-DEV,1/5] ar71xx: Add QCA cache blasting patch

Message ID 1512416424-629-1-git-send-email-rosenp@gmail.com
State Accepted
Headers show
Series [LEDE-DEV,1/5] ar71xx: Add QCA cache blasting patch | expand

Commit Message

Rosen Penev Dec. 4, 2017, 7:40 p.m. UTC
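Qualcomm claims this makes cache range flushes on MIPS more efficient: the per-line loop in the r4k cache range-blast helpers is unrolled to process eight cache lines per iteration, with the remaining lines handled in groups of four, two and one. Original commit message below: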

From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Date: Fri, 7 Jun 2013 18:35:22 -0500
Subject: [r4k_mips] efficient cache blast

Optimize the compiler output for larger cache blast cases that are
common for DMA-based networking.

Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Signed-off-by: Rosen Penev <rosenp@gmail.com>
---
 .../903-QCA-ar71xx-efficient-cache-blast.patch     | 58 ++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 target/linux/ar71xx/patches-4.9/903-QCA-ar71xx-efficient-cache-blast.patch

Comments

Felix Fietkau Dec. 4, 2017, 9:46 p.m. UTC | #1
On 2017-12-04 20:40, Rosen Penev wrote:
> Qualcomm claims this improves cache efficiency for MIPS. Original commit message below:
> 
> From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
> Date: Fri, 7 Jun 2013 18:35:22 -0500
> Subject: [r4k_mips] efficient cache blast
> 
> Optimize the compiler output for larger cache blast cases that are
> common for DMA-based networking.
> 
> Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
> Signed-off-by: Rosen Penev <rosenp@gmail.com>
Merged to my staging tree with some modifications:

- better patch description
- moved to generic instead of ar71xx

Thanks,

- Felix
diff mbox series

Patch

diff --git a/target/linux/ar71xx/patches-4.9/903-QCA-ar71xx-efficient-cache-blast.patch b/target/linux/ar71xx/patches-4.9/903-QCA-ar71xx-efficient-cache-blast.patch
new file mode 100644
index 0000000..2913119
--- /dev/null
+++ b/target/linux/ar71xx/patches-4.9/903-QCA-ar71xx-efficient-cache-blast.patch
@@ -0,0 +1,58 @@ 
+diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
+index 667ca3c..b0138b4 100644
+--- a/arch/mips/include/asm/r4kcache.h
++++ b/arch/mips/include/asm/r4kcache.h
+@@ -665,16 +665,48 @@ static inline void prot##extra##blast_##pfx##cache##_range(unsigned long start,
+ 						    unsigned long end)	\
+ {									\
+ 	unsigned long lsize = cpu_##desc##_line_size();			\
++	unsigned long lsize_2 = lsize * 2;				\
++	unsigned long lsize_3 = lsize * 3;				\
++	unsigned long lsize_4 = lsize * 4;				\
++	unsigned long lsize_5 = lsize * 5;				\
++	unsigned long lsize_6 = lsize * 6;				\
++	unsigned long lsize_7 = lsize * 7;				\
++	unsigned long lsize_8 = lsize * 8;				\
+ 	unsigned long addr = start & ~(lsize - 1);			\
+-	unsigned long aend = (end - 1) & ~(lsize - 1);			\
++	unsigned long aend = (end + lsize - 1) & ~(lsize - 1);		\
++	int lines = (aend - addr) / lsize;				\
+ 									\
+ 	__##pfx##flush_prologue						\
+ 									\
+-	while (1) {							\
++	while (lines >= 8) {						\
++		prot##cache_op(hitop, addr);				\
++		prot##cache_op(hitop, addr + lsize);			\
++		prot##cache_op(hitop, addr + lsize_2);			\
++		prot##cache_op(hitop, addr + lsize_3);			\
++		prot##cache_op(hitop, addr + lsize_4);			\
++		prot##cache_op(hitop, addr + lsize_5);			\
++		prot##cache_op(hitop, addr + lsize_6);			\
++		prot##cache_op(hitop, addr + lsize_7);			\
++		addr += lsize_8;					\
++		lines -= 8;						\
++	}								\
++									\
++	if (lines & 0x4) {						\
++		prot##cache_op(hitop, addr);				\
++		prot##cache_op(hitop, addr + lsize);			\
++		prot##cache_op(hitop, addr + lsize_2);			\
++		prot##cache_op(hitop, addr + lsize_3);			\
++		addr += lsize_4;					\
++	}								\
++									\
++	if (lines & 0x2) {						\
++		prot##cache_op(hitop, addr);				\
++		prot##cache_op(hitop, addr + lsize);			\
++		addr += lsize_2;					\
++	}								\
++									\
++	if (lines & 0x1) {						\
+ 		prot##cache_op(hitop, addr);				\
+-		if (addr == aend)					\
+-			break;						\
+-		addr += lsize;						\
+ 	}								\
+ 									\
+ 	__##pfx##flush_epilogue						\