
[1/2] radix/kfence: map __kfence_pool at page granularity

Message ID 20240424110926.184077-1-hbathini@linux.ibm.com (mailing list archive)
State Superseded
Series [1/2] radix/kfence: map __kfence_pool at page granularity

Commit Message

Hari Bathini April 24, 2024, 11:09 a.m. UTC
When KFENCE is enabled, all system memory is mapped at page
granularity. But in radix MMU mode, mapping 100GB of system memory at
page granularity needs ~3GB of additional memory compared to using a
2MB direct mapping. This is undesirable given that KFENCE is designed
to be enabled in production kernels [1]. Also, mapping just the memory
allocated for the KFENCE pool at page granularity is sufficient to
enable KFENCE support. So, allocate __kfence_pool during boot and map
it at page granularity instead of mapping all system memory at page
granularity.

Without patch:
    # cat /proc/meminfo
    MemTotal:       101201920 kB

With patch:
    # cat /proc/meminfo
    MemTotal:       104483904 kB

All kfence_test.c testcases passed with this patch.

[1] https://lore.kernel.org/all/20201103175841.3495947-2-elver@google.com/

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/include/asm/kfence.h        |  5 ++++
 arch/powerpc/mm/book3s64/radix_pgtable.c | 34 ++++++++++++++++++------
 arch/powerpc/mm/init_64.c                | 14 ++++++++++
 3 files changed, 45 insertions(+), 8 deletions(-)

Comments

Ritesh Harjani (IBM) May 1, 2024, 5:45 a.m. UTC | #1
Hari Bathini <hbathini@linux.ibm.com> writes:

> When KFENCE is enabled, all system memory is mapped at page
> granularity. But in radix MMU mode, mapping 100GB of system memory at
> page granularity needs ~3GB of additional memory compared to using a
> 2MB direct mapping. This is undesirable given that KFENCE is designed
> to be enabled in production kernels [1]. Also, mapping just the memory
> allocated for the KFENCE pool at page granularity is sufficient to
> enable KFENCE support. So, allocate __kfence_pool during boot and map
> it at page granularity instead of mapping all system memory at page
> granularity.
>
> Without patch:
>     # cat /proc/meminfo
>     MemTotal:       101201920 kB
>
> With patch:
>     # cat /proc/meminfo
>     MemTotal:       104483904 kB
>
> All kfence_test.c testcases passed with this patch.
>
> [1] https://lore.kernel.org/all/20201103175841.3495947-2-elver@google.com/
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/kfence.h        |  5 ++++
>  arch/powerpc/mm/book3s64/radix_pgtable.c | 34 ++++++++++++++++++------
>  arch/powerpc/mm/init_64.c                | 14 ++++++++++

I'm new at this, but the patch looked interesting, hence my review comments.

>  3 files changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 424ceef82ae6..18ec2b06ba1e 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -8,6 +8,7 @@
>  #ifndef __ASM_POWERPC_KFENCE_H
>  #define __ASM_POWERPC_KFENCE_H
>  
> +#include <linux/kfence.h>
>  #include <linux/mm.h>
>  #include <asm/pgtable.h>
>  
> @@ -15,6 +16,10 @@
>  #define ARCH_FUNC_PREFIX "."
>  #endif
>  
> +#ifdef CONFIG_KFENCE
> +extern bool kfence_early_init;
> +#endif
> +
>  static inline bool arch_kfence_init_pool(void)
>  {
>  	return true;

Shouldn't we return false for !kfence_early_init?
Otherwise, this patch may break the late init case that your next
patch fixes, and git bisect may break too.
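
Something along these lines, just as a rough sketch (assuming
kfence_early_init still tells us whether the pool was mapped at page
granularity by the time the late init path runs):

static inline bool arch_kfence_init_pool(void)
{
	return kfence_early_init;
}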


> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 15e88f1439ec..fccbf92f279b 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -31,6 +31,7 @@
>  #include <asm/uaccess.h>
>  #include <asm/ultravisor.h>
>  #include <asm/set_memory.h>
> +#include <asm/kfence.h>
>  
>  #include <trace/events/thp.h>
>  
> @@ -291,9 +292,8 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
>  	return end;
>  }
>  
> -static int __meminit create_physical_mapping(unsigned long start,
> -					     unsigned long end,
> -					     int nid, pgprot_t _prot)
> +static int __meminit create_physical_mapping(unsigned long start, unsigned long end, int nid,
> +					     pgprot_t _prot, unsigned long mapping_sz_limit)

Lines over 80 chars.

>  {
>  	unsigned long vaddr, addr, mapping_size = 0;
>  	bool prev_exec, exec = false;
> @@ -301,7 +301,10 @@ static int __meminit create_physical_mapping(unsigned long start,
>  	int psize;
>  	unsigned long max_mapping_size = memory_block_size;
>  
> -	if (debug_pagealloc_enabled_or_kfence())
> +	if (mapping_sz_limit < max_mapping_size)
> +		max_mapping_size = mapping_sz_limit;
> +
> +	if (debug_pagealloc_enabled())
>  		max_mapping_size = PAGE_SIZE;
>  
>  	start = ALIGN(start, PAGE_SIZE);
> @@ -358,6 +361,7 @@ static int __meminit create_physical_mapping(unsigned long start,
>  
>  static void __init radix_init_pgtable(void)
>  {
> +	phys_addr_t kfence_pool __maybe_unused;
>  	unsigned long rts_field;
>  	phys_addr_t start, end;
>  	u64 i;
> @@ -365,6 +369,13 @@ static void __init radix_init_pgtable(void)
>  	/* We don't support slb for radix */
>  	slb_set_size(0);
>  
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {
> +		kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);

What if memblock_phys_alloc() fails? There is no error handling here.
> +		memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +	}
> +#endif
> +

Instead of the #ifdef CONFIG_KFENCE in the function, maybe we can
define a radix_kfence_alloc_pool() helper? Then we won't need
__maybe_unused either.
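
Roughly like this, as a sketch (the pr_warn() and the policy of
clearing kfence_early_init on allocation failure are illustrative,
not necessarily what the final code should do):

#ifdef CONFIG_KFENCE
static phys_addr_t __init radix_kfence_alloc_pool(void)
{
	phys_addr_t kfence_pool;

	if (!kfence_early_init)
		return 0;

	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
	if (!kfence_pool) {
		pr_warn("kfence: pool allocation failed, disabling early init\n");
		kfence_early_init = false;
		return 0;
	}

	memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
	return kfence_pool;
}
#else
static phys_addr_t __init radix_kfence_alloc_pool(void)
{
	return 0;
}
#endif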

>  	/*
>  	 * Create the linear mapping
>  	 */
> @@ -380,10 +391,18 @@ static void __init radix_init_pgtable(void)
>  			continue;
>  		}
>  
> -		WARN_ON(create_physical_mapping(start, end,
> -						-1, PAGE_KERNEL));
> +		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
>  	}
>  
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {
> +		create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, -1,
> +					PAGE_KERNEL, PAGE_SIZE);

Even this can return an error. Maybe WARN_ON_ONCE()? Or disable
kfence on error?

> +		memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +		__kfence_pool = __va(kfence_pool);
> +	}
> +#endif
> +

This #ifdef block could then become radix_kfence_map_pool()?
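
Along these lines (again only a sketch; whether to free the pool and
disable kfence on mapping failure is a judgement call):

static void __init radix_kfence_map_pool(phys_addr_t kfence_pool)
{
	if (!kfence_pool)
		return;

	if (WARN_ON_ONCE(create_physical_mapping(kfence_pool,
						 kfence_pool + KFENCE_POOL_SIZE,
						 -1, PAGE_KERNEL, PAGE_SIZE))) {
		/* Give the memory back and fall back to no kfence. */
		memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
		memblock_phys_free(kfence_pool, KFENCE_POOL_SIZE);
		kfence_early_init = false;
		return;
	}

	memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
	__kfence_pool = __va(kfence_pool);
}

radix_init_pgtable() would then just call radix_kfence_alloc_pool()
before the linear mapping loop and radix_kfence_map_pool() after it,
with no #ifdef in the function body.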


>  	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
>  			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
>  		/*
> @@ -874,8 +893,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
>  		return -1;
>  	}
>  
> -	return create_physical_mapping(__pa(start), __pa(end),
> -				       nid, prot);
> +	return create_physical_mapping(__pa(start), __pa(end), nid, prot, ~0UL);
>  }
>  
>  int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index d96bbc001e73..8155bfd6c16b 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -64,6 +64,20 @@
>  
>  #include <mm/mmu_decl.h>
>  
> +#ifdef CONFIG_KFENCE
> +bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
> +
> +static int __init parse_kfence_early_init(char *arg)
> +{
> +	int val;
> +
> +	if (get_option(&arg, &val))
> +		kfence_early_init = !!val;
> +	return 0;
> +}
> +early_param("kfence.sample_interval", parse_kfence_early_init);
> +#endif
> +
>  #ifdef CONFIG_SPARSEMEM_VMEMMAP
>  /*
>   * Given an address within the vmemmap, determine the page that
> -- 
> 2.44.0
Christophe Leroy May 7, 2024, 12:33 p.m. UTC | #2
On 24/04/2024 at 13:09, Hari Bathini wrote:
> When KFENCE is enabled, all system memory is mapped at page
> granularity. But in radix MMU mode, mapping 100GB of system memory at
> page granularity needs ~3GB of additional memory compared to using a
> 2MB direct mapping. This is undesirable given that KFENCE is designed
> to be enabled in production kernels [1]. Also, mapping just the memory
> allocated for the KFENCE pool at page granularity is sufficient to
> enable KFENCE support. So, allocate __kfence_pool during boot and map
> it at page granularity instead of mapping all system memory at page
> granularity.


That seems to be more or less copied from ARM64? Is that the best
approach?

Can't you implement arch_kfence_init_pool() instead ?

Also, it seems your patch only addresses PPC64. The same should be done 
for PPC32 and there are probably parts that should be common.
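
For reference, the arch_kfence_init_pool() route would look roughly
like this; note that radix__remap_at_page_size() is hypothetical, and
splitting the existing 2MB/1GB linear map entries covering the pool
range is the part that would actually need implementing:

static inline bool arch_kfence_init_pool(void)
{
	/*
	 * Hypothetical helper: split/remap the linear mapping that
	 * covers __kfence_pool down to PAGE_SIZE entries, so that
	 * individual pool pages can later be protected.
	 */
	return !radix__remap_at_page_size((unsigned long)__kfence_pool,
					  KFENCE_POOL_SIZE);
}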

> 
> Without patch:
>      # cat /proc/meminfo
>      MemTotal:       101201920 kB
> 
> With patch:
>      # cat /proc/meminfo
>      MemTotal:       104483904 kB
> 
> All kfence_test.c testcases passed with this patch.
> 
> [1] https://lore.kernel.org/all/20201103175841.3495947-2-elver@google.com/
> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/kfence.h        |  5 ++++
>   arch/powerpc/mm/book3s64/radix_pgtable.c | 34 ++++++++++++++++++------
>   arch/powerpc/mm/init_64.c                | 14 ++++++++++
>   3 files changed, 45 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 424ceef82ae6..18ec2b06ba1e 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -8,6 +8,7 @@
>   #ifndef __ASM_POWERPC_KFENCE_H
>   #define __ASM_POWERPC_KFENCE_H
>   
> +#include <linux/kfence.h>

Why do you need that? It can't be needed by the extern bool you are
adding below.

If it is needed by some C file that includes asm/kfence.h, that file
should include linux/kfence.h itself; see for instance
mm/kfence/kfence_test.c and mm/kfence/core.c.

>   #include <linux/mm.h>
>   #include <asm/pgtable.h>
>   
> @@ -15,6 +16,10 @@
>   #define ARCH_FUNC_PREFIX "."
>   #endif
>   
> +#ifdef CONFIG_KFENCE
> +extern bool kfence_early_init;
> +#endif
> +
>   static inline bool arch_kfence_init_pool(void)
>   {
>   	return true;
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 15e88f1439ec..fccbf92f279b 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -31,6 +31,7 @@
>   #include <asm/uaccess.h>
>   #include <asm/ultravisor.h>
>   #include <asm/set_memory.h>
> +#include <asm/kfence.h>
>   
>   #include <trace/events/thp.h>
>   
> @@ -291,9 +292,8 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
>   	return end;
>   }
>   
> -static int __meminit create_physical_mapping(unsigned long start,
> -					     unsigned long end,
> -					     int nid, pgprot_t _prot)
> +static int __meminit create_physical_mapping(unsigned long start, unsigned long end, int nid,
> +					     pgprot_t _prot, unsigned long mapping_sz_limit)
>   {
>   	unsigned long vaddr, addr, mapping_size = 0;
>   	bool prev_exec, exec = false;
> @@ -301,7 +301,10 @@ static int __meminit create_physical_mapping(unsigned long start,
>   	int psize;
>   	unsigned long max_mapping_size = memory_block_size;
>   
> -	if (debug_pagealloc_enabled_or_kfence())
> +	if (mapping_sz_limit < max_mapping_size)
> +		max_mapping_size = mapping_sz_limit;
> +
> +	if (debug_pagealloc_enabled())
>   		max_mapping_size = PAGE_SIZE;
>   
>   	start = ALIGN(start, PAGE_SIZE);
> @@ -358,6 +361,7 @@ static int __meminit create_physical_mapping(unsigned long start,
>   
>   static void __init radix_init_pgtable(void)
>   {
> +	phys_addr_t kfence_pool __maybe_unused;

Don't do that. Avoid using __maybe_unused.

Instead, declare this variable where it is used.

>   	unsigned long rts_field;
>   	phys_addr_t start, end;
>   	u64 i;
> @@ -365,6 +369,13 @@ static void __init radix_init_pgtable(void)
>   	/* We don't support slb for radix */
>   	slb_set_size(0);
>   
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {

Declare kfence_pool here.

> +		kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
> +		memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +	}
> +#endif
> +
>   	/*
>   	 * Create the linear mapping
>   	 */
> @@ -380,10 +391,18 @@ static void __init radix_init_pgtable(void)
>   			continue;
>   		}
>   
> -		WARN_ON(create_physical_mapping(start, end,
> -						-1, PAGE_KERNEL));
> +		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
>   	}
>   
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {
> +		create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, -1,
> +					PAGE_KERNEL, PAGE_SIZE);
> +		memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +		__kfence_pool = __va(kfence_pool);
> +	}
> +#endif
> +
>   	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
>   			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
>   		/*
> @@ -874,8 +893,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
>   		return -1;
>   	}
>   
> -	return create_physical_mapping(__pa(start), __pa(end),
> -				       nid, prot);
> +	return create_physical_mapping(__pa(start), __pa(end), nid, prot, ~0UL);
>   }
>   
>   int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index d96bbc001e73..8155bfd6c16b 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -64,6 +64,20 @@
>   
>   #include <mm/mmu_decl.h>
>   
> +#ifdef CONFIG_KFENCE
> +bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
> +
> +static int __init parse_kfence_early_init(char *arg)
> +{
> +	int val;
> +
> +	if (get_option(&arg, &val))
> +		kfence_early_init = !!val;
> +	return 0;
> +}
> +early_param("kfence.sample_interval", parse_kfence_early_init);
> +#endif
> +
>   #ifdef CONFIG_SPARSEMEM_VMEMMAP
>   /*
>    * Given an address within the vmemmap, determine the page that

Patch

diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
index 424ceef82ae6..18ec2b06ba1e 100644
--- a/arch/powerpc/include/asm/kfence.h
+++ b/arch/powerpc/include/asm/kfence.h
@@ -8,6 +8,7 @@ 
 #ifndef __ASM_POWERPC_KFENCE_H
 #define __ASM_POWERPC_KFENCE_H
 
+#include <linux/kfence.h>
 #include <linux/mm.h>
 #include <asm/pgtable.h>
 
@@ -15,6 +16,10 @@ 
 #define ARCH_FUNC_PREFIX "."
 #endif
 
+#ifdef CONFIG_KFENCE
+extern bool kfence_early_init;
+#endif
+
 static inline bool arch_kfence_init_pool(void)
 {
 	return true;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 15e88f1439ec..fccbf92f279b 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -31,6 +31,7 @@ 
 #include <asm/uaccess.h>
 #include <asm/ultravisor.h>
 #include <asm/set_memory.h>
+#include <asm/kfence.h>
 
 #include <trace/events/thp.h>
 
@@ -291,9 +292,8 @@  static unsigned long next_boundary(unsigned long addr, unsigned long end)
 	return end;
 }
 
-static int __meminit create_physical_mapping(unsigned long start,
-					     unsigned long end,
-					     int nid, pgprot_t _prot)
+static int __meminit create_physical_mapping(unsigned long start, unsigned long end, int nid,
+					     pgprot_t _prot, unsigned long mapping_sz_limit)
 {
 	unsigned long vaddr, addr, mapping_size = 0;
 	bool prev_exec, exec = false;
@@ -301,7 +301,10 @@  static int __meminit create_physical_mapping(unsigned long start,
 	int psize;
 	unsigned long max_mapping_size = memory_block_size;
 
-	if (debug_pagealloc_enabled_or_kfence())
+	if (mapping_sz_limit < max_mapping_size)
+		max_mapping_size = mapping_sz_limit;
+
+	if (debug_pagealloc_enabled())
 		max_mapping_size = PAGE_SIZE;
 
 	start = ALIGN(start, PAGE_SIZE);
@@ -358,6 +361,7 @@  static int __meminit create_physical_mapping(unsigned long start,
 
 static void __init radix_init_pgtable(void)
 {
+	phys_addr_t kfence_pool __maybe_unused;
 	unsigned long rts_field;
 	phys_addr_t start, end;
 	u64 i;
@@ -365,6 +369,13 @@  static void __init radix_init_pgtable(void)
 	/* We don't support slb for radix */
 	slb_set_size(0);
 
+#ifdef CONFIG_KFENCE
+	if (kfence_early_init) {
+		kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+		memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+	}
+#endif
+
 	/*
 	 * Create the linear mapping
 	 */
@@ -380,10 +391,18 @@  static void __init radix_init_pgtable(void)
 			continue;
 		}
 
-		WARN_ON(create_physical_mapping(start, end,
-						-1, PAGE_KERNEL));
+		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
 	}
 
+#ifdef CONFIG_KFENCE
+	if (kfence_early_init) {
+		create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, -1,
+					PAGE_KERNEL, PAGE_SIZE);
+		memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+		__kfence_pool = __va(kfence_pool);
+	}
+#endif
+
 	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
 			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
 		/*
@@ -874,8 +893,7 @@  int __meminit radix__create_section_mapping(unsigned long start,
 		return -1;
 	}
 
-	return create_physical_mapping(__pa(start), __pa(end),
-				       nid, prot);
+	return create_physical_mapping(__pa(start), __pa(end), nid, prot, ~0UL);
 }
 
 int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index d96bbc001e73..8155bfd6c16b 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -64,6 +64,20 @@ 
 
 #include <mm/mmu_decl.h>
 
+#ifdef CONFIG_KFENCE
+bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
+
+static int __init parse_kfence_early_init(char *arg)
+{
+	int val;
+
+	if (get_option(&arg, &val))
+		kfence_early_init = !!val;
+	return 0;
+}
+early_param("kfence.sample_interval", parse_kfence_early_init);
+#endif
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Given an address within the vmemmap, determine the page that