
[2/2] radix/kfence: support late __kfence_pool allocation

Message ID 20240424110926.184077-2-hbathini@linux.ibm.com (mailing list archive)
State Superseded
Series [1/2] radix/kfence: map __kfence_pool at page granularity

Checks

Context Check Description
snowpatch_ozlabs/github-powerpc_ppctests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_selftests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_sparse success Successfully ran 4 jobs.
snowpatch_ozlabs/github-powerpc_clang success Successfully ran 6 jobs.
snowpatch_ozlabs/github-powerpc_kernel_qemu fail 4 of 23 jobs failed.

Commit Message

Hari Bathini April 24, 2024, 11:09 a.m. UTC
With commit b33f778bba5ef ("kfence: alloc kfence_pool after system
startup"), the KFENCE pool can be allocated after system startup via
the page allocator. This can lead to problems, as not all memory is
mapped at page granularity anymore with CONFIG_KFENCE. Address this by
direct-mapping all memory at PMD level and splitting the mapping of any
PMD pages that overlap __kfence_pool down to page granularity, if and
when __kfence_pool is allocated after system startup.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h |  2 +
 arch/powerpc/include/asm/kfence.h          | 14 +++++-
 arch/powerpc/mm/book3s64/radix_pgtable.c   | 50 +++++++++++++++++++++-
 3 files changed, 64 insertions(+), 2 deletions(-)

Comments

Ritesh Harjani (IBM) May 1, 2024, 7:03 a.m. UTC | #1
Hari Bathini <hbathini@linux.ibm.com> writes:

> With commit b33f778bba5ef ("kfence: alloc kfence_pool after system
> startup"), the KFENCE pool can be allocated after system startup via
> the page allocator. This can lead to problems, as not all memory is
> mapped at page granularity anymore with CONFIG_KFENCE. Address this by
> direct-mapping all memory at PMD level and splitting the mapping of any
> PMD pages that overlap __kfence_pool down to page granularity, if and
> when __kfence_pool is allocated after system startup.
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/radix.h |  2 +
>  arch/powerpc/include/asm/kfence.h          | 14 +++++-
>  arch/powerpc/mm/book3s64/radix_pgtable.c   | 50 +++++++++++++++++++++-
>  3 files changed, 64 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 8f55ff74bb68..0423ddbcf73c 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -340,6 +340,8 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
>  extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
>  				 pgprot_t flags, unsigned int psz);
>  
> +extern bool radix_kfence_init_pool(void);
> +
>  static inline unsigned long radix__get_tree_size(void)
>  {
>  	unsigned long rts_field;
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 18ec2b06ba1e..c5d2fb2f9ecb 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -18,12 +18,24 @@
>  
>  #ifdef CONFIG_KFENCE
>  extern bool kfence_early_init;
> -#endif
> +
> +static inline bool kfence_alloc_pool_late(void)
> +{
> +	return !kfence_early_init;
> +}

Minor nit, but do we need kfence_alloc_pool_late()? The function name
looks confusing. Can we not just use !kfence_early_init directly? If
not, then maybe name it kfence_late_init()?
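
Something like this (untested), if the helper is kept at all:

	static inline bool kfence_late_init(void)
	{
		return !kfence_early_init;
	}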

>  
>  static inline bool arch_kfence_init_pool(void)
>  {
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	if (radix_enabled())
> +		return radix_kfence_init_pool();

Can we directly check ...
        if (radix_enabled() && !kfence_early_init)
... here, instead of embedding the check inside radix_kfence_init_pool()?
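
arch_kfence_init_pool() could then look something like this (untested
sketch):

	static inline bool arch_kfence_init_pool(void)
	{
	#ifdef CONFIG_PPC_BOOK3S_64
		if (radix_enabled() && !kfence_early_init)
			return radix_kfence_init_pool();
	#endif
		return true;
	}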

> +#endif
> +
>  	return true;
>  }
> +#else
> +static inline bool kfence_alloc_pool_late(void) { return false; }
> +#endif
>  
>  #ifdef CONFIG_PPC64
>  static inline bool kfence_protect_page(unsigned long addr, bool protect)
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index fccbf92f279b..f4374e3e31e1 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -253,6 +253,53 @@ void radix__mark_initmem_nx(void)
>  }
>  #endif /* CONFIG_STRICT_KERNEL_RWX */
>  
> +#ifdef CONFIG_KFENCE
> +static inline int radix_split_pmd_page(pmd_t *pmd, unsigned long addr)
> +{
> +	pte_t *pte = pte_alloc_one_kernel(&init_mm);
> +	unsigned long pfn = PFN_DOWN(__pa(addr));

Minor nit: since addr will always be page aligned, maybe PHYS_PFN() is
better suited here. It does not really matter either way, though.
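
i.e. something like (untested, purely cosmetic):

	unsigned long pfn = PHYS_PFN(__pa(addr));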

> +	int i;
> +
> +	if (!pte)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < PTRS_PER_PTE; i++) {
> +		__set_pte_at(&init_mm, addr, pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0);
> +		asm volatile("ptesync": : :"memory");
> +	}

Maybe add a comment above the loop on why __set_pte_at() is OK for late
kfence init, and why pte_update() is not needed? [1]

[1]: https://lore.kernel.org/linuxppc-dev/87y318wp9r.fsf@linux.ibm.com/
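
If I am reading it right, it is because the new PTE page is freshly
allocated and not yet linked into the kernel page table, so no other
CPU can see these entries and there is no existing valid PTE whose bits
need to be preserved. Assuming that reasoning holds, something like the
below (untested) could do:

	/*
	 * The PTE page is freshly allocated and not yet visible via the
	 * kernel page table (pmd_populate_kernel() comes later), so
	 * there is no concurrent access and no valid old entry to tear
	 * down. A plain __set_pte_at() is sufficient; the atomic
	 * read-modify-write of pte_update() is not required here.
	 */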


> +	pmd_populate_kernel(&init_mm, pmd, pte);
> +
> +	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
> +	return 0;
> +}
> +
> +bool radix_kfence_init_pool(void)
> +{
> +	unsigned int page_psize, pmd_psize;
> +	unsigned long addr;
> +	pmd_t *pmd;
> +
> +	if (!kfence_alloc_pool_late())
> +		return true;
> +
> +	page_psize = shift_to_mmu_psize(PAGE_SHIFT);
> +	pmd_psize = shift_to_mmu_psize(PMD_SHIFT);
> +	for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
> +	     addr += PAGE_SIZE) {
> +		pmd = pmd_off_k(addr);
> +
> +		if (pmd_leaf(*pmd)) {
> +			if (radix_split_pmd_page(pmd, addr & PMD_MASK))
> +				return false;
> +			update_page_count(pmd_psize, -1);
> +			update_page_count(page_psize, PTRS_PER_PTE);
> +		}
> +	}
> +
> +	return true;
> +}
> +#endif
> +
>  static inline void __meminit
>  print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
>  {
> @@ -391,7 +438,8 @@ static void __init radix_init_pgtable(void)
>  			continue;
>  		}
>  
> -		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
> +		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL,
> +						kfence_alloc_pool_late() ? PMD_SIZE : ~0UL));

So every time !kfence_early_init is true, we always map at PMD_SIZE.
Does that mean we never use 1G mappings for the direct map in that case?

>  	}
>  
>  #ifdef CONFIG_KFENCE
> -- 
> 2.44.0

Patch

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 8f55ff74bb68..0423ddbcf73c 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -340,6 +340,8 @@  extern void radix__vmemmap_remove_mapping(unsigned long start,
 extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 				 pgprot_t flags, unsigned int psz);
 
+extern bool radix_kfence_init_pool(void);
+
 static inline unsigned long radix__get_tree_size(void)
 {
 	unsigned long rts_field;
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
index 18ec2b06ba1e..c5d2fb2f9ecb 100644
--- a/arch/powerpc/include/asm/kfence.h
+++ b/arch/powerpc/include/asm/kfence.h
@@ -18,12 +18,24 @@ 
 
 #ifdef CONFIG_KFENCE
 extern bool kfence_early_init;
-#endif
+
+static inline bool kfence_alloc_pool_late(void)
+{
+	return !kfence_early_init;
+}
 
 static inline bool arch_kfence_init_pool(void)
 {
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (radix_enabled())
+		return radix_kfence_init_pool();
+#endif
+
 	return true;
 }
+#else
+static inline bool kfence_alloc_pool_late(void) { return false; }
+#endif
 
 #ifdef CONFIG_PPC64
 static inline bool kfence_protect_page(unsigned long addr, bool protect)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index fccbf92f279b..f4374e3e31e1 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -253,6 +253,53 @@  void radix__mark_initmem_nx(void)
 }
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
+#ifdef CONFIG_KFENCE
+static inline int radix_split_pmd_page(pmd_t *pmd, unsigned long addr)
+{
+	pte_t *pte = pte_alloc_one_kernel(&init_mm);
+	unsigned long pfn = PFN_DOWN(__pa(addr));
+	int i;
+
+	if (!pte)
+		return -ENOMEM;
+
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		__set_pte_at(&init_mm, addr, pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0);
+		asm volatile("ptesync": : :"memory");
+	}
+	pmd_populate_kernel(&init_mm, pmd, pte);
+
+	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+	return 0;
+}
+
+bool radix_kfence_init_pool(void)
+{
+	unsigned int page_psize, pmd_psize;
+	unsigned long addr;
+	pmd_t *pmd;
+
+	if (!kfence_alloc_pool_late())
+		return true;
+
+	page_psize = shift_to_mmu_psize(PAGE_SHIFT);
+	pmd_psize = shift_to_mmu_psize(PMD_SHIFT);
+	for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
+	     addr += PAGE_SIZE) {
+		pmd = pmd_off_k(addr);
+
+		if (pmd_leaf(*pmd)) {
+			if (radix_split_pmd_page(pmd, addr & PMD_MASK))
+				return false;
+			update_page_count(pmd_psize, -1);
+			update_page_count(page_psize, PTRS_PER_PTE);
+		}
+	}
+
+	return true;
+}
+#endif
+
 static inline void __meminit
 print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
 {
@@ -391,7 +438,8 @@  static void __init radix_init_pgtable(void)
 			continue;
 		}
 
-		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
+		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL,
+						kfence_alloc_pool_late() ? PMD_SIZE : ~0UL));
 	}
 
 #ifdef CONFIG_KFENCE