
[v2,5/5] um: Add 4 level page table support

Message ID 20240612134804.1626427-6-benjamin@sipsolutions.net
State Superseded
Series Increased address space for 64 bit

Commit Message

Benjamin Berg June 12, 2024, 1:48 p.m. UTC
From: Benjamin Berg <benjamin.berg@intel.com>

The larger address space is useful to support more applications inside
UML. One example is ASAN instrumentation of userspace applications,
which requires addresses that would otherwise not be available.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>

---

v2:
- Do not hide option behind the EXPERT flag
- Fix typo in new "Two-level pagetables" option
---
 arch/um/Kconfig                      |   1 +
 arch/um/include/asm/page.h           |  14 +++-
 arch/um/include/asm/pgalloc.h        |  11 ++-
 arch/um/include/asm/pgtable-4level.h | 119 +++++++++++++++++++++++++++
 arch/um/include/asm/pgtable.h        |   6 +-
 arch/um/kernel/mem.c                 |  17 +++-
 arch/x86/um/Kconfig                  |  38 ++++++---
 7 files changed, 189 insertions(+), 17 deletions(-)
 create mode 100644 arch/um/include/asm/pgtable-4level.h
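
A back-of-the-envelope sketch (not part of the patch) of the address-space
gain the commit message refers to: with the constants introduced in
pgtable-4level.h below, four levels of 512 entries each (9 index bits per
level) sit on top of the page offset, and PGDIR_SHIFT is 39, so the mapped
range tops out at 39 + 9 = 48 bits, i.e. 256 TiB of virtual address space.

    /* Hedged sketch: the virtual address span implied by the constants
     * in pgtable-4level.h (PGDIR_SHIFT == 39, PTRS_PER_PGD == 512).
     */
    #include <stdio.h>

    int main(void)
    {
            const unsigned int pgdir_shift = 39;    /* from the patch */
            const unsigned int pgd_index_bits = 9;  /* PTRS_PER_PGD == 512 */
            unsigned int va_bits = pgdir_shift + pgd_index_bits;

            /* 48 VA bits -> 1UL << 48 bytes == 256 TiB */
            printf("%u VA bits, %lu TiB\n", va_bits, 1UL << (va_bits - 40));
            return 0;
    }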

Patch

diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 93a5a8999b07..5d111fc8ccb7 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -208,6 +208,7 @@  config MMAPPER
 
 config PGTABLE_LEVELS
 	int
+	default 4 if 4_LEVEL_PGTABLES
 	default 3 if 3_LEVEL_PGTABLES
 	default 2
 
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 9ef9a8aedfa6..c3b2ae03b60c 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -57,14 +57,22 @@  typedef unsigned long long phys_t;
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pgd; } pgd_t;
 
-#ifdef CONFIG_3_LEVEL_PGTABLES
+#if CONFIG_PGTABLE_LEVELS > 2
+
 typedef struct { unsigned long pmd; } pmd_t;
 #define pmd_val(x)	((x).pmd)
 #define __pmd(x) ((pmd_t) { (x) } )
-#endif
 
-#define pte_val(x)	((x).pte)
+#if CONFIG_PGTABLE_LEVELS > 3
 
+typedef struct { unsigned long pud; } pud_t;
+#define pud_val(x)	((x).pud)
+#define __pud(x) ((pud_t) { (x) } )
+
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+
+#define pte_val(x)	((x).pte)
 
 #define pte_get_bits(p, bits) ((p).pte & (bits))
 #define pte_set_bits(p, bits) ((p).pte |= (bits))
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index de5e31c64793..04fb4e6969a4 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -31,7 +31,7 @@  do {								\
 	tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte)));	\
 } while (0)
 
-#ifdef CONFIG_3_LEVEL_PGTABLES
+#if CONFIG_PGTABLE_LEVELS > 2
 
 #define __pmd_free_tlb(tlb, pmd, address)			\
 do {								\
@@ -39,6 +39,15 @@  do {								\
 	tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd));	\
 } while (0)
 
+#if CONFIG_PGTABLE_LEVELS > 3
+
+#define __pud_free_tlb(tlb, pud, address)			\
+do {								\
+	pagetable_pud_dtor(virt_to_ptdesc(pud));		\
+	tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud));	\
+} while (0)
+
+#endif
 #endif
 
 #endif
diff --git a/arch/um/include/asm/pgtable-4level.h b/arch/um/include/asm/pgtable-4level.h
new file mode 100644
index 000000000000..f912fcc16b7a
--- /dev/null
+++ b/arch/um/include/asm/pgtable-4level.h
@@ -0,0 +1,119 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2003 PathScale Inc
+ * Derived from include/asm-i386/pgtable.h
+ */
+
+#ifndef __UM_PGTABLE_4LEVEL_H
+#define __UM_PGTABLE_4LEVEL_H
+
+#include <asm-generic/pgtable-nop4d.h>
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+
+#define PGDIR_SHIFT	39
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* PUD_SHIFT determines the size of the area a third-level page table can
+ * map
+ */
+
+#define PUD_SHIFT	30
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/* PMD_SHIFT determines the size of the area a second-level page table can
+ * map
+ */
+
+#define PMD_SHIFT	21
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/*
+ * entries per page directory level
+ */
+
+#define PTRS_PER_PTE 512
+#define PTRS_PER_PMD 512
+#define PTRS_PER_PUD 512
+#define PTRS_PER_PGD 512
+
+#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE)
+
+#define pte_ERROR(e) \
+        printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pte_val(e))
+#define pmd_ERROR(e) \
+        printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pmd_val(e))
+#define pud_ERROR(e) \
+        printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pud_val(e))
+#define pgd_ERROR(e) \
+        printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pgd_val(e))
+
+#define pud_none(x)	(!(pud_val(x) & ~_PAGE_NEWPAGE))
+#define	pud_bad(x)	((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define pud_present(x)	(pud_val(x) & _PAGE_PRESENT)
+#define pud_populate(mm, pud, pmd) \
+	set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
+
+#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
+
+#define p4d_none(x)	(!(p4d_val(x) & ~_PAGE_NEWPAGE))
+#define	p4d_bad(x)	((p4d_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define p4d_present(x)	(p4d_val(x) & _PAGE_PRESENT)
+#define p4d_populate(mm, p4d, pud) \
+	set_p4d(p4d, __p4d(_PAGE_TABLE + __pa(pud)))
+
+#define set_p4d(p4dptr, p4dval) (*(p4dptr) = (p4dval))
+
+
+static inline int pgd_newpage(pgd_t pgd)
+{
+	return pgd_val(pgd) & _PAGE_NEWPAGE;
+}
+
+static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
+
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+
+static inline void pud_clear(pud_t *pud)
+{
+	set_pud(pud, __pud(_PAGE_NEWPAGE));
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+	set_p4d(p4d, __p4d(_PAGE_NEWPAGE));
+}
+
+#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
+#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK))
+
+#define p4d_page(p4d) phys_to_page(p4d_val(p4d) & PAGE_MASK)
+#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & PAGE_MASK))
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return phys_to_pfn(pte_val(pte));
+}
+
+static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+{
+	pte_t pte;
+	phys_t phys = pfn_to_phys(page_nr);
+
+	pte_set_val(pte, phys, pgprot);
+	return pte;
+}
+
+static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+{
+	return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot));
+}
+
+#endif
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index e1ece21dbe3f..71a7651e2db7 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -24,9 +24,11 @@ 
 /* We borrow bit 10 to store the exclusive marker in swap PTEs. */
 #define _PAGE_SWP_EXCLUSIVE	0x400
 
-#ifdef CONFIG_3_LEVEL_PGTABLES
+#if CONFIG_PGTABLE_LEVELS == 4
+#include <asm/pgtable-4level.h>
+#elif CONFIG_PGTABLE_LEVELS == 3
 #include <asm/pgtable-3level.h>
-#else
+#elif CONFIG_PGTABLE_LEVELS == 2
 #include <asm/pgtable-2level.h>
 #endif
 
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index ca91accd64fc..2dc0d90c0550 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -99,7 +99,7 @@  static void __init one_page_table_init(pmd_t *pmd)
 
 static void __init one_md_table_init(pud_t *pud)
 {
-#ifdef CONFIG_3_LEVEL_PGTABLES
+#if CONFIG_PGTABLE_LEVELS > 2
 	pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 	if (!pmd_table)
 		panic("%s: Failed to allocate %lu bytes align=%lx\n",
@@ -110,6 +110,19 @@  static void __init one_md_table_init(pud_t *pud)
 #endif
 }
 
+static void __init one_ud_table_init(p4d_t *p4d)
+{
+#if CONFIG_PGTABLE_LEVELS > 3
+	pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+	if (!pud_table)
+		panic("%s: Failed to allocate %lu bytes align=%lx\n",
+		      __func__, PAGE_SIZE, PAGE_SIZE);
+
+	set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table)));
+	BUG_ON(pud_table != pud_offset(p4d, 0));
+#endif
+}
+
 static void __init fixrange_init(unsigned long start, unsigned long end,
 				 pgd_t *pgd_base)
 {
@@ -127,6 +140,8 @@  static void __init fixrange_init(unsigned long start, unsigned long end,
 
 	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
 		p4d = p4d_offset(pgd, vaddr);
+		if (p4d_none(*p4d))
+			one_ud_table_init(p4d);
 		pud = pud_offset(p4d, vaddr);
 		if (pud_none(*pud))
 			one_md_table_init(pud);
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 186f13268401..454ad560f627 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -28,16 +28,34 @@  config X86_64
 	def_bool 64BIT
 	select MODULES_USE_ELF_RELA
 
-config 3_LEVEL_PGTABLES
-	bool "Three-level pagetables" if !64BIT
-	default 64BIT
-	help
-	  Three-level pagetables will let UML have more than 4G of physical
-	  memory.  All the memory that can't be mapped directly will be treated
-	  as high memory.
-
-	  However, this it experimental on 32-bit architectures, so if unsure say
-	  N (on x86-64 it's automatically enabled, instead, as it's safe there).
+choice
+	prompt "Pagetable levels"
+	default 2_LEVEL_PGTABLES if !64BIT
+	default 4_LEVEL_PGTABLES if 64BIT
+
+	config 2_LEVEL_PGTABLES
+		bool "Two-level pagetables" if !64BIT
+		depends on !64BIT
+		help
+		  Two-level pagetables for 32-bit architectures.
+
+	config 3_LEVEL_PGTABLES
+		bool "Three-level pagetables" if 64BIT
+		help
+		  Three-level pagetables will let UML have more than 4G of physical
+		  memory.  All the memory that can't be mapped directly will be treated
+		  as high memory.
+
+		  However, this is experimental on 32-bit architectures, so if unsure say
+		  N (on x86-64 it's automatically enabled, instead, as it's safe there).
+
+	config 4_LEVEL_PGTABLES
+		bool "Four-level pagetables" if 64BIT
+		depends on 64BIT
+		help
+		  Four-level pagetables give a bigger address space, which can be
+		  useful for some applications (e.g. ASAN).
+endchoice
 
 config ARCH_HAS_SC_SIGNALS
 	def_bool !64BIT
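
As a reading aid (a minimal sketch, not part of the patch): with
CONFIG_4_LEVEL_PGTABLES selected, PGTABLE_LEVELS resolves to 4,
asm/pgtable.h pulls in the new pgtable-4level.h, and the p4d level is
folded into the pgd by asm-generic/pgtable-nop4d.h. A virtual address
then splits into four 9-bit table indices plus the page offset. The
shifts below are copied from pgtable-4level.h; PAGE_SHIFT == 12
(4 KiB pages) is assumed rather than taken from the patch.

    /* Minimal sketch: decomposing a virtual address with the shifts
     * from pgtable-4level.h. PAGE_SHIFT == 12 (4 KiB pages) is an
     * assumption, and the example address is arbitrary.
     */
    #include <stdio.h>

    #define PAGE_SHIFT      12              /* assumed */
    #define PMD_SHIFT       21              /* from the patch */
    #define PUD_SHIFT       30              /* from the patch */
    #define PGDIR_SHIFT     39              /* from the patch */
    #define INDEX_MASK      0x1ffUL         /* 512 entries per level */

    int main(void)
    {
            unsigned long va = 0x00007f1234567abcUL;

            printf("pgd %lu pud %lu pmd %lu pte %lu offset 0x%lx\n",
                   (va >> PGDIR_SHIFT) & INDEX_MASK,
                   (va >> PUD_SHIFT) & INDEX_MASK,
                   (va >> PMD_SHIFT) & INDEX_MASK,
                   (va >> PAGE_SHIFT) & INDEX_MASK,
                   va & ((1UL << PAGE_SHIFT) - 1));
            return 0;
    }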