diff mbox series

[RFC,v0,2/5] powerpc/mm/radix: Create separate mappings for hot-plugged memory

Message ID 20200406034925.22586-3-bharata@linux.ibm.com (mailing list archive)
State Changes Requested
Headers show
Series powerpc/mm/radix: Memory unplug fixes | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (2c0ce4ff35994a7b12cc9879ced52c9e7c2e6667)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 1 warnings, 2 checks, 33 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Bharata B Rao April 6, 2020, 3:49 a.m. UTC
Memory that gets hot-plugged _during_ boot (and not the memory
that gets plugged in after boot), is mapped with 1G mappings
and will undergo splitting when it is unplugged. The splitting
code has a few issues:

1. Recursive locking
--------------------
Memory unplug path takes cpu_hotplug_lock and calls stop_machine()
for splitting the mappings. However stop_machine() takes
cpu_hotplug_lock again causing deadlock.

2. BUG: sleeping function called from in_atomic() context
---------------------------------------------------------
Memory unplug path (remove_pagetable) takes init_mm.page_table_lock
spinlock and later calls stop_machine() which does wait_for_completion()

3. Bad unlock unbalance
-----------------------
Memory unplug path takes init_mm.page_table_lock spinlock and calls
stop_machine(). The stop_machine thread function runs in a different
thread context (migration thread) which tries to release and reaquire
ptl. Releasing ptl from a different thread than which acquired it
causes bad unlock unbalance.

These problems can be avoided if we avoid mapping hot-plugged memory
with 1G mapping, thereby removing the need for splitting them during
unplug. During radix init, identify(*) the hot-plugged memory region
and create separate mappings for each LMB so that they don't get mapped
with 1G mappings.

To create separate mappings for every LMB in the hot-plugged
region, we need lmb-size. I am currently using memory_block_size_bytes()
API to get the lmb-size. Since this is early init time code, the
machine type isn't probed yet and hence memory_block_size_bytes()
would return the default LMB size as 16MB. Hence we end up creating
separate mappings at much lower granularity than what we can ideally
do for pseries machine.

(*) Identifying and differentiating hot-plugged memory from the
boot time memory is now possible with PAPR extension to LMB flags.
(Ref: https://lore.kernel.org/linuxppc-dev/f55a7b65a43cc9dc7b22385cf9960f8b11d5ce2e.camel@linux.ibm.com/T/#t)

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 arch/powerpc/mm/book3s64/radix_pgtable.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

Comments

Aneesh Kumar K V June 22, 2020, 12:46 p.m. UTC | #1
Bharata B Rao <bharata@linux.ibm.com> writes:

> Memory that gets hot-plugged _during_ boot (and not the memory
> that gets plugged in after boot), is mapped with 1G mappings
> and will undergo splitting when it is unplugged. The splitting
> code has a few issues:
>
> 1. Recursive locking
> --------------------
> Memory unplug path takes cpu_hotplug_lock and calls stop_machine()
> for splitting the mappings. However stop_machine() takes
> cpu_hotplug_lock again causing deadlock.
>
> 2. BUG: sleeping function called from in_atomic() context
> ---------------------------------------------------------
> Memory unplug path (remove_pagetable) takes init_mm.page_table_lock
> spinlock and later calls stop_machine() which does wait_for_completion()
>
> 3. Bad unlock unbalance
> -----------------------
> Memory unplug path takes init_mm.page_table_lock spinlock and calls
> stop_machine(). The stop_machine thread function runs in a different
> thread context (migration thread) which tries to release and reaquire
> ptl. Releasing ptl from a different thread than which acquired it
> causes bad unlock unbalance.
>
> These problems can be avoided if we avoid mapping hot-plugged memory
> with 1G mapping, thereby removing the need for splitting them during
> unplug. During radix init, identify(*) the hot-plugged memory region
> and create separate mappings for each LMB so that they don't get mapped
> with 1G mappings.
>
> To create separate mappings for every LMB in the hot-plugged
> region, we need lmb-size. I am currently using memory_block_size_bytes()
> API to get the lmb-size. Since this is early init time code, the
> machine type isn't probed yet and hence memory_block_size_bytes()
> would return the default LMB size as 16MB. Hence we end up creating
> separate mappings at much lower granularity than what we can ideally
> do for pseries machine.
>
> (*) Identifying and differentiating hot-plugged memory from the
> boot time memory is now possible with PAPR extension to LMB flags.
> (Ref: https://lore.kernel.org/linuxppc-dev/f55a7b65a43cc9dc7b22385cf9960f8b11d5ce2e.camel@linux.ibm.com/T/#t)
>

Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

> Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> ---
>  arch/powerpc/mm/book3s64/radix_pgtable.c | 15 ++++++++++++---
>  1 file changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index dd1bea45325c..4a4fb30f6c3d 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -16,6 +16,7 @@
>  #include <linux/hugetlb.h>
>  #include <linux/string_helpers.h>
>  #include <linux/stop_machine.h>
> +#include <linux/memory.h>
>  
>  #include <asm/pgtable.h>
>  #include <asm/pgalloc.h>
> @@ -313,6 +314,8 @@ static void __init radix_init_pgtable(void)
>  {
>  	unsigned long rts_field;
>  	struct memblock_region *reg;
> +	phys_addr_t addr;
> +	u64 lmb_size = memory_block_size_bytes();
>  
>  	/* We don't support slb for radix */
>  	mmu_slb_size = 0;
> @@ -331,9 +334,15 @@ static void __init radix_init_pgtable(void)
>  			continue;
>  		}
>  
> -		WARN_ON(create_physical_mapping(reg->base,
> -						reg->base + reg->size,
> -						-1));
> +		if (memblock_is_hotpluggable(reg)) {
> +			for (addr = reg->base; addr < (reg->base + reg->size);
> +				addr += lmb_size)
> +				WARN_ON(create_physical_mapping(addr,
> +				addr + lmb_size, -1));

Is that indentation correct? 

> +		} else
> +			WARN_ON(create_physical_mapping(reg->base,
> +							reg->base + reg->size,
> +							-1));
>  	}
>  
>  	/* Find out how many PID bits are supported */
> -- 
> 2.21.0
diff mbox series

Patch

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index dd1bea45325c..4a4fb30f6c3d 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -16,6 +16,7 @@ 
 #include <linux/hugetlb.h>
 #include <linux/string_helpers.h>
 #include <linux/stop_machine.h>
+#include <linux/memory.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -313,6 +314,8 @@  static void __init radix_init_pgtable(void)
 {
 	unsigned long rts_field;
 	struct memblock_region *reg;
+	phys_addr_t addr;
+	u64 lmb_size = memory_block_size_bytes();
 
 	/* We don't support slb for radix */
 	mmu_slb_size = 0;
@@ -331,9 +334,15 @@  static void __init radix_init_pgtable(void)
 			continue;
 		}
 
-		WARN_ON(create_physical_mapping(reg->base,
-						reg->base + reg->size,
-						-1));
+		if (memblock_is_hotpluggable(reg)) {
+			for (addr = reg->base; addr < (reg->base + reg->size);
+				addr += lmb_size)
+				WARN_ON(create_physical_mapping(addr,
+				addr + lmb_size, -1));
+		} else
+			WARN_ON(create_physical_mapping(reg->base,
+							reg->base + reg->size,
+							-1));
 	}
 
 	/* Find out how many PID bits are supported */