
[v3] powerpc/memhotplug: Add add_pages override for PPC

Message ID 20220629050925.31447-1-aneesh.kumar@linux.ibm.com (mailing list archive)
State Accepted
Series [v3] powerpc/memhotplug: Add add_pages override for PPC

Checks

Context Check Description
snowpatch_ozlabs/github-powerpc_ppctests success Successfully ran 10 jobs.
snowpatch_ozlabs/github-powerpc_selftests success Successfully ran 10 jobs.
snowpatch_ozlabs/github-powerpc_kernel_qemu success Successfully ran 23 jobs.
snowpatch_ozlabs/github-powerpc_clang success Successfully ran 7 jobs.
snowpatch_ozlabs/github-powerpc_sparse success Successfully ran 4 jobs.

Commit Message

Aneesh Kumar K V June 29, 2022, 5:09 a.m. UTC
With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
the kernel now validates the address against the high_memory value. This results
in the below BUG_ON with dax pfns.

[  635.798741][T26531] kernel BUG at mm/page_alloc.c:5521!
1:mon> e
cpu 0x1: Vector: 700 (Program Check) at [c000000007287630]
    pc: c00000000055ed48: free_pages.part.0+0x48/0x110
    lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0
    sp: c0000000072878d0
   msr: 800000000282b033
  current = 0xc00000000afabe00
  paca    = 0xc00000037ffff300   irqmask: 0x03   irq_happened: 0x05
    pid   = 26531, comm = 50-landscape-sy
kernel BUG at :5521!
Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022
1:mon> t
[link register   ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0
[c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable)
[c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0
[c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210
[c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40
[c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00
[c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0
[c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310
[c000000007287d80] c00000000062858c sys_execve+0x4c/0x60
[c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0
[c000000007287e10] c00000000000c53c system_call_common+0xec/0x250

The fix is to make sure we update high_memory on memory hotplug.
This is similar to what x86 does in commit 3072e413e305 ("mm/memory_hotplug: introduce add_pages").
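
For context, the check introduced by ffa0b64e3be5 bounds linear-map addresses by
high_memory, roughly along the lines of the sketch below (simplified, not the exact
upstream macro); the VM_BUG_ON(!virt_addr_valid(...)) in free_pages() is then what
the log above appears to hit:

	/*
	 * arch/powerpc/include/asm/page.h -- simplified sketch of the check
	 * added by ffa0b64e3be5, not the exact macro.
	 */
	#define virt_addr_valid(vaddr)	({				\
		unsigned long _addr = (unsigned long)(vaddr);		\
		_addr >= PAGE_OFFSET &&					\
		_addr < (unsigned long)high_memory &&		\
		pfn_valid(virt_to_pfn(_addr));				\
	})

Before this change high_memory was only set at boot, so dax/kmem memory hotplugged
above the boot-time end of RAM fails this check even though its linear-map addresses
are perfectly usable.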

Fixes: ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
Changes from v2:
* drop WARN_ON_ONCE
* check for error from __add_pages

 arch/powerpc/Kconfig  |  4 ++++
 arch/powerpc/mm/mem.c | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

Comments

Kefeng Wang June 29, 2022, 6:30 a.m. UTC | #1
Hi,

On 2022/6/29 13:09, Aneesh Kumar K.V wrote:
> With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
> the kernel now validate the addr against high_memory value. This results
> in the below BUG_ON with dax pfns.
>
> [  635.798741][T26531] kernel BUG at mm/page_alloc.c:5521!
> 1:mon> e
> cpu 0x1: Vector: 700 (Program Check) at [c000000007287630]
>      pc: c00000000055ed48: free_pages.part.0+0x48/0x110
>      lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0
>      sp: c0000000072878d0
>     msr: 800000000282b033
>    current = 0xc00000000afabe00
>    paca    = 0xc00000037ffff300   irqmask: 0x03   irq_happened: 0x05
>      pid   = 26531, comm = 50-landscape-sy
> kernel BUG at :5521!
> Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022
> 1:mon> t
> [link register   ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0
> [c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable)
> [c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0
> [c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210
> [c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40
> [c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00
> [c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0
> [c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310
> [c000000007287d80] c00000000062858c sys_execve+0x4c/0x60
> [c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0
> [c000000007287e10] c00000000000c53c system_call_common+0xec/0x250
>
> The fix is to make sure we update high_memory on memory hotplug.
> This is similar to what x86 does in commit 3072e413e305 ("mm/memory_hotplug: introduce add_pages")
>
> Fixes: ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
> Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> Changes from v2:
> * drop WARN_ON_ONCE
> * check for error from __add_pages
>
>   arch/powerpc/Kconfig  |  4 ++++
>   arch/powerpc/mm/mem.c | 33 ++++++++++++++++++++++++++++++++-
>   2 files changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index c2ce2e60c8f0..7aa12e88c580 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -358,6 +358,10 @@ config ARCH_SUSPEND_NONZERO_CPU
>   	def_bool y
>   	depends on PPC_POWERNV || PPC_PSERIES
>   
> +config ARCH_HAS_ADD_PAGES
> +	def_bool y
> +	depends on ARCH_ENABLE_MEMORY_HOTPLUG
> +
>   config PPC_DCR_NATIVE
>   	bool
>   
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 52b77684acda..a97128a48817 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -105,6 +105,37 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
>   	vm_unmap_aliases();
>   }
>   
> +/*
> + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
> + * updating.
> + */
> +static void update_end_of_memory_vars(u64 start, u64 size)
> +{
> +	unsigned long end_pfn = PFN_UP(start + size);
> +
> +	if (end_pfn > max_pfn) {
> +		max_pfn = end_pfn;
> +		max_low_pfn = end_pfn;
> +		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
> +	}
> +}
> +
> +int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
> +		    struct mhp_params *params)
> +{
> +	int ret;
                 int ret = -EINVAL;
> +
> +	ret = __add_pages(nid, start_pfn, nr_pages, params);
> +	if (ret)
> +		return ret;
> +
> +	/* update max_pfn, max_low_pfn and high_memory */
> +	update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
> +				  nr_pages << PAGE_SHIFT);
> +
> +	return ret;
> +}
> +
and could we only call update_end_of_memory_vars() in arch_add_memory()?
>   int __ref arch_add_memory(int nid, u64 start, u64 size,
>   			  struct mhp_params *params)
>   {
> @@ -115,7 +146,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
>   	rc = arch_create_linear_mapping(nid, start, size, params);
>   	if (rc)
>   		return rc;
> -	rc = __add_pages(nid, start_pfn, nr_pages, params);
> +	rc = add_pages(nid, start_pfn, nr_pages, params);
>   	if (rc)
>   		arch_remove_linear_mapping(start, size);

	if (!rc)
		update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
					  nr_pages << PAGE_SHIFT);
	else
		arch_remove_linear_mapping(start, size);

Thanks

>   	return rc;
Aneesh Kumar K V June 29, 2022, 7:14 a.m. UTC | #2
On 6/29/22 12:00 PM, Kefeng Wang wrote:
> Hi,
> 
> On 2022/6/29 13:09, Aneesh Kumar K.V wrote:
>> With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
>> the kernel now validate the addr against high_memory value. This results
>> in the below BUG_ON with dax pfns.
>>
>> [  635.798741][T26531] kernel BUG at mm/page_alloc.c:5521!
>> 1:mon> e
>> cpu 0x1: Vector: 700 (Program Check) at [c000000007287630]
>>      pc: c00000000055ed48: free_pages.part.0+0x48/0x110
>>      lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0
>>      sp: c0000000072878d0
>>     msr: 800000000282b033
>>    current = 0xc00000000afabe00
>>    paca    = 0xc00000037ffff300   irqmask: 0x03   irq_happened: 0x05
>>      pid   = 26531, comm = 50-landscape-sy
>> kernel BUG at :5521!
>> Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022
>> 1:mon> t
>> [link register   ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0
>> [c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable)
>> [c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0
>> [c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210
>> [c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40
>> [c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00
>> [c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0
>> [c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310
>> [c000000007287d80] c00000000062858c sys_execve+0x4c/0x60
>> [c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0
>> [c000000007287e10] c00000000000c53c system_call_common+0xec/0x250
>>
>> The fix is to make sure we update high_memory on memory hotplug.
>> This is similar to what x86 does in commit 3072e413e305 ("mm/memory_hotplug: introduce add_pages")
>>
>> Fixes: ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
>> Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
>> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>> Changes from v2:
>> * drop WARN_ON_ONCE
>> * check for error from __add_pages
>>
>>   arch/powerpc/Kconfig  |  4 ++++
>>   arch/powerpc/mm/mem.c | 33 ++++++++++++++++++++++++++++++++-
>>   2 files changed, 36 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index c2ce2e60c8f0..7aa12e88c580 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -358,6 +358,10 @@ config ARCH_SUSPEND_NONZERO_CPU
>>       def_bool y
>>       depends on PPC_POWERNV || PPC_PSERIES
>>   +config ARCH_HAS_ADD_PAGES
>> +    def_bool y
>> +    depends on ARCH_ENABLE_MEMORY_HOTPLUG
>> +
>>   config PPC_DCR_NATIVE
>>       bool
>>   diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
>> index 52b77684acda..a97128a48817 100644
>> --- a/arch/powerpc/mm/mem.c
>> +++ b/arch/powerpc/mm/mem.c
>> @@ -105,6 +105,37 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
>>       vm_unmap_aliases();
>>   }
>>   +/*
>> + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
>> + * updating.
>> + */
>> +static void update_end_of_memory_vars(u64 start, u64 size)
>> +{
>> +    unsigned long end_pfn = PFN_UP(start + size);
>> +
>> +    if (end_pfn > max_pfn) {
>> +        max_pfn = end_pfn;
>> +        max_low_pfn = end_pfn;
>> +        high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
>> +    }
>> +}
>> +
>> +int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
>> +            struct mhp_params *params)
>> +{
>> +    int ret;
>                 int ret = -EINVAL;
>> +
>> +    ret = __add_pages(nid, start_pfn, nr_pages, params);
>> +    if (ret)
>> +        return ret;
>> +

Considering we update ret immediately, why should we initialize it to -EINVAL?

	int ret = -EINVAL;
	ret = __add_pages(nid, start_pfn, nr_pages, params);


>> +    /* update max_pfn, max_low_pfn and high_memory */
>> +    update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
>> +                  nr_pages << PAGE_SHIFT);
>> +
>> +    return ret;
>> +}
>> +
> and could we only call update_end_of_memory_vars() in arch_add_memory()?
>>   int __ref arch_add_memory(int nid, u64 start, u64 size,
>>                 struct mhp_params *params)
>>   {
>> @@ -115,7 +146,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
>>       rc = arch_create_linear_mapping(nid, start, size, params);
>>       if (rc)
>>           return rc;
>> -    rc = __add_pages(nid, start_pfn, nr_pages, params);
>> +    rc = add_pages(nid, start_pfn, nr_pages, params);
>>       if (rc)
>>           arch_remove_linear_mapping(start, size);
> 
> if (!rc)
> 
>     update_end_of_memory_vars(start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
> 
> else
> 
>     arch_remove_linear_mapping(start, size);
> 
> Thanks
> 

Commit 3072e413e305 goes into the details of why it is done in add_pages:

mm/memory_hotplug: introduce add_pages

There are new users of memory hotplug emerging.  Some of them require
different subset of arch_add_memory.  There are some which only require
allocation of struct pages without mapping those pages to the kernel
address space.  We currently have __add_pages for that purpose.  But this
is rather lowlevel and not very suitable for the code outside of the
memory hotplug.  E.g.  x86_64 wants to update max_pfn which should be done
by the caller.  Introduce add_pages() which should care about those
details if they are needed.  Each architecture should define its
implementation and select CONFIG_ARCH_HAS_ADD_PAGES.  All others use the
currently existing __add_pages.

We could debate whether max_pfn/high_memory should encompass device private memory too.
But the current code on x86 does that, and I would also expect virt_addr_valid() to
return true for addresses mapping private device memory.
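
To make the wiring concrete: unless an architecture selects ARCH_HAS_ADD_PAGES, the
generic header simply aliases add_pages() to __add_pages(), roughly as below
(paraphrased from include/linux/memory_hotplug.h, not an exact copy):

	#ifndef CONFIG_ARCH_HAS_ADD_PAGES
	/* generic fallback: no bookkeeping beyond __add_pages() */
	static inline int add_pages(int nid, unsigned long start_pfn,
				    unsigned long nr_pages,
				    struct mhp_params *params)
	{
		return __add_pages(nid, start_pfn, nr_pages, params);
	}
	#else
	/* the arch provides add_pages(), as this patch does for powerpc */
	int add_pages(int nid, unsigned long start_pfn,
		      unsigned long nr_pages, struct mhp_params *params);
	#endif

So an arch override of add_pages() is the only hook that also covers callers which
bypass arch_add_memory() entirely, such as the device private memory path.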



>>       return rc;
Kefeng Wang June 29, 2022, 7:29 a.m. UTC | #3
On 2022/6/29 15:14, Aneesh Kumar K V wrote:
> On 6/29/22 12:00 PM, Kefeng Wang wrote:
>> Hi,
>>
>> On 2022/6/29 13:09, Aneesh Kumar K.V wrote:
>>> With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
>>> the kernel now validate the addr against high_memory value. This results
>>> in the below BUG_ON with dax pfns.
>>>
>>> [  635.798741][T26531] kernel BUG at mm/page_alloc.c:5521!
>>> 1:mon> e
>>> cpu 0x1: Vector: 700 (Program Check) at [c000000007287630]
>>>       pc: c00000000055ed48: free_pages.part.0+0x48/0x110
>>>       lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0
>>>       sp: c0000000072878d0
>>>      msr: 800000000282b033
>>>     current = 0xc00000000afabe00
>>>     paca    = 0xc00000037ffff300   irqmask: 0x03   irq_happened: 0x05
>>>       pid   = 26531, comm = 50-landscape-sy
>>> kernel BUG at :5521!
>>> Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022
>>> 1:mon> t
>>> [link register   ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0
>>> [c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable)
>>> [c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0
>>> [c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210
>>> [c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40
>>> [c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00
>>> [c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0
>>> [c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310
>>> [c000000007287d80] c00000000062858c sys_execve+0x4c/0x60
>>> [c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0
>>> [c000000007287e10] c00000000000c53c system_call_common+0xec/0x250
>>>
>>> The fix is to make sure we update high_memory on memory hotplug.
>>> This is similar to what x86 does in commit 3072e413e305 ("mm/memory_hotplug: introduce add_pages")
>>>
>>> Fixes: ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
>>> Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
>>> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>>> ---
>>> Changes from v2:
>>> * drop WARN_ON_ONCE
>>> * check for error from __add_pages
>>>
>>>    arch/powerpc/Kconfig  |  4 ++++
>>>    arch/powerpc/mm/mem.c | 33 ++++++++++++++++++++++++++++++++-
>>>    2 files changed, 36 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>>> index c2ce2e60c8f0..7aa12e88c580 100644
>>> --- a/arch/powerpc/Kconfig
>>> +++ b/arch/powerpc/Kconfig
>>> @@ -358,6 +358,10 @@ config ARCH_SUSPEND_NONZERO_CPU
>>>        def_bool y
>>>        depends on PPC_POWERNV || PPC_PSERIES
>>>    +config ARCH_HAS_ADD_PAGES
>>> +    def_bool y
>>> +    depends on ARCH_ENABLE_MEMORY_HOTPLUG
>>> +
>>>    config PPC_DCR_NATIVE
>>>        bool
>>>    diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
>>> index 52b77684acda..a97128a48817 100644
>>> --- a/arch/powerpc/mm/mem.c
>>> +++ b/arch/powerpc/mm/mem.c
>>> @@ -105,6 +105,37 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
>>>        vm_unmap_aliases();
>>>    }
>>>    +/*
>>> + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
>>> + * updating.
>>> + */
>>> +static void update_end_of_memory_vars(u64 start, u64 size)
>>> +{
>>> +    unsigned long end_pfn = PFN_UP(start + size);
>>> +
>>> +    if (end_pfn > max_pfn) {
>>> +        max_pfn = end_pfn;
>>> +        max_low_pfn = end_pfn;
>>> +        high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
>>> +    }
>>> +}
>>> +
>>> +int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
>>> +            struct mhp_params *params)
>>> +{
>>> +    int ret;
>>                  int ret = -EINVAL;
>>> +
>>> +    ret = __add_pages(nid, start_pfn, nr_pages, params);
>>> +    if (ret)
>>> +        return ret;
>>> +
> considering we are updating ret immediately why should we initialize that to EINVAL?
>
> 	int ret = -EINVAL;
> 	ret = __add_pages(nid, start_pfn, nr_pages, params);
> 	
Ignore it, I don't know what I was thinking...
>
>
>>> +    /* update max_pfn, max_low_pfn and high_memory */
>>> +    update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
>>> +                  nr_pages << PAGE_SHIFT);
>>> +
>>> +    return ret;
>>> +}
>>> +
>> and could we only call update_end_of_memory_vars() in arch_add_memory()?
>>>    int __ref arch_add_memory(int nid, u64 start, u64 size,
>>>                  struct mhp_params *params)
>>>    {
>>> @@ -115,7 +146,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
>>>        rc = arch_create_linear_mapping(nid, start, size, params);
>>>        if (rc)
>>>            return rc;
>>> -    rc = __add_pages(nid, start_pfn, nr_pages, params);
>>> +    rc = add_pages(nid, start_pfn, nr_pages, params);
>>>        if (rc)
>>>            arch_remove_linear_mapping(start, size);
>> if (!rc)
>>
>>      update_end_of_memory_vars(start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
>>
>> else
>>
>>      arch_remove_linear_mapping(start, size);
>>
>> Thanks
>>
> commit 3072e413e305 goes into the details of why it is done in add_pages
>
> mm/memory_hotplug: introduce add_pages
>
> There are new users of memory hotplug emerging.  Some of them require
> different subset of arch_add_memory.  There are some which only require
> allocation of struct pages without mapping those pages to the kernel
> address space.  We currently have __add_pages for that purpose.  But this
> is rather lowlevel and not very suitable for the code outside of the
> memory hotplug.  E.g.  x86_64 wants to update max_pfn which should be done
> by the caller.  Introduce add_pages() which should care about those
> details if they are needed.  Each architecture should define its
> implementation and select CONFIG_ARCH_HAS_ADD_PAGES.  All others use the
> currently existing __add_pages.
>
> We could debate whether max_pfn/high_memory should encompass device private memory too.
> But the current code on x86 does that and I would also expect virt_addr_valid to return
> true for the address mapping private device memory.
>
OK, I see: for private device memory we call add_pages(), which should update
max_pfn/high_memory, not only for arch_add_memory(). Maybe some other arches
have the same problem.
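
For reference, the device-private path in mm/memremap.c calls add_pages() directly
rather than arch_add_memory(), which is why the override matters there; roughly
(paraphrased, not the exact upstream code):

	/* mm/memremap.c, pagemap_range() -- paraphrased sketch */
	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
		/* struct pages only, no kernel linear mapping */
		error = add_pages(nid, PHYS_PFN(range->start),
				  PHYS_PFN(range_len(range)), params);
	} else {
		/* e.g. fsdax: the full arch_add_memory() path */
		error = arch_add_memory(nid, range->start,
					range_len(range), params);
	}
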
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>


>
>>>        return rc;
> .
Michael Ellerman June 29, 2022, 12:14 p.m. UTC | #4
On Wed, 29 Jun 2022 10:39:25 +0530, Aneesh Kumar K.V wrote:
> With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
> the kernel now validate the addr against high_memory value. This results
> in the below BUG_ON with dax pfns.
> 
> [  635.798741][T26531] kernel BUG at mm/page_alloc.c:5521!
> 1:mon> e
> cpu 0x1: Vector: 700 (Program Check) at [c000000007287630]
>     pc: c00000000055ed48: free_pages.part.0+0x48/0x110
>     lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0
>     sp: c0000000072878d0
>    msr: 800000000282b033
>   current = 0xc00000000afabe00
>   paca    = 0xc00000037ffff300   irqmask: 0x03   irq_happened: 0x05
>     pid   = 26531, comm = 50-landscape-sy
> kernel BUG at :5521!
> Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022
> 1:mon> t
> [link register   ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0
> [c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable)
> [c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0
> [c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210
> [c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40
> [c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00
> [c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0
> [c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310
> [c000000007287d80] c00000000062858c sys_execve+0x4c/0x60
> [c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0
> [c000000007287e10] c00000000000c53c system_call_common+0xec/0x250
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/memhotplug: Add add_pages override for PPC
      https://git.kernel.org/powerpc/c/ac790d09885d36143076e7e02825c541e8eee899

cheers
Oscar Salvador June 29, 2022, 2:13 p.m. UTC | #5
On Wed, Jun 29, 2022 at 10:39:25AM +0530, Aneesh Kumar K.V wrote:
> With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
> the kernel now validate the addr against high_memory value. This results
> in the below BUG_ON with dax pfns.
> 
> [  635.798741][T26531] kernel BUG at mm/page_alloc.c:5521!
> 1:mon> e
> cpu 0x1: Vector: 700 (Program Check) at [c000000007287630]
>     pc: c00000000055ed48: free_pages.part.0+0x48/0x110
>     lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0
>     sp: c0000000072878d0
>    msr: 800000000282b033
>   current = 0xc00000000afabe00
>   paca    = 0xc00000037ffff300   irqmask: 0x03   irq_happened: 0x05
>     pid   = 26531, comm = 50-landscape-sy
> kernel BUG at :5521!
> Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022
> 1:mon> t
> [link register   ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0
> [c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable)
> [c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0
> [c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210
> [c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40
> [c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00
> [c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0
> [c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310
> [c000000007287d80] c00000000062858c sys_execve+0x4c/0x60
> [c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0
> [c000000007287e10] c00000000000c53c system_call_common+0xec/0x250
> 
> The fix is to make sure we update high_memory on memory hotplug.
> This is similar to what x86 does in commit 3072e413e305 ("mm/memory_hotplug: introduce add_pages")
> 
> Fixes: ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
> Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Reviewed-by: Oscar Salvador <osalvador@suse.de>

> ---
> Changes from v2:
> * drop WARN_ON_ONCE
> * check for error from __add_pages
> 
>  arch/powerpc/Kconfig  |  4 ++++
>  arch/powerpc/mm/mem.c | 33 ++++++++++++++++++++++++++++++++-
>  2 files changed, 36 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index c2ce2e60c8f0..7aa12e88c580 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -358,6 +358,10 @@ config ARCH_SUSPEND_NONZERO_CPU
>  	def_bool y
>  	depends on PPC_POWERNV || PPC_PSERIES
>  
> +config ARCH_HAS_ADD_PAGES
> +	def_bool y
> +	depends on ARCH_ENABLE_MEMORY_HOTPLUG
> +
>  config PPC_DCR_NATIVE
>  	bool
>  
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 52b77684acda..a97128a48817 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -105,6 +105,37 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
>  	vm_unmap_aliases();
>  }
>  
> +/*
> + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
> + * updating.
> + */
> +static void update_end_of_memory_vars(u64 start, u64 size)
> +{
> +	unsigned long end_pfn = PFN_UP(start + size);
> +
> +	if (end_pfn > max_pfn) {
> +		max_pfn = end_pfn;
> +		max_low_pfn = end_pfn;
> +		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
> +	}
> +}
> +
> +int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
> +		    struct mhp_params *params)
> +{
> +	int ret;
> +
> +	ret = __add_pages(nid, start_pfn, nr_pages, params);
> +	if (ret)
> +		return ret;
> +
> +	/* update max_pfn, max_low_pfn and high_memory */
> +	update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
> +				  nr_pages << PAGE_SHIFT);
> +
> +	return ret;
> +}
> +
>  int __ref arch_add_memory(int nid, u64 start, u64 size,
>  			  struct mhp_params *params)
>  {
> @@ -115,7 +146,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
>  	rc = arch_create_linear_mapping(nid, start, size, params);
>  	if (rc)
>  		return rc;
> -	rc = __add_pages(nid, start_pfn, nr_pages, params);
> +	rc = add_pages(nid, start_pfn, nr_pages, params);
>  	if (rc)
>  		arch_remove_linear_mapping(start, size);
>  	return rc;
> -- 
> 2.36.1
> 
>

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c2ce2e60c8f0..7aa12e88c580 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -358,6 +358,10 @@  config ARCH_SUSPEND_NONZERO_CPU
 	def_bool y
 	depends on PPC_POWERNV || PPC_PSERIES
 
+config ARCH_HAS_ADD_PAGES
+	def_bool y
+	depends on ARCH_ENABLE_MEMORY_HOTPLUG
+
 config PPC_DCR_NATIVE
 	bool
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 52b77684acda..a97128a48817 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -105,6 +105,37 @@  void __ref arch_remove_linear_mapping(u64 start, u64 size)
 	vm_unmap_aliases();
 }
 
+/*
+ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
+ * updating.
+ */
+static void update_end_of_memory_vars(u64 start, u64 size)
+{
+	unsigned long end_pfn = PFN_UP(start + size);
+
+	if (end_pfn > max_pfn) {
+		max_pfn = end_pfn;
+		max_low_pfn = end_pfn;
+		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+	}
+}
+
+int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+		    struct mhp_params *params)
+{
+	int ret;
+
+	ret = __add_pages(nid, start_pfn, nr_pages, params);
+	if (ret)
+		return ret;
+
+	/* update max_pfn, max_low_pfn and high_memory */
+	update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
+				  nr_pages << PAGE_SHIFT);
+
+	return ret;
+}
+
 int __ref arch_add_memory(int nid, u64 start, u64 size,
 			  struct mhp_params *params)
 {
@@ -115,7 +146,7 @@  int __ref arch_add_memory(int nid, u64 start, u64 size,
 	rc = arch_create_linear_mapping(nid, start, size, params);
 	if (rc)
 		return rc;
-	rc = __add_pages(nid, start_pfn, nr_pages, params);
+	rc = add_pages(nid, start_pfn, nr_pages, params);
 	if (rc)
 		arch_remove_linear_mapping(start, size);
 	return rc;