diff mbox series

[1/2] powerpc/fadump: allocate memory for additional parameters early

Message ID 20241104083528.99520-1-sourabhjain@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series [1/2] powerpc/fadump: allocate memory for additional parameters early | expand

Commit Message

Sourabh Jain Nov. 4, 2024, 8:35 a.m. UTC
From: Hari Bathini <hbathini@linux.ibm.com>

Memory for passing additional parameters to fadump capture kernel
is allocated during subsys_initcall level, using memblock. But
as slab is already available by this time, allocation happens via
the buddy allocator. This may work for radix MMU but is likely to
fail in most cases for hash MMU as hash MMU needs this memory in
the first memory block for it to be accesible in real mode in the
capture kernel (second boot). So, allocate memory for additional
parameters area as soon as MMU mode is obvious.

Fixes: 683eab94da75 ("powerpc/fadump: setup additional parameters for dump capture kernel")
Reported-by: Venkat Rao Bagalkote <venkat88@linux.vnet.ibm.com>
Closes: https://lore.kernel.org/lkml/a70e4064-a040-447b-8556-1fd02f19383d@linux.vnet.ibm.com/T/#u
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
---
 arch/powerpc/include/asm/fadump.h |  2 ++
 arch/powerpc/kernel/fadump.c      | 15 ++++++++++-----
 arch/powerpc/kernel/prom.c        |  3 +++
 3 files changed, 15 insertions(+), 5 deletions(-)

Comments

Michael Ellerman Nov. 5, 2024, 8:16 a.m. UTC | #1
Hi Sourabh,

Sourabh Jain <sourabhjain@linux.ibm.com> writes:
> From: Hari Bathini <hbathini@linux.ibm.com>
>
> Memory for passing additional parameters to fadump capture kernel
> is allocated during subsys_initcall level, using memblock. But
> as slab is already available by this time, allocation happens via
> the buddy allocator. This may work for radix MMU but is likely to

Does it even work for radix? If the memory has been given out the buddy
allocator then it could be overwritten at any moment. Or vice-versa,
fadump might overwrite memory used for something else.

> fail in most cases for hash MMU as hash MMU needs this memory in
> the first memory block for it to be accesible in real mode in the
> capture kernel (second boot). So, allocate memory for additional
> parameters area as soon as MMU mode is obvious.
>
> Fixes: 683eab94da75 ("powerpc/fadump: setup additional parameters for dump capture kernel")
> Reported-by: Venkat Rao Bagalkote <venkat88@linux.vnet.ibm.com>
> Closes: https://lore.kernel.org/lkml/a70e4064-a040-447b-8556-1fd02f19383d@linux.vnet.ibm.com/T/#u
> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/fadump.h |  2 ++
>  arch/powerpc/kernel/fadump.c      | 15 ++++++++++-----
>  arch/powerpc/kernel/prom.c        |  3 +++
>  3 files changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
> index ef40c9b6972a..978102c5129e 100644
> --- a/arch/powerpc/include/asm/fadump.h
> +++ b/arch/powerpc/include/asm/fadump.h
> @@ -19,6 +19,7 @@ extern int is_fadump_active(void);
>  extern int should_fadump_crash(void);
>  extern void crash_fadump(struct pt_regs *, const char *);
>  extern void fadump_cleanup(void);
> +extern void fadump_setup_param_area(void);
 
You can drop extern on new declarations.

>  extern void fadump_append_bootargs(void);
>  
>  #else	/* CONFIG_FA_DUMP */
> @@ -26,6 +27,7 @@ static inline int is_fadump_active(void) { return 0; }
>  static inline int should_fadump_crash(void) { return 0; }
>  static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
>  static inline void fadump_cleanup(void) { }
> +static inline void fadump_setup_param_area(void) { }
>  static inline void fadump_append_bootargs(void) { }
>  #endif /* !CONFIG_FA_DUMP */
>  
> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> index a612e7513a4f..4a3f80f42118 100644
> --- a/arch/powerpc/kernel/fadump.c
> +++ b/arch/powerpc/kernel/fadump.c
> @@ -1586,6 +1586,12 @@ static void __init fadump_init_files(void)
>  		return;
>  	}
>  
> +	if (fw_dump.param_area) {
> +		rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr);
> +		if (rc)
> +			pr_err("unable to create bootargs_append sysfs file (%d)\n", rc);
> +	}
> +
>  	debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
>  			    &fadump_region_fops);
>  
> @@ -1740,7 +1746,7 @@ static void __init fadump_process(void)
>   * Reserve memory to store additional parameters to be passed
>   * for fadump/capture kernel.
>   */
> -static void __init fadump_setup_param_area(void)
> +void __init fadump_setup_param_area(void)
>  {
>  	phys_addr_t range_start, range_end;
>  
> @@ -1748,7 +1754,7 @@ static void __init fadump_setup_param_area(void)
>  		return;
>  
>  	/* This memory can't be used by PFW or bootloader as it is shared across kernels */
> -	if (radix_enabled()) {
> +	if (early_radix_enabled()) {
>  		/*
>  		 * Anywhere in the upper half should be good enough as all memory
>  		 * is accessible in real mode.
> @@ -1776,12 +1782,12 @@ static void __init fadump_setup_param_area(void)
>  						       COMMAND_LINE_SIZE,
>  						       range_start,
>  						       range_end);
> -	if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) {
> +	if (!fw_dump.param_area) {
>  		pr_warn("WARNING: Could not setup area to pass additional parameters!\n");
>  		return;
>  	}
>  
> -	memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
> +	memset(__va(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
 
This will now be running with the MMU off, so I think it would be
clearer to not use __va() here. Using __va() will work, but only because
the CPU ignores the top bits of the address in real mode.

There are cases where it's correct to use __va() in real mode, ie. when
you're storing a pointer for later use in virtual mode, but this is not
one of those cases AFAICS.

cheers
Hari Bathini Nov. 5, 2024, 8:23 a.m. UTC | #2
On 05/11/24 1:46 pm, Michael Ellerman wrote:
> Hi Sourabh,
> 
> Sourabh Jain <sourabhjain@linux.ibm.com> writes:
>> From: Hari Bathini <hbathini@linux.ibm.com>
>>
>> Memory for passing additional parameters to fadump capture kernel
>> is allocated during subsys_initcall level, using memblock. But
>> as slab is already available by this time, allocation happens via
>> the buddy allocator. This may work for radix MMU but is likely to
> 
> Does it even work for radix? If the memory has been given out the buddy
> allocator then it could be overwritten at any moment. Or vice-versa,
> fadump might overwrite memory used for something else.

Right, Michael.
May probably not see the issue on an idle system for radix case but
it is likely to run into weird problems as soon as the memory usage
on the system goes up a bit.

Thanks
Hari

>> fail in most cases for hash MMU as hash MMU needs this memory in
>> the first memory block for it to be accesible in real mode in the
>> capture kernel (second boot). So, allocate memory for additional
>> parameters area as soon as MMU mode is obvious.
>>
>> Fixes: 683eab94da75 ("powerpc/fadump: setup additional parameters for dump capture kernel")
>> Reported-by: Venkat Rao Bagalkote <venkat88@linux.vnet.ibm.com>
>> Closes: https://lore.kernel.org/lkml/a70e4064-a040-447b-8556-1fd02f19383d@linux.vnet.ibm.com/T/#u
>> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
>> ---
>>   arch/powerpc/include/asm/fadump.h |  2 ++
>>   arch/powerpc/kernel/fadump.c      | 15 ++++++++++-----
>>   arch/powerpc/kernel/prom.c        |  3 +++
>>   3 files changed, 15 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
>> index ef40c9b6972a..978102c5129e 100644
>> --- a/arch/powerpc/include/asm/fadump.h
>> +++ b/arch/powerpc/include/asm/fadump.h
>> @@ -19,6 +19,7 @@ extern int is_fadump_active(void);
>>   extern int should_fadump_crash(void);
>>   extern void crash_fadump(struct pt_regs *, const char *);
>>   extern void fadump_cleanup(void);
>> +extern void fadump_setup_param_area(void);
>   
> You can drop extern on new declarations.
> 
>>   extern void fadump_append_bootargs(void);
>>   
>>   #else	/* CONFIG_FA_DUMP */
>> @@ -26,6 +27,7 @@ static inline int is_fadump_active(void) { return 0; }
>>   static inline int should_fadump_crash(void) { return 0; }
>>   static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
>>   static inline void fadump_cleanup(void) { }
>> +static inline void fadump_setup_param_area(void) { }
>>   static inline void fadump_append_bootargs(void) { }
>>   #endif /* !CONFIG_FA_DUMP */
>>   
>> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
>> index a612e7513a4f..4a3f80f42118 100644
>> --- a/arch/powerpc/kernel/fadump.c
>> +++ b/arch/powerpc/kernel/fadump.c
>> @@ -1586,6 +1586,12 @@ static void __init fadump_init_files(void)
>>   		return;
>>   	}
>>   
>> +	if (fw_dump.param_area) {
>> +		rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr);
>> +		if (rc)
>> +			pr_err("unable to create bootargs_append sysfs file (%d)\n", rc);
>> +	}
>> +
>>   	debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
>>   			    &fadump_region_fops);
>>   
>> @@ -1740,7 +1746,7 @@ static void __init fadump_process(void)
>>    * Reserve memory to store additional parameters to be passed
>>    * for fadump/capture kernel.
>>    */
>> -static void __init fadump_setup_param_area(void)
>> +void __init fadump_setup_param_area(void)
>>   {
>>   	phys_addr_t range_start, range_end;
>>   
>> @@ -1748,7 +1754,7 @@ static void __init fadump_setup_param_area(void)
>>   		return;
>>   
>>   	/* This memory can't be used by PFW or bootloader as it is shared across kernels */
>> -	if (radix_enabled()) {
>> +	if (early_radix_enabled()) {
>>   		/*
>>   		 * Anywhere in the upper half should be good enough as all memory
>>   		 * is accessible in real mode.
>> @@ -1776,12 +1782,12 @@ static void __init fadump_setup_param_area(void)
>>   						       COMMAND_LINE_SIZE,
>>   						       range_start,
>>   						       range_end);
>> -	if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) {
>> +	if (!fw_dump.param_area) {
>>   		pr_warn("WARNING: Could not setup area to pass additional parameters!\n");
>>   		return;
>>   	}
>>   
>> -	memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
>> +	memset(__va(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
>   
> This will now be running with the MMU off, so I think it would be
> clearer to not use __va() here. Using __va() will work, but only because
> the CPU ignores the top bits of the address in real mode.
> 
> There are cases where it's correct to use __va() in real mode, ie. when
> you're storing a pointer for later use in virtual mode, but this is not
> one of those cases AFAICS.
> 
> cheers
Sourabh Jain Nov. 6, 2024, 12:38 p.m. UTC | #3
Hello Michael and Hari,

On 05/11/24 13:46, Michael Ellerman wrote:
> Hi Sourabh,
>
> Sourabh Jain <sourabhjain@linux.ibm.com> writes:
>> From: Hari Bathini <hbathini@linux.ibm.com>
>>
>> Memory for passing additional parameters to fadump capture kernel
>> is allocated during subsys_initcall level, using memblock. But
>> as slab is already available by this time, allocation happens via
>> the buddy allocator. This may work for radix MMU but is likely to
> Does it even work for radix? If the memory has been given out the buddy
> allocator then it could be overwritten at any moment. Or vice-versa,
> fadump might overwrite memory used for something else.
>> fail in most cases for hash MMU as hash MMU needs this memory in
>> the first memory block for it to be accesible in real mode in the
>> capture kernel (second boot). So, allocate memory for additional
>> parameters area as soon as MMU mode is obvious.
>>
>> Fixes: 683eab94da75 ("powerpc/fadump: setup additional parameters for dump capture kernel")
>> Reported-by: Venkat Rao Bagalkote <venkat88@linux.vnet.ibm.com>
>> Closes: https://lore.kernel.org/lkml/a70e4064-a040-447b-8556-1fd02f19383d@linux.vnet.ibm.com/T/#u
>> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
>> ---
>>   arch/powerpc/include/asm/fadump.h |  2 ++
>>   arch/powerpc/kernel/fadump.c      | 15 ++++++++++-----
>>   arch/powerpc/kernel/prom.c        |  3 +++
>>   3 files changed, 15 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
>> index ef40c9b6972a..978102c5129e 100644
>> --- a/arch/powerpc/include/asm/fadump.h
>> +++ b/arch/powerpc/include/asm/fadump.h
>> @@ -19,6 +19,7 @@ extern int is_fadump_active(void);
>>   extern int should_fadump_crash(void);
>>   extern void crash_fadump(struct pt_regs *, const char *);
>>   extern void fadump_cleanup(void);
>> +extern void fadump_setup_param_area(void);
>   
> You can drop extern on new declarations.

Sure I will update the function declaration.

>
>>   extern void fadump_append_bootargs(void);
>>   
>>   #else	/* CONFIG_FA_DUMP */
>> @@ -26,6 +27,7 @@ static inline int is_fadump_active(void) { return 0; }
>>   static inline int should_fadump_crash(void) { return 0; }
>>   static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
>>   static inline void fadump_cleanup(void) { }
>> +static inline void fadump_setup_param_area(void) { }
>>   static inline void fadump_append_bootargs(void) { }
>>   #endif /* !CONFIG_FA_DUMP */
>>   
>> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
>> index a612e7513a4f..4a3f80f42118 100644
>> --- a/arch/powerpc/kernel/fadump.c
>> +++ b/arch/powerpc/kernel/fadump.c
>> @@ -1586,6 +1586,12 @@ static void __init fadump_init_files(void)
>>   		return;
>>   	}
>>   
>> +	if (fw_dump.param_area) {
>> +		rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr);
>> +		if (rc)
>> +			pr_err("unable to create bootargs_append sysfs file (%d)\n", rc);
>> +	}
>> +
>>   	debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
>>   			    &fadump_region_fops);
>>   
>> @@ -1740,7 +1746,7 @@ static void __init fadump_process(void)
>>    * Reserve memory to store additional parameters to be passed
>>    * for fadump/capture kernel.
>>    */
>> -static void __init fadump_setup_param_area(void)
>> +void __init fadump_setup_param_area(void)
>>   {
>>   	phys_addr_t range_start, range_end;
>>   
>> @@ -1748,7 +1754,7 @@ static void __init fadump_setup_param_area(void)
>>   		return;
>>   
>>   	/* This memory can't be used by PFW or bootloader as it is shared across kernels */
>> -	if (radix_enabled()) {
>> +	if (early_radix_enabled()) {
>>   		/*
>>   		 * Anywhere in the upper half should be good enough as all memory
>>   		 * is accessible in real mode.
>> @@ -1776,12 +1782,12 @@ static void __init fadump_setup_param_area(void)
>>   						       COMMAND_LINE_SIZE,
>>   						       range_start,
>>   						       range_end);
>> -	if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) {
>> +	if (!fw_dump.param_area) {
>>   		pr_warn("WARNING: Could not setup area to pass additional parameters!\n");
>>   		return;
>>   	}
>>   
>> -	memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
>> +	memset(__va(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
>   
> This will now be running with the MMU off, so I think it would be
> clearer to not use __va() here. Using __va() will work, but only because
> the CPU ignores the top bits of the address in real mode.

For my understanding, at what point during kernel boot the MMU is
turned on, and how can we check if it's on or off ?

>
> There are cases where it's correct to use __va() in real mode, ie. when
> you're storing a pointer for later use in virtual mode, but this is not
> one of those cases AFAICS.


Do we have any other way to reset the reserved memory that early in the 
boot?

Or should we move the memory reset for the parameter area to
fadump_setup()->fadump_init_files()  ??

@@ -1587,6 +1587,7 @@ static void __init fadump_init_files(void)
         }

         if (fw_dump.param_area) {
+              memset(__va(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
                 rc = sysfs_create_file(fadump_kobj, 
&bootargs_append_attr.attr);
                 if (rc)
                         pr_err("unable to create bootargs_append sysfs 
file (%d)\n", rc);
@@ -1787,7 +1788,6 @@ void __init fadump_setup_param_area(void)
                 return;
         }

-       memset(__va(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
  }


Thanks for the review Michael.

- Sourabh Jain
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index ef40c9b6972a..978102c5129e 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -19,6 +19,7 @@  extern int is_fadump_active(void);
 extern int should_fadump_crash(void);
 extern void crash_fadump(struct pt_regs *, const char *);
 extern void fadump_cleanup(void);
+extern void fadump_setup_param_area(void);
 extern void fadump_append_bootargs(void);
 
 #else	/* CONFIG_FA_DUMP */
@@ -26,6 +27,7 @@  static inline int is_fadump_active(void) { return 0; }
 static inline int should_fadump_crash(void) { return 0; }
 static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
 static inline void fadump_cleanup(void) { }
+static inline void fadump_setup_param_area(void) { }
 static inline void fadump_append_bootargs(void) { }
 #endif /* !CONFIG_FA_DUMP */
 
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index a612e7513a4f..4a3f80f42118 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1586,6 +1586,12 @@  static void __init fadump_init_files(void)
 		return;
 	}
 
+	if (fw_dump.param_area) {
+		rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr);
+		if (rc)
+			pr_err("unable to create bootargs_append sysfs file (%d)\n", rc);
+	}
+
 	debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
 			    &fadump_region_fops);
 
@@ -1740,7 +1746,7 @@  static void __init fadump_process(void)
  * Reserve memory to store additional parameters to be passed
  * for fadump/capture kernel.
  */
-static void __init fadump_setup_param_area(void)
+void __init fadump_setup_param_area(void)
 {
 	phys_addr_t range_start, range_end;
 
@@ -1748,7 +1754,7 @@  static void __init fadump_setup_param_area(void)
 		return;
 
 	/* This memory can't be used by PFW or bootloader as it is shared across kernels */
-	if (radix_enabled()) {
+	if (early_radix_enabled()) {
 		/*
 		 * Anywhere in the upper half should be good enough as all memory
 		 * is accessible in real mode.
@@ -1776,12 +1782,12 @@  static void __init fadump_setup_param_area(void)
 						       COMMAND_LINE_SIZE,
 						       range_start,
 						       range_end);
-	if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) {
+	if (!fw_dump.param_area) {
 		pr_warn("WARNING: Could not setup area to pass additional parameters!\n");
 		return;
 	}
 
-	memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
+	memset(__va(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
 }
 
 /*
@@ -1807,7 +1813,6 @@  int __init setup_fadump(void)
 	}
 	/* Initialize the kernel dump memory structure and register with f/w */
 	else if (fw_dump.reserve_dump_area_size) {
-		fadump_setup_param_area();
 		fw_dump.ops->fadump_init_mem_struct(&fw_dump);
 		register_fadump();
 	}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 0be07ed407c7..47db1b1aef25 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -908,6 +908,9 @@  void __init early_init_devtree(void *params)
 
 	mmu_early_init_devtree();
 
+	/* Setup param area for passing additional parameters to fadump capture kernel. */
+	fadump_setup_param_area();
+
 #ifdef CONFIG_PPC_POWERNV
 	/* Scan and build the list of machine check recoverable ranges */
 	of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL);