Message ID | 159466088775.24747.1248185448154277951.stgit@hbathini.in.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | ppc64: enable kdump support for kexec_file_load syscall | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (71d6070a8e0e0a1ed82365544f97b86475cb161e) |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 0 warnings, 2 checks, 403 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
Hari Bathini <hbathini@linux.ibm.com> writes: > diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h > new file mode 100644 > index 0000000..90deb46 > --- /dev/null > +++ b/arch/powerpc/include/asm/crashdump-ppc64.h > @@ -0,0 +1,10 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H > +#define _ASM_POWERPC_CRASHDUMP_PPC64_H > + > +/* min & max addresses for kdump load segments */ > +#define KDUMP_BUF_MIN (crashk_res.start) > +#define KDUMP_BUF_MAX ((crashk_res.end < ppc64_rma_size) ? \ > + crashk_res.end : (ppc64_rma_size - 1)) > + > +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */ > diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h > index 7008ea1..bf47a01 100644 > --- a/arch/powerpc/include/asm/kexec.h > +++ b/arch/powerpc/include/asm/kexec.h > @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co > #ifdef CONFIG_KEXEC_FILE > extern const struct kexec_file_ops kexec_elf64_ops; > > -#ifdef CONFIG_IMA_KEXEC > #define ARCH_HAS_KIMAGE_ARCH > > struct kimage_arch { > + struct crash_mem *exclude_ranges; > + > +#ifdef CONFIG_IMA_KEXEC > phys_addr_t ima_buffer_addr; > size_t ima_buffer_size; > -}; > #endif > +}; > > int setup_purgatory(struct kimage *image, const void *slave_code, > const void *fdt, unsigned long kernel_load_addr, > @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, > unsigned long initrd_load_addr, > unsigned long initrd_len, const char *cmdline); > #endif /* CONFIG_PPC64 */ > + > #endif /* CONFIG_KEXEC_FILE */ > > #else /* !CONFIG_KEXEC_CORE */ > diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c > index 23ad04c..c695f94 100644 > --- a/arch/powerpc/kexec/elf_64.c > +++ b/arch/powerpc/kexec/elf_64.c > @@ -22,6 +22,7 @@ > #include <linux/of_fdt.h> > #include <linux/slab.h> > #include <linux/types.h> > +#include <asm/crashdump-ppc64.h> > > 
static void *elf64_load(struct kimage *image, char *kernel_buf, > unsigned long kernel_len, char *initrd, > @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, > if (ret) > goto out; > > + if (image->type == KEXEC_TYPE_CRASH) { > + /* min & max buffer values for kdump case */ > + kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN; > + kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX; This is only my personal opinion and an actual maintainer may disagree, but just looking at the lines above, I would assume that KDUMP_BUF_MIN and KDUMP_BUF_MAX were constants, when in fact they aren't. I suggest using static inline functions in <asm/crashdump-ppc64.h>, for example: static inline resource_size_t get_kdump_buf_min(void) { return crashk_res.start; } static inline resource_size_t get_kdump_buf_max(void) { return (crashk_res.end < ppc64_rma_size) ? crashk_res.end : (ppc64_rma_size - 1); } > + } > + > ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); > if (ret) > goto out; <snip> > +/** > + * __locate_mem_hole_top_down - Looks top down for a large enough memory hole > + * in the memory regions between buf_min & buf_max > + * for the buffer. If found, sets kbuf->mem. > + * @kbuf: Buffer contents and memory parameters. > + * @buf_min: Minimum address for the buffer. > + * @buf_max: Maximum address for the buffer. > + * > + * Returns 0 on success, negative errno on error. 
> + */ > +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, > + u64 buf_min, u64 buf_max) > +{ > + int ret = -EADDRNOTAVAIL; > + phys_addr_t start, end; > + u64 i; > + > + for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, > + MEMBLOCK_NONE, &start, &end, NULL) { > + if (start > buf_max) > + continue; > + > + /* Memory hole not found */ > + if (end < buf_min) > + break; > + > + /* Adjust memory region based on the given range */ > + if (start < buf_min) > + start = buf_min; > + if (end > buf_max) > + end = buf_max; > + > + start = ALIGN(start, kbuf->buf_align); > + if (start < end && (end - start + 1) >= kbuf->memsz) { This is why I dislike using start and end to express address ranges: While struct resource seems to use the [address, end] convention, my reading of the memblock code is that it uses [address, end). This is guaranteed to lead to bugs. So the above has an off-by-one error. To calculate the size of the current range, you need to use `end - start`. > + /* Suitable memory range found. Set kbuf->mem */ > + kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1, Similarly, I believe the `+ 1` here is wrong. > + kbuf->buf_align); > + ret = 0; > + break; > + } > + } > + > + return ret; > +} > + > +/** > + * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a > + * suitable buffer with top down approach. > + * @kbuf: Buffer contents and memory parameters. > + * @buf_min: Minimum address for the buffer. > + * @buf_max: Maximum address for the buffer. > + * @emem: Exclude memory ranges. > + * > + * Returns 0 on success, negative errno on error. 
> + */ > +static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf, > + u64 buf_min, u64 buf_max, > + const struct crash_mem *emem) > +{ > + int i, ret = 0, err = -EADDRNOTAVAIL; > + u64 start, end, tmin, tmax; > + > + tmax = buf_max; > + for (i = (emem->nr_ranges - 1); i >= 0; i--) { > + start = emem->ranges[i].start; > + end = emem->ranges[i].end; > + > + if (start > tmax) > + continue; > + > + if (end < tmax) { > + tmin = (end < buf_min ? buf_min : end + 1); > + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); > + if (!ret) > + return 0; > + } > + > + tmax = start - 1; > + > + if (tmax < buf_min) { > + ret = err; > + break; > + } > + ret = 0; > + } > + > + if (!ret) { > + tmin = buf_min; > + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); > + } > + return ret; > +} > + > +/** > + * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole > + * in the memory regions between buf_min & buf_max > + * for the buffer. If found, sets kbuf->mem. > + * @kbuf: Buffer contents and memory parameters. > + * @buf_min: Minimum address for the buffer. > + * @buf_max: Maximum address for the buffer. > + * > + * Returns 0 on success, negative errno on error. > + */ > +static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf, > + u64 buf_min, u64 buf_max) > +{ > + int ret = -EADDRNOTAVAIL; > + phys_addr_t start, end; > + u64 i; > + > + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, > + MEMBLOCK_NONE, &start, &end, NULL) { > + if (end < buf_min) > + continue; > + > + /* Memory hole not found */ > + if (start > buf_max) > + break; > + > + /* Adjust memory region based on the given range */ > + if (start < buf_min) > + start = buf_min; > + if (end > buf_max) > + end = buf_max; buf_max is an inclusive end address, right? Then this should read `end = buf_max + 1`. Same thing in the top-down version above. > + > + start = ALIGN(start, kbuf->buf_align); > + if (start < end && (end - start + 1) >= kbuf->memsz) { Same off-by-one problem. 
There shouldn't be a `+ 1` here. > + /* Suitable memory range found. Set kbuf->mem */ > + kbuf->mem = start; > + ret = 0; > + break; > + } > + } > + > + return ret; > +} -- Thiago Jung Bauermann IBM Linux Technology Center
Thiago Jung Bauermann <bauerman@linux.ibm.com> writes: > Hari Bathini <hbathini@linux.ibm.com> writes: > >> diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h >> new file mode 100644 >> index 0000000..90deb46 >> --- /dev/null >> +++ b/arch/powerpc/include/asm/crashdump-ppc64.h >> @@ -0,0 +1,10 @@ >> +/* SPDX-License-Identifier: GPL-2.0-only */ >> +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H >> +#define _ASM_POWERPC_CRASHDUMP_PPC64_H >> + >> +/* min & max addresses for kdump load segments */ >> +#define KDUMP_BUF_MIN (crashk_res.start) >> +#define KDUMP_BUF_MAX ((crashk_res.end < ppc64_rma_size) ? \ >> + crashk_res.end : (ppc64_rma_size - 1)) >> + >> +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */ >> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h >> index 7008ea1..bf47a01 100644 >> --- a/arch/powerpc/include/asm/kexec.h >> +++ b/arch/powerpc/include/asm/kexec.h >> @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co >> #ifdef CONFIG_KEXEC_FILE >> extern const struct kexec_file_ops kexec_elf64_ops; >> >> -#ifdef CONFIG_IMA_KEXEC >> #define ARCH_HAS_KIMAGE_ARCH >> >> struct kimage_arch { >> + struct crash_mem *exclude_ranges; >> + >> +#ifdef CONFIG_IMA_KEXEC >> phys_addr_t ima_buffer_addr; >> size_t ima_buffer_size; >> -}; >> #endif >> +}; >> >> int setup_purgatory(struct kimage *image, const void *slave_code, >> const void *fdt, unsigned long kernel_load_addr, >> @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, >> unsigned long initrd_load_addr, >> unsigned long initrd_len, const char *cmdline); >> #endif /* CONFIG_PPC64 */ >> + >> #endif /* CONFIG_KEXEC_FILE */ >> >> #else /* !CONFIG_KEXEC_CORE */ >> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c >> index 23ad04c..c695f94 100644 >> --- a/arch/powerpc/kexec/elf_64.c >> +++ b/arch/powerpc/kexec/elf_64.c >> @@ -22,6 +22,7 @@ >> #include 
<linux/of_fdt.h> >> #include <linux/slab.h> >> #include <linux/types.h> >> +#include <asm/crashdump-ppc64.h> >> >> static void *elf64_load(struct kimage *image, char *kernel_buf, >> unsigned long kernel_len, char *initrd, >> @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, >> if (ret) >> goto out; >> >> + if (image->type == KEXEC_TYPE_CRASH) { >> + /* min & max buffer values for kdump case */ >> + kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN; >> + kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX; > > This is only my personal opinion and an actual maintainer may disagree, > but just looking at the lines above, I would assume that KDUMP_BUF_MIN > and KDUMP_BUF_MAX were constants, when in fact they aren't. > > I suggest using static inline macros in <asm/crashdump-ppc64.h>, for > example: > > static inline resource_size_t get_kdump_buf_min(void) > { > return crashk_res.start; > } > > static inline resource_size_t get_kdump_buf_max(void) > { > return (crashk_res.end < ppc64_rma_size) ? \ > crashk_res.end : (ppc64_rma_size - 1) > } I later noticed that KDUMP_BUF_MIN and KDUMP_BUF_MAX are only used here. In this case, I think the best option is to avoid the macros and inline functions and just use the actual expressions in the code.
On 15/07/20 8:09 am, Thiago Jung Bauermann wrote: > > Hari Bathini <hbathini@linux.ibm.com> writes: > <snip> >> +/** >> + * __locate_mem_hole_top_down - Looks top down for a large enough memory hole >> + * in the memory regions between buf_min & buf_max >> + * for the buffer. If found, sets kbuf->mem. >> + * @kbuf: Buffer contents and memory parameters. >> + * @buf_min: Minimum address for the buffer. >> + * @buf_max: Maximum address for the buffer. >> + * >> + * Returns 0 on success, negative errno on error. >> + */ >> +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, >> + u64 buf_min, u64 buf_max) >> +{ >> + int ret = -EADDRNOTAVAIL; >> + phys_addr_t start, end; >> + u64 i; >> + >> + for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, >> + MEMBLOCK_NONE, &start, &end, NULL) { >> + if (start > buf_max) >> + continue; >> + >> + /* Memory hole not found */ >> + if (end < buf_min) >> + break; >> + >> + /* Adjust memory region based on the given range */ >> + if (start < buf_min) >> + start = buf_min; >> + if (end > buf_max) >> + end = buf_max; >> + >> + start = ALIGN(start, kbuf->buf_align); >> + if (start < end && (end - start + 1) >= kbuf->memsz) { > > This is why I dislike using start and end to express address ranges: > > While struct resource seems to use the [address, end] convention, my struct crash_mem also uses [address, end] convention. This off-by-one error did not cause any issues as the hole start and size we try to find are at least page aligned. Nonetheless, I think fixing 'end' early in the loop with "end -= 1" would ensure correctness while continuing to use the same convention for structs crash_mem & resource. Thanks Hari
Hari Bathini <hbathini@linux.ibm.com> writes: > On 15/07/20 8:09 am, Thiago Jung Bauermann wrote: >> >> Hari Bathini <hbathini@linux.ibm.com> writes: >> > > <snip> > >>> +/** >>> + * __locate_mem_hole_top_down - Looks top down for a large enough memory hole >>> + * in the memory regions between buf_min & buf_max >>> + * for the buffer. If found, sets kbuf->mem. >>> + * @kbuf: Buffer contents and memory parameters. >>> + * @buf_min: Minimum address for the buffer. >>> + * @buf_max: Maximum address for the buffer. >>> + * >>> + * Returns 0 on success, negative errno on error. >>> + */ >>> +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, >>> + u64 buf_min, u64 buf_max) >>> +{ >>> + int ret = -EADDRNOTAVAIL; >>> + phys_addr_t start, end; >>> + u64 i; >>> + >>> + for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, >>> + MEMBLOCK_NONE, &start, &end, NULL) { >>> + if (start > buf_max) >>> + continue; >>> + >>> + /* Memory hole not found */ >>> + if (end < buf_min) >>> + break; >>> + >>> + /* Adjust memory region based on the given range */ >>> + if (start < buf_min) >>> + start = buf_min; >>> + if (end > buf_max) >>> + end = buf_max; >>> + >>> + start = ALIGN(start, kbuf->buf_align); >>> + if (start < end && (end - start + 1) >= kbuf->memsz) { >> >> This is why I dislike using start and end to express address ranges: >> >> While struct resource seems to use the [address, end] convention, my > > struct crash_mem also uses [address, end] convention. > This off-by-one error did not cause any issues as the hole start and size we try to find > are at least page aligned. > > Nonetheless, I think fixing 'end' early in the loop with "end -= 1" would ensure > correctness while continuing to use the same convention for structs crash_mem & resource. Sounds good.
diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h new file mode 100644 index 0000000..90deb46 --- /dev/null +++ b/arch/powerpc/include/asm/crashdump-ppc64.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H +#define _ASM_POWERPC_CRASHDUMP_PPC64_H + +/* min & max addresses for kdump load segments */ +#define KDUMP_BUF_MIN (crashk_res.start) +#define KDUMP_BUF_MAX ((crashk_res.end < ppc64_rma_size) ? \ + crashk_res.end : (ppc64_rma_size - 1)) + +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 7008ea1..bf47a01 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; -#ifdef CONFIG_IMA_KEXEC #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + struct crash_mem *exclude_ranges; + +#ifdef CONFIG_IMA_KEXEC phys_addr_t ima_buffer_addr; size_t ima_buffer_size; -}; #endif +}; int setup_purgatory(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline); #endif /* CONFIG_PPC64 */ + #endif /* CONFIG_KEXEC_FILE */ #else /* !CONFIG_KEXEC_CORE */ diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 23ad04c..c695f94 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -22,6 +22,7 @@ #include <linux/of_fdt.h> #include <linux/slab.h> #include <linux/types.h> +#include <asm/crashdump-ppc64.h> static void *elf64_load(struct kimage *image, char *kernel_buf, unsigned long kernel_len, char *initrd, @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage 
*image, char *kernel_buf, if (ret) goto out; + if (image->type == KEXEC_TYPE_CRASH) { + /* min & max buffer values for kdump case */ + kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN; + kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX; + } + ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); if (ret) goto out; diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index e6bff960..7673481 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -17,6 +17,9 @@ #include <linux/kexec.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> +#include <linux/memblock.h> +#include <asm/kexec_ranges.h> +#include <asm/crashdump-ppc64.h> const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_elf64_ops, @@ -24,6 +27,240 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { }; /** + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes + * regions like opal/rtas, tce-table, initrd, + * kernel, htab which should be avoided while + * setting up kexec load segments. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. 
+ */ +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_initrd_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_htab_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_kernel_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_ranges(mem_ranges); + if (ret) + goto out; + + /* exclude memory ranges should be sorted for easy lookup */ + sort_memory_ranges(*mem_ranges, true); +out: + if (ret) + pr_err("Failed to setup exclude memory ranges\n"); + return ret; +} + +/** + * __locate_mem_hole_top_down - Looks top down for a large enough memory hole + * in the memory regions between buf_min & buf_max + * for the buffer. If found, sets kbuf->mem. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * + * Returns 0 on success, negative errno on error. + */ +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max) +{ + int ret = -EADDRNOTAVAIL; + phys_addr_t start, end; + u64 i; + + for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + if (start > buf_max) + continue; + + /* Memory hole not found */ + if (end < buf_min) + break; + + /* Adjust memory region based on the given range */ + if (start < buf_min) + start = buf_min; + if (end > buf_max) + end = buf_max; + + start = ALIGN(start, kbuf->buf_align); + if (start < end && (end - start + 1) >= kbuf->memsz) { + /* Suitable memory range found. 
Set kbuf->mem */ + kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1, + kbuf->buf_align); + ret = 0; + break; + } + } + + return ret; +} + +/** + * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a + * suitable buffer with top down approach. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * @emem: Exclude memory ranges. + * + * Returns 0 on success, negative errno on error. + */ +static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max, + const struct crash_mem *emem) +{ + int i, ret = 0, err = -EADDRNOTAVAIL; + u64 start, end, tmin, tmax; + + tmax = buf_max; + for (i = (emem->nr_ranges - 1); i >= 0; i--) { + start = emem->ranges[i].start; + end = emem->ranges[i].end; + + if (start > tmax) + continue; + + if (end < tmax) { + tmin = (end < buf_min ? buf_min : end + 1); + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); + if (!ret) + return 0; + } + + tmax = start - 1; + + if (tmax < buf_min) { + ret = err; + break; + } + ret = 0; + } + + if (!ret) { + tmin = buf_min; + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); + } + return ret; +} + +/** + * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole + * in the memory regions between buf_min & buf_max + * for the buffer. If found, sets kbuf->mem. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * + * Returns 0 on success, negative errno on error. 
+ */ +static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max) +{ + int ret = -EADDRNOTAVAIL; + phys_addr_t start, end; + u64 i; + + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + if (end < buf_min) + continue; + + /* Memory hole not found */ + if (start > buf_max) + break; + + /* Adjust memory region based on the given range */ + if (start < buf_min) + start = buf_min; + if (end > buf_max) + end = buf_max; + + start = ALIGN(start, kbuf->buf_align); + if (start < end && (end - start + 1) >= kbuf->memsz) { + /* Suitable memory range found. Set kbuf->mem */ + kbuf->mem = start; + ret = 0; + break; + } + } + + return ret; +} + +/** + * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a + * suitable buffer with bottom up approach. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * @emem: Exclude memory ranges. + * + * Returns 0 on success, negative errno on error. + */ +static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max, + const struct crash_mem *emem) +{ + int i, ret = 0, err = -EADDRNOTAVAIL; + u64 start, end, tmin, tmax; + + tmin = buf_min; + for (i = 0; i < emem->nr_ranges; i++) { + start = emem->ranges[i].start; + end = emem->ranges[i].end; + + if (end < tmin) + continue; + + if (start > tmin) { + tmax = (start > buf_max ? buf_max : start - 1); + ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); + if (!ret) + return 0; + } + + tmin = end + 1; + + if (tmin > buf_max) { + ret = err; + break; + } + ret = 0; + } + + if (!ret) { + tmax = buf_max; + ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); + } + return ret; +} + +/** * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global * variables and call setup_purgatory() to initialize * common global variable. 
@@ -89,6 +326,67 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, } /** + * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal, + * tce-table, reserved-ranges & such (exclude + * memory ranges) as they can't be used for kexec + * segment buffer. Sets kbuf->mem when a suitable + * memory hole is found. + * @kbuf: Buffer contents and memory parameters. + * + * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) +{ + struct crash_mem **emem; + u64 buf_min, buf_max; + int ret; + + /* + * Use the generic kexec_locate_mem_hole for regular + * kexec_file_load syscall + */ + if (kbuf->image->type != KEXEC_TYPE_CRASH) + return kexec_locate_mem_hole(kbuf); + + /* Look up the exclude ranges list while locating the memory hole */ + emem = &(kbuf->image->arch.exclude_ranges); + if (!(*emem) || ((*emem)->nr_ranges == 0)) { + pr_warn("No exclude range list. Using the default locate mem hole method\n"); + return kexec_locate_mem_hole(kbuf); + } + + /* Segments for kdump kernel should be within crashkernel region */ + buf_min = (kbuf->buf_min < crashk_res.start ? + crashk_res.start : kbuf->buf_min); + buf_max = (kbuf->buf_max > crashk_res.end ? 
+ crashk_res.end : kbuf->buf_max); + + if (buf_min > buf_max) { + pr_err("Invalid buffer min and/or max values\n"); + return -EINVAL; + } + + if (kbuf->top_down) + ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max, + *emem); + else + ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max, + *emem); + + /* Add the buffer allocated to the exclude list for the next lookup */ + if (!ret) { + add_mem_range(emem, kbuf->mem, kbuf->memsz); + sort_memory_ranges(*emem, true); + } else { + pr_err("Failed to locate memory buffer of size %lu\n", + kbuf->memsz); + } + return ret; +} + +/** * arch_kexec_kernel_image_probe - Does additional handling needed to setup * kexec segments. * @image: kexec image being loaded. @@ -100,9 +398,31 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len) { - /* We don't support crash kernels yet. */ - if (image->type == KEXEC_TYPE_CRASH) + if (image->type == KEXEC_TYPE_CRASH) { + int ret; + + /* Get exclude memory ranges needed for setting up kdump segments */ + ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges)); + if (ret) + pr_err("Failed to setup exclude memory ranges for buffer lookup\n"); + /* Return this until all changes for panic kernel are in */ return -EOPNOTSUPP; + } return kexec_image_probe_default(image, buf, buf_len); } + +/** + * arch_kimage_file_post_load_cleanup - Frees up all the allocations done + * while loading the image. + * @image: kexec image being loaded. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kfree(image->arch.exclude_ranges); + image->arch.exclude_ranges = NULL; + + return kexec_image_post_load_cleanup_default(image); +}
The crashkernel region could overlap with special memory regions like opal, rtas, tce-table & such. These regions are referred to as exclude memory ranges. Set up these ranges during image probe in order to avoid them while finding the buffer for different kdump segments. Override arch_kexec_locate_mem_hole() to locate a memory hole taking these ranges into account. Signed-off-by: Hari Bathini <hbathini@linux.ibm.com> --- v2 -> v3: * If there are no exclude ranges, the right thing to do is to fall back to the default kexec_locate_mem_hole() implementation instead of returning 0. Fixed that. v1 -> v2: * Did arch_kexec_locate_mem_hole() override to handle special regions. * Ensured holes in the memory are accounted for while locating mem hole. * Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on the new prototype for these functions. arch/powerpc/include/asm/crashdump-ppc64.h | 10 + arch/powerpc/include/asm/kexec.h | 7 - arch/powerpc/kexec/elf_64.c | 7 + arch/powerpc/kexec/file_load_64.c | 324 ++++++++++++++++++++++++++++ 4 files changed, 344 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h