Message ID | 159466098739.24747.5860501703617893464.stgit@hbathini.in.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | ppc64: enable kdump support for kexec_file_load syscall | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (71d6070a8e0e0a1ed82365544f97b86475cb161e) |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 0 warnings, 5 checks, 317 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
Hari Bathini <hbathini@linux.ibm.com> writes: > /** > + * get_crash_memory_ranges - Get crash memory ranges. This list includes > + * first/crashing kernel's memory regions that > + * would be exported via an elfcore. > + * @mem_ranges: Range list to add the memory ranges to. > + * > + * Returns 0 on success, negative errno on error. > + */ > +static int get_crash_memory_ranges(struct crash_mem **mem_ranges) > +{ > + struct memblock_region *reg; > + struct crash_mem *tmem; > + int ret; > + > + for_each_memblock(memory, reg) { > + u64 base, size; > + > + base = (u64)reg->base; > + size = (u64)reg->size; > + > + /* Skip backup memory region, which needs a separate entry */ > + if (base == BACKUP_SRC_START) { > + if (size > BACKUP_SRC_SIZE) { > + base = BACKUP_SRC_END + 1; > + size -= BACKUP_SRC_SIZE; > + } else > + continue; > + } > + > + ret = add_mem_range(mem_ranges, base, size); > + if (ret) > + goto out; > + > + /* Try merging adjacent ranges before reallocation attempt */ > + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) > + sort_memory_ranges(*mem_ranges, true); > + } > + > + /* Reallocate memory ranges if there is no space to split ranges */ > + tmem = *mem_ranges; > + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { > + tmem = realloc_mem_ranges(mem_ranges); > + if (!tmem) > + goto out; > + } > + > + /* Exclude crashkernel region */ > + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); > + if (ret) > + goto out; > + > + ret = add_rtas_mem_range(mem_ranges); > + if (ret) > + goto out; > + > + ret = add_opal_mem_range(mem_ranges); > + if (ret) > + goto out; Maybe I'm confused, but don't you add the RTAS and OPAL regions as usable memory for the crashkernel? In that case they shouldn't show up in the core file. 
> + > + /* create a separate program header for the backup region */ > + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); > + if (ret) > + goto out; > + > + sort_memory_ranges(*mem_ranges, false); > +out: > + if (ret) > + pr_err("Failed to setup crash memory ranges\n"); > + return ret; > +} <snip> > +/** > + * prepare_elf_headers - Prepare headers for the elfcore to be exported as > + * /proc/vmcore by the kdump kernel. > + * @image: Kexec image. > + * @cmem: Crash memory ranges to be exported via elfcore. > + * @addr: Vmalloc'd memory allocated by crash_prepare_elf64_headers > + * to prepare the elf headers. > + * @sz: Size of the vmalloc'd memory allocated. > + * > + * Returns 0 on success, negative errno on error. > + */ > +static int prepare_elf_headers(struct kimage *image, struct crash_mem *cmem, > + void **addr, unsigned long *sz) > +{ > + int ret; > + > + ret = crash_prepare_elf64_headers(cmem, false, addr, sz); > + > + /* Fix the offset for backup region in the ELF header */ > + if (!ret) > + update_backup_region_phdr(image, *addr); > + > + return ret; > +} The code above can be inlined into its caller, I don't see a need to have a separate function. > + > +/** > + * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr > + * segment needed to load kdump kernel. > + * @image: Kexec image. > + * @kbuf: Buffer contents and memory parameters. > + * > + * Returns 0 on success, negative errno on error. 
> + */ > +static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) > +{ > + struct crash_mem *cmem = NULL; > + unsigned long headers_sz; > + void *headers = NULL; > + int ret; > + > + ret = get_crash_memory_ranges(&cmem); > + if (ret) > + goto out; > + > + /* Setup elfcorehdr segment */ > + ret = prepare_elf_headers(image, cmem, &headers, &headers_sz); > + if (ret) { > + pr_err("Failed to prepare elf headers for the core\n"); > + goto out; > + } > + > + kbuf->buffer = headers; > + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; > + kbuf->bufsz = kbuf->memsz = headers_sz; > + kbuf->top_down = false; > + > + ret = kexec_add_buffer(kbuf); > + if (ret) { > + vfree(headers); > + goto out; > + } > + > + image->arch.elfcorehdr_addr = kbuf->mem; > + image->arch.elf_headers_sz = headers_sz; > + image->arch.elf_headers = headers; > +out: > + kfree(cmem); > + return ret; > +} -- Thiago Jung Bauermann IBM Linux Technology Center
On 16/07/20 7:52 am, Thiago Jung Bauermann wrote: > > Hari Bathini <hbathini@linux.ibm.com> writes: > >> /** >> + * get_crash_memory_ranges - Get crash memory ranges. This list includes >> + * first/crashing kernel's memory regions that >> + * would be exported via an elfcore. >> + * @mem_ranges: Range list to add the memory ranges to. >> + * >> + * Returns 0 on success, negative errno on error. >> + */ >> +static int get_crash_memory_ranges(struct crash_mem **mem_ranges) >> +{ >> + struct memblock_region *reg; >> + struct crash_mem *tmem; >> + int ret; >> + >> + for_each_memblock(memory, reg) { >> + u64 base, size; >> + >> + base = (u64)reg->base; >> + size = (u64)reg->size; >> + >> + /* Skip backup memory region, which needs a separate entry */ >> + if (base == BACKUP_SRC_START) { >> + if (size > BACKUP_SRC_SIZE) { >> + base = BACKUP_SRC_END + 1; >> + size -= BACKUP_SRC_SIZE; >> + } else >> + continue; >> + } >> + >> + ret = add_mem_range(mem_ranges, base, size); >> + if (ret) >> + goto out; >> + >> + /* Try merging adjacent ranges before reallocation attempt */ >> + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) >> + sort_memory_ranges(*mem_ranges, true); >> + } >> + >> + /* Reallocate memory ranges if there is no space to split ranges */ >> + tmem = *mem_ranges; >> + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { >> + tmem = realloc_mem_ranges(mem_ranges); >> + if (!tmem) >> + goto out; >> + } >> + >> + /* Exclude crashkernel region */ >> + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); >> + if (ret) >> + goto out; >> + >> + ret = add_rtas_mem_range(mem_ranges); >> + if (ret) >> + goto out; >> + >> + ret = add_opal_mem_range(mem_ranges); >> + if (ret) >> + goto out; > > Maybe I'm confused, but don't you add the RTAS and OPAL regions as > usable memory for the crashkernel? In that case they shouldn't show up > in the core file. kexec-tools does the same thing. 
I am not endorsing it, but I was trying to stay in parity with kexec-tools to avoid breaking any userspace tools/commands. But as you rightly pointed out, this is NOT right. The right thing to do, to get the rtas/opal data at the time of crash, is to have a backup region for them just like we have for the first 64K memory. I was hoping to do that later. Will check how userspace tools respond to dropping these regions. If that makes the tools unhappy, will retain the regions with a FIXME. Sorry about the confusion. Thanks Hari
Hari Bathini <hbathini@linux.ibm.com> writes: > On 16/07/20 7:52 am, Thiago Jung Bauermann wrote: >> >> Hari Bathini <hbathini@linux.ibm.com> writes: >> >>> /** >>> + * get_crash_memory_ranges - Get crash memory ranges. This list includes >>> + * first/crashing kernel's memory regions that >>> + * would be exported via an elfcore. >>> + * @mem_ranges: Range list to add the memory ranges to. >>> + * >>> + * Returns 0 on success, negative errno on error. >>> + */ >>> +static int get_crash_memory_ranges(struct crash_mem **mem_ranges) >>> +{ >>> + struct memblock_region *reg; >>> + struct crash_mem *tmem; >>> + int ret; >>> + >>> + for_each_memblock(memory, reg) { >>> + u64 base, size; >>> + >>> + base = (u64)reg->base; >>> + size = (u64)reg->size; >>> + >>> + /* Skip backup memory region, which needs a separate entry */ >>> + if (base == BACKUP_SRC_START) { >>> + if (size > BACKUP_SRC_SIZE) { >>> + base = BACKUP_SRC_END + 1; >>> + size -= BACKUP_SRC_SIZE; >>> + } else >>> + continue; >>> + } >>> + >>> + ret = add_mem_range(mem_ranges, base, size); >>> + if (ret) >>> + goto out; >>> + >>> + /* Try merging adjacent ranges before reallocation attempt */ >>> + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) >>> + sort_memory_ranges(*mem_ranges, true); >>> + } >>> + >>> + /* Reallocate memory ranges if there is no space to split ranges */ >>> + tmem = *mem_ranges; >>> + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { >>> + tmem = realloc_mem_ranges(mem_ranges); >>> + if (!tmem) >>> + goto out; >>> + } >>> + >>> + /* Exclude crashkernel region */ >>> + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); >>> + if (ret) >>> + goto out; >>> + >>> + ret = add_rtas_mem_range(mem_ranges); >>> + if (ret) >>> + goto out; >>> + >>> + ret = add_opal_mem_range(mem_ranges); >>> + if (ret) >>> + goto out; >> >> Maybe I'm confused, but don't you add the RTAS and OPAL regions as >> usable memory for the crashkernel? 
In that case they shouldn't show up >> in the core file. > > kexec-tools does the same thing. I am not endorsing it but I was trying to stay > in parity to avoid breaking any userspace tools/commands. But as you rightly > pointed, this is NOT right. The right thing to do, to get the rtas/opal data at > the time of crash, is to have a backup region for them just like we have for > the first 64K memory. I was hoping to do that later. > > Will check how userspace tools respond to dropping these regions. If that makes > the tools unhappy, will retain the regions with a FIXME. Sorry about the confusion. No problem, thanks for the clarification.
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 037cf2b..8b0a6d6 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -112,12 +112,18 @@ struct kimage_arch { unsigned long backup_start; void *backup_buf; + unsigned long elfcorehdr_addr; + unsigned long elf_headers_sz; + void *elf_headers; + #ifdef CONFIG_IMA_KEXEC phys_addr_t ima_buffer_addr; size_t ima_buffer_size; #endif }; +char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len); int setup_purgatory(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr); diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 4838b42..40a028c 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -36,6 +36,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, void *fdt; const void *slave_code; struct elfhdr ehdr; + char *modified_cmdline = NULL; struct kexec_elf_info elf_info; struct kexec_buf kbuf = { .image = image, .buf_min = 0, .buf_max = ppc64_rma_size }; @@ -74,6 +75,16 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_err("Failed to load kdump kernel segments\n"); goto out; } + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; } if (initrd != NULL) { @@ -130,6 +141,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_err("Error setting up the purgatory.\n"); out: + kfree(modified_cmdline); kexec_free_elf_info(&elf_info); /* Make kimage_file_post_load_cleanup free the fdt buffer for us. 
*/ diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c index 99a2c4d..2e74992 100644 --- a/arch/powerpc/kexec/file_load.c +++ b/arch/powerpc/kexec/file_load.c @@ -17,11 +17,46 @@ #include <linux/slab.h> #include <linux/kexec.h> #include <linux/libfdt.h> +#include <asm/setup.h> #include <asm/ima.h> #define SLAVE_CODE_SIZE 256 /* First 0x100 bytes */ /** + * setup_kdump_cmdline - Prepend "elfcorehdr=<addr> " to command line + * of kdump kernel for exporting the core. + * @image: Kexec image + * @cmdline: Command line parameters to update. + * @cmdline_len: Length of the cmdline parameters. + * + * kdump segment must be setup before calling this function. + * + * Returns new cmdline buffer for kdump kernel on success, NULL otherwise. + */ +char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) +{ + int elfcorehdr_strlen; + char *cmdline_ptr; + + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->arch.elfcorehdr_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } + + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + return cmdline_ptr; +} + +/** * setup_purgatory - initialize the purgatory's global variables * @image: kexec image. * @slave_code: Slave code for the purgatory. @@ -215,6 +250,20 @@ int setup_new_fdt(const struct kimage *image, void *fdt, } } + if (image->type == KEXEC_TYPE_CRASH) { + /* + * Avoid elfcorehdr from being stomped on in kdump kernel by + * setting up memory reserve map. 
+ */ + ret = fdt_add_mem_rsv(fdt, image->arch.elfcorehdr_addr, + image->arch.elf_headers_sz); + if (ret) { + pr_err("Error reserving elfcorehdr memory: %s\n", + fdt_strerror(ret)); + goto err; + } + } + ret = setup_ima_buffer(image, fdt, chosen_node); if (ret) { pr_err("Error setting up the new device tree.\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 152f61a..2531bb5 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -122,6 +122,77 @@ static int get_usable_memory_ranges(struct crash_mem **mem_ranges) } /** + * get_crash_memory_ranges - Get crash memory ranges. This list includes + * first/crashing kernel's memory regions that + * would be exported via an elfcore. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_crash_memory_ranges(struct crash_mem **mem_ranges) +{ + struct memblock_region *reg; + struct crash_mem *tmem; + int ret; + + for_each_memblock(memory, reg) { + u64 base, size; + + base = (u64)reg->base; + size = (u64)reg->size; + + /* Skip backup memory region, which needs a separate entry */ + if (base == BACKUP_SRC_START) { + if (size > BACKUP_SRC_SIZE) { + base = BACKUP_SRC_END + 1; + size -= BACKUP_SRC_SIZE; + } else + continue; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + goto out; + + /* Try merging adjacent ranges before reallocation attempt */ + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) + sort_memory_ranges(*mem_ranges, true); + } + + /* Reallocate memory ranges if there is no space to split ranges */ + tmem = *mem_ranges; + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { + tmem = realloc_mem_ranges(mem_ranges); + if (!tmem) + goto out; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto 
out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + /* create a separate program header for the backup region */ + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); + if (ret) + goto out; + + sort_memory_ranges(*mem_ranges, false); +out: + if (ret) + pr_err("Failed to setup crash memory ranges\n"); + return ret; +} + +/** * __locate_mem_hole_top_down - Looks top down for a large enough memory hole * in the memory regions between buf_min & buf_max * for the buffer. If found, sets kbuf->mem. @@ -900,6 +971,103 @@ static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf) } /** + * update_backup_region_phdr - Update backup region's offset for the core to + * export the region appropriately. + * @image: Kexec image. + * @ehdr: ELF core header. + * + * Assumes an exclusive program header is setup for the backup region + * in the ELF headers + * + * Returns nothing. + */ +static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr) +{ + Elf64_Phdr *phdr; + unsigned int i; + + phdr = (Elf64_Phdr *)(ehdr + 1); + for (i = 0; i < ehdr->e_phnum; i++) { + if (phdr->p_paddr == BACKUP_SRC_START) { + phdr->p_offset = image->arch.backup_start; + pr_debug("Backup region offset updated to 0x%lx\n", + image->arch.backup_start); + return; + } + } +} + +/** + * prepare_elf_headers - Prepare headers for the elfcore to be exported as + * /proc/vmcore by the kdump kernel. + * @image: Kexec image. + * @cmem: Crash memory ranges to be exported via elfcore. + * @addr: Vmalloc'd memory allocated by crash_prepare_elf64_headers + * to prepare the elf headers. + * @sz: Size of the vmalloc'd memory allocated. + * + * Returns 0 on success, negative errno on error. 
+ */ +static int prepare_elf_headers(struct kimage *image, struct crash_mem *cmem, + void **addr, unsigned long *sz) +{ + int ret; + + ret = crash_prepare_elf64_headers(cmem, false, addr, sz); + + /* Fix the offset for backup region in the ELF header */ + if (!ret) + update_backup_region_phdr(image, *addr); + + return ret; +} + +/** + * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr + * segment needed to load kdump kernel. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. + * + * Returns 0 on success, negative errno on error. + */ +static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) +{ + struct crash_mem *cmem = NULL; + unsigned long headers_sz; + void *headers = NULL; + int ret; + + ret = get_crash_memory_ranges(&cmem); + if (ret) + goto out; + + /* Setup elfcorehdr segment */ + ret = prepare_elf_headers(image, cmem, &headers, &headers_sz); + if (ret) { + pr_err("Failed to prepare elf headers for the core\n"); + goto out; + } + + kbuf->buffer = headers; + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf->bufsz = kbuf->memsz = headers_sz; + kbuf->top_down = false; + + ret = kexec_add_buffer(kbuf); + if (ret) { + vfree(headers); + goto out; + } + + image->arch.elfcorehdr_addr = kbuf->mem; + image->arch.elf_headers_sz = headers_sz; + image->arch.elf_headers = headers; +out: + kfree(cmem); + return ret; +} + +/** * load_crashdump_segments_ppc64 - Initialize the additional segements needed * to load kdump kernel. * @image: Kexec image. 
@@ -920,6 +1088,15 @@ int load_crashdump_segments_ppc64(struct kimage *image, } pr_debug("Loaded the backup region at 0x%lx\n", kbuf->mem); + /* Load elfcorehdr segment - to export crashing kernel's vmcore */ + ret = load_elfcorehdr_segment(image, kbuf); + if (ret) { + pr_err("Failed to load elfcorehdr segment\n"); + return ret; + } + pr_debug("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n", + image->arch.elfcorehdr_addr, kbuf->bufsz, kbuf->memsz); + return 0; } @@ -1280,5 +1457,9 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) vfree(image->arch.backup_buf); image->arch.backup_buf = NULL; + vfree(image->arch.elf_headers); + image->arch.elf_headers = NULL; + image->arch.elf_headers_sz = 0; + return kexec_image_post_load_cleanup_default(image); }