Message ID | 156327676575.27462.15697361502635226365.stgit@hbathini.in.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | Add FADump support on PowerNV platform | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | warning | Failed to apply on branch next (f5c20693d8edcd665f1159dc941b9e7f87c17647) |
snowpatch_ozlabs/apply_patch | fail | Failed to apply to any branch |
On 2019-07-16 17:02:45 Tue, Hari Bathini wrote: > Move code that supports processing the crash'ed kernel's memory > preserved by firmware to platform specific callback functions. > > Signed-off-by: Hari Bathini <hbathini@linux.ibm.com> > --- > arch/powerpc/kernel/fadump-common.h | 6 > arch/powerpc/kernel/fadump.c | 340 +------------------------- > arch/powerpc/platforms/pseries/rtas-fadump.c | 278 +++++++++++++++++++++ > 3 files changed, 299 insertions(+), 325 deletions(-) > Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Thanks, -Mahesh. > diff --git a/arch/powerpc/kernel/fadump-common.h b/arch/powerpc/kernel/fadump-common.h > index 273247d..0231a0b 100644 > --- a/arch/powerpc/kernel/fadump-common.h > +++ b/arch/powerpc/kernel/fadump-common.h > @@ -100,6 +100,12 @@ struct fw_dump { > /* cmd line option during boot */ > unsigned long reserve_bootvar; > > + /* > + * Start address of preserve area. This memory is reserved > + * permanently (production or capture kernel) for FADump. 
> + */ > + unsigned long preserv_area_start; > + > unsigned long cpu_state_data_size; > unsigned long hpte_region_size; > unsigned long boot_memory_size; > diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c > index 650ebf8..e995db1 100644 > --- a/arch/powerpc/kernel/fadump.c > +++ b/arch/powerpc/kernel/fadump.c > @@ -28,15 +28,12 @@ > #include <asm/debugfs.h> > #include <asm/page.h> > #include <asm/prom.h> > -#include <asm/rtas.h> > #include <asm/fadump.h> > #include <asm/setup.h> > > #include "fadump-common.h" > -#include "../platforms/pseries/rtas-fadump.h" > > static struct fw_dump fw_dump; > -static const struct rtas_fadump_mem_struct *fdm_active; > > static DEFINE_MUTEX(fadump_mutex); > struct fad_crash_memory_ranges *crash_memory_ranges; > @@ -111,22 +108,13 @@ static int __init fadump_cma_init(void) { return 1; } > int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, > int depth, void *data) > { > - int ret; > - > - if (depth != 1 || strcmp(uname, "rtas") != 0) > + if (depth != 1) > return 0; > > - ret = rtas_fadump_dt_scan(&fw_dump, node); > + if (strcmp(uname, "rtas") == 0) > + return rtas_fadump_dt_scan(&fw_dump, node); > > - /* > - * The 'ibm,kernel-dump' rtas node is present only if there is > - * dump data waiting for us. > - */ > - fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL); > - if (fdm_active) > - fw_dump.dump_active = 1; > - > - return ret; > + return 0; > } > > /* > @@ -308,9 +296,7 @@ int __init fadump_reserve_mem(void) > * If dump is active then we have already calculated the size during > * first kernel. 
> */ > - if (fdm_active) > - fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len); > - else { > + if (!fw_dump.dump_active) { > fw_dump.boot_memory_size = fadump_calculate_reserve_size(); > #ifdef CONFIG_CMA > if (!fw_dump.nocma) > @@ -320,6 +306,7 @@ int __init fadump_reserve_mem(void) > #endif > } > > + size = get_fadump_area_size(); > if (memory_limit) > memory_boundary = memory_limit; > else > @@ -346,15 +333,10 @@ int __init fadump_reserve_mem(void) > size = memory_boundary - base; > fadump_reserve_crash_area(base, size); > > - fw_dump.fadumphdr_addr = > - be64_to_cpu(fdm_active->rmr_region.destination_address) + > - be64_to_cpu(fdm_active->rmr_region.source_len); > - pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr); > + pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr); > fw_dump.reserve_dump_area_start = base; > fw_dump.reserve_dump_area_size = size; > } else { > - size = get_fadump_area_size(); > - > /* > * Reserve memory at an offset closer to bottom of the RAM to > * minimize the impact of memory hot-remove operation. 
We can't > @@ -469,218 +451,6 @@ void crash_fadump(struct pt_regs *regs, const char *str) > fw_dump.ops->fadump_trigger(fdh, str); > } > > -#define GPR_MASK 0xffffff0000000000 > -static inline int fadump_gpr_index(u64 id) > -{ > - int i = -1; > - char str[3]; > - > - if ((id & GPR_MASK) == fadump_str_to_u64("GPR")) { > - /* get the digits at the end */ > - id &= ~GPR_MASK; > - id >>= 24; > - str[2] = '\0'; > - str[1] = id & 0xff; > - str[0] = (id >> 8) & 0xff; > - sscanf(str, "%d", &i); > - if (i > 31) > - i = -1; > - } > - return i; > -} > - > -static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id, > - u64 reg_val) > -{ > - int i; > - > - i = fadump_gpr_index(reg_id); > - if (i >= 0) > - regs->gpr[i] = (unsigned long)reg_val; > - else if (reg_id == fadump_str_to_u64("NIA")) > - regs->nip = (unsigned long)reg_val; > - else if (reg_id == fadump_str_to_u64("MSR")) > - regs->msr = (unsigned long)reg_val; > - else if (reg_id == fadump_str_to_u64("CTR")) > - regs->ctr = (unsigned long)reg_val; > - else if (reg_id == fadump_str_to_u64("LR")) > - regs->link = (unsigned long)reg_val; > - else if (reg_id == fadump_str_to_u64("XER")) > - regs->xer = (unsigned long)reg_val; > - else if (reg_id == fadump_str_to_u64("CR")) > - regs->ccr = (unsigned long)reg_val; > - else if (reg_id == fadump_str_to_u64("DAR")) > - regs->dar = (unsigned long)reg_val; > - else if (reg_id == fadump_str_to_u64("DSISR")) > - regs->dsisr = (unsigned long)reg_val; > -} > - > -static struct rtas_fadump_reg_entry* > -fadump_read_registers(struct rtas_fadump_reg_entry *reg_entry, struct pt_regs *regs) > -{ > - memset(regs, 0, sizeof(struct pt_regs)); > - > - while (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND")) { > - fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id), > - be64_to_cpu(reg_entry->reg_value)); > - reg_entry++; > - } > - reg_entry++; > - return reg_entry; > -} > - > -/* > - * Read CPU state dump data and convert it into ELF notes. 
> - * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be > - * used to access the data to allow for additional fields to be added without > - * affecting compatibility. Each list of registers for a CPU starts with > - * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, > - * 8 Byte ASCII identifier and 8 Byte register value. The register entry > - * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part > - * of register value. For more details refer to PAPR document. > - * > - * Only for the crashing cpu we ignore the CPU dump data and get exact > - * state from fadump crash info structure populated by first kernel at the > - * time of crash. > - */ > -static int __init fadump_build_cpu_notes(const struct rtas_fadump_mem_struct *fdm) > -{ > - struct rtas_fadump_reg_save_area_header *reg_header; > - struct rtas_fadump_reg_entry *reg_entry; > - struct fadump_crash_info_header *fdh = NULL; > - void *vaddr; > - unsigned long addr; > - u32 num_cpus, *note_buf; > - struct pt_regs regs; > - int i, rc = 0, cpu = 0; > - > - if (!fdm->cpu_state_data.bytes_dumped) > - return -EINVAL; > - > - addr = be64_to_cpu(fdm->cpu_state_data.destination_address); > - vaddr = __va(addr); > - > - reg_header = vaddr; > - if (be64_to_cpu(reg_header->magic_number) != > - fadump_str_to_u64("REGSAVE")) { > - printk(KERN_ERR "Unable to read register save area.\n"); > - return -ENOENT; > - } > - pr_debug("--------CPU State Data------------\n"); > - pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number)); > - pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset)); > - > - vaddr += be32_to_cpu(reg_header->num_cpu_offset); > - num_cpus = be32_to_cpu(*((__be32 *)(vaddr))); > - pr_debug("NumCpus : %u\n", num_cpus); > - vaddr += sizeof(u32); > - reg_entry = (struct rtas_fadump_reg_entry *)vaddr; > - > - /* Allocate buffer to hold cpu crash notes. 
*/ > - fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); > - fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size); > - note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size); > - if (!note_buf) { > - printk(KERN_ERR "Failed to allocate 0x%lx bytes for " > - "cpu notes buffer\n", fw_dump.cpu_notes_buf_size); > - return -ENOMEM; > - } > - fw_dump.cpu_notes_buf = __pa(note_buf); > - > - pr_debug("Allocated buffer for cpu notes of size %ld at %p\n", > - (num_cpus * sizeof(note_buf_t)), note_buf); > - > - if (fw_dump.fadumphdr_addr) > - fdh = __va(fw_dump.fadumphdr_addr); > - > - for (i = 0; i < num_cpus; i++) { > - if (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUSTRT")) { > - printk(KERN_ERR "Unable to read CPU state data\n"); > - rc = -ENOENT; > - goto error_out; > - } > - /* Lower 4 bytes of reg_value contains logical cpu id */ > - cpu = be64_to_cpu(reg_entry->reg_value) & RTAS_FADUMP_CPU_ID_MASK; > - if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) { > - RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); > - continue; > - } > - pr_debug("Reading register data for cpu %d...\n", cpu); > - if (fdh && fdh->crashing_cpu == cpu) { > - regs = fdh->regs; > - note_buf = fadump_regs_to_elf_notes(note_buf, &regs); > - RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); > - } else { > - reg_entry++; > - reg_entry = fadump_read_registers(reg_entry, &regs); > - note_buf = fadump_regs_to_elf_notes(note_buf, &regs); > - } > - } > - final_note(note_buf); > - > - if (fdh) { > - addr = fdh->elfcorehdr_addr; > - pr_debug("Updating elfcore header(%lx) with cpu notes\n", addr); > - fadump_update_elfcore_header(&fw_dump, (char *)__va(addr)); > - } > - return 0; > - > -error_out: > - fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf), > - fw_dump.cpu_notes_buf_size); > - fw_dump.cpu_notes_buf = 0; > - fw_dump.cpu_notes_buf_size = 0; > - return rc; > - > -} > - > -/* > - * Validate and process the dump data stored by firmware before exporting > - * 
it through '/proc/vmcore'. > - */ > -static int __init process_fadump(const struct rtas_fadump_mem_struct *fdm_active) > -{ > - struct fadump_crash_info_header *fdh; > - int rc = 0; > - > - if (!fdm_active || !fw_dump.fadumphdr_addr) > - return -EINVAL; > - > - /* Check if the dump data is valid. */ > - if ((be16_to_cpu(fdm_active->header.dump_status_flag) == RTAS_FADUMP_ERROR_FLAG) || > - (fdm_active->cpu_state_data.error_flags != 0) || > - (fdm_active->rmr_region.error_flags != 0)) { > - printk(KERN_ERR "Dump taken by platform is not valid\n"); > - return -EINVAL; > - } > - if ((fdm_active->rmr_region.bytes_dumped != > - fdm_active->rmr_region.source_len) || > - !fdm_active->cpu_state_data.bytes_dumped) { > - printk(KERN_ERR "Dump taken by platform is incomplete\n"); > - return -EINVAL; > - } > - > - /* Validate the fadump crash info header */ > - fdh = __va(fw_dump.fadumphdr_addr); > - if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { > - printk(KERN_ERR "Crash info header is not valid.\n"); > - return -EINVAL; > - } > - > - rc = fadump_build_cpu_notes(fdm_active); > - if (rc) > - return rc; > - > - /* > - * We are done validating dump info and elfcore header is now ready > - * to be exported. set elfcorehdr_addr so that vmcore module will > - * export the elfcore header through '/proc/vmcore'. 
> - */ > - elfcorehdr_addr = fdh->elfcorehdr_addr; > - > - return 0; > -} > - > static void free_crash_memory_ranges(void) > { > kfree(crash_memory_ranges); > @@ -970,7 +740,6 @@ static unsigned long init_fadump_header(unsigned long addr) > if (!addr) > return 0; > > - fw_dump.fadumphdr_addr = addr; > fdh = __va(addr); > addr += sizeof(struct fadump_crash_info_header); > > @@ -1014,39 +783,12 @@ static int register_fadump(void) > return fw_dump.ops->register_fadump(&fw_dump); > } > > -static int fadump_invalidate_dump(const struct rtas_fadump_mem_struct *fdm) > -{ > - int rc = 0; > - unsigned int wait_time; > - > - pr_debug("Invalidating firmware-assisted dump registration\n"); > - > - /* TODO: Add upper time limit for the delay */ > - do { > - rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, > - FADUMP_INVALIDATE, fdm, > - sizeof(struct rtas_fadump_mem_struct)); > - > - wait_time = rtas_busy_delay_time(rc); > - if (wait_time) > - mdelay(wait_time); > - } while (wait_time); > - > - if (rc) { > - pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc); > - return rc; > - } > - fw_dump.dump_active = 0; > - fdm_active = NULL; > - return 0; > -} > - > void fadump_cleanup(void) > { > /* Invalidate the registration only if dump is active. */ > if (fw_dump.dump_active) { > - /* pass the same memory dump structure provided by platform */ > - fadump_invalidate_dump(fdm_active); > + pr_debug("Invalidating firmware-assisted dump registration\n"); > + fw_dump.ops->invalidate_fadump(&fw_dump); > } else if (fw_dump.dump_registered) { > /* Un-register Firmware-assisted dump if it was registered. 
*/ > fw_dump.ops->unregister_fadump(&fw_dump); > @@ -1132,7 +874,7 @@ static void fadump_invalidate_release_mem(void) > return; > } > > - destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address); > + destination_address = fw_dump.preserv_area_start; > fadump_cleanup(); > mutex_unlock(&fadump_mutex); > > @@ -1158,6 +900,7 @@ static void fadump_invalidate_release_mem(void) > fw_dump.cpu_notes_buf = 0; > fw_dump.cpu_notes_buf_size = 0; > } > + > /* Initialize the kernel dump memory structure for FAD registration. */ > fw_dump.ops->init_fadump_mem_struct(&fw_dump); > } > @@ -1210,7 +953,7 @@ static ssize_t fadump_register_store(struct kobject *kobj, > int ret = 0; > int input = -1; > > - if (!fw_dump.fadump_enabled || fdm_active) > + if (!fw_dump.fadump_enabled || fw_dump.dump_active) > return -EPERM; > > if (kstrtoint(buf, 0, &input)) > @@ -1223,7 +966,9 @@ static ssize_t fadump_register_store(struct kobject *kobj, > if (fw_dump.dump_registered == 0) { > goto unlock_out; > } > + > /* Un-register Firmware-assisted dump */ > + pr_debug("Un-register firmware-assisted dump\n"); > fw_dump.ops->unregister_fadump(&fw_dump); > break; > case 1: > @@ -1246,63 +991,13 @@ static ssize_t fadump_register_store(struct kobject *kobj, > > static int fadump_region_show(struct seq_file *m, void *private) > { > - const struct rtas_fadump_mem_struct *fdm_ptr; > - > if (!fw_dump.fadump_enabled) > return 0; > > mutex_lock(&fadump_mutex); > - if (fdm_active) > - fdm_ptr = fdm_active; > - else { > - mutex_unlock(&fadump_mutex); > - fw_dump.ops->fadump_region_show(&fw_dump, m); > - return 0; > - } > + fw_dump.ops->fadump_region_show(&fw_dump, m); > + mutex_unlock(&fadump_mutex); > > - seq_printf(m, > - "CPU : [%#016llx-%#016llx] %#llx bytes, " > - "Dumped: %#llx\n", > - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address), > - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) + > - be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1, > - 
be64_to_cpu(fdm_ptr->cpu_state_data.source_len), > - be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped)); > - seq_printf(m, > - "HPTE: [%#016llx-%#016llx] %#llx bytes, " > - "Dumped: %#llx\n", > - be64_to_cpu(fdm_ptr->hpte_region.destination_address), > - be64_to_cpu(fdm_ptr->hpte_region.destination_address) + > - be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1, > - be64_to_cpu(fdm_ptr->hpte_region.source_len), > - be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped)); > - seq_printf(m, > - "DUMP: [%#016llx-%#016llx] %#llx bytes, " > - "Dumped: %#llx\n", > - be64_to_cpu(fdm_ptr->rmr_region.destination_address), > - be64_to_cpu(fdm_ptr->rmr_region.destination_address) + > - be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1, > - be64_to_cpu(fdm_ptr->rmr_region.source_len), > - be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped)); > - > - if (!fdm_active || > - (fw_dump.reserve_dump_area_start == > - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address))) > - goto out; > - > - /* Dump is active. Show reserved memory region. */ > - seq_printf(m, > - " : [%#016llx-%#016llx] %#llx bytes, " > - "Dumped: %#llx\n", > - (unsigned long long)fw_dump.reserve_dump_area_start, > - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1, > - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - > - fw_dump.reserve_dump_area_start, > - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - > - fw_dump.reserve_dump_area_start); > -out: > - if (fdm_active) > - mutex_unlock(&fadump_mutex); > return 0; > } > > @@ -1373,12 +1068,13 @@ int __init setup_fadump(void) > * if dump process fails then invalidate the registration > * and release memory before proceeding for re-registration. > */ > - if (process_fadump(fdm_active) < 0) > + if (fw_dump.ops->process_fadump(&fw_dump) < 0) > fadump_invalidate_release_mem(); > } > /* Initialize the kernel dump memory structure for FAD registration. 
*/ > else if (fw_dump.reserve_dump_area_size) > fw_dump.ops->init_fadump_mem_struct(&fw_dump); > + > fadump_init_files(); > > return 1; > diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c > index 790a37d..7ce84f8 100644 > --- a/arch/powerpc/platforms/pseries/rtas-fadump.c > +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c > @@ -31,6 +31,7 @@ > #include "rtas-fadump.h" > > static struct rtas_fadump_mem_struct fdm; > +static const struct rtas_fadump_mem_struct *fdm_active; > > static void rtas_fadump_update_config(struct fw_dump *fadump_conf, > const struct rtas_fadump_mem_struct *fdm) > @@ -40,6 +41,23 @@ static void rtas_fadump_update_config(struct fw_dump *fadump_conf, > > fadump_conf->fadumphdr_addr = (fadump_conf->boot_mem_dest_addr + > fadump_conf->boot_memory_size); > + > + /* Start address of preserve area (permanent reservation) */ > + fadump_conf->preserv_area_start = > + be64_to_cpu(fdm->cpu_state_data.destination_address); > + pr_debug("Preserve area start address: 0x%lx\n", > + fadump_conf->preserv_area_start); > +} > + > +/* > + * This function is called in the capture kernel to get configuration details > + * setup in the first kernel and passed to the f/w. 
> + */ > +static void rtas_fadump_get_config(struct fw_dump *fadump_conf, > + const struct rtas_fadump_mem_struct *fdm) > +{ > + fadump_conf->boot_memory_size = be64_to_cpu(fdm->rmr_region.source_len); > + rtas_fadump_update_config(fadump_conf, fdm); > } > > static ulong rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf) > @@ -180,7 +198,196 @@ static int rtas_fadump_unregister_fadump(struct fw_dump *fadump_conf) > > static int rtas_fadump_invalidate_fadump(struct fw_dump *fadump_conf) > { > - return -EIO; > + int rc; > + unsigned int wait_time; > + > + /* TODO: Add upper time limit for the delay */ > + do { > + rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1, > + NULL, FADUMP_INVALIDATE, fdm_active, > + sizeof(struct rtas_fadump_mem_struct)); > + > + wait_time = rtas_busy_delay_time(rc); > + if (wait_time) > + mdelay(wait_time); > + } while (wait_time); > + > + if (rc) { > + pr_err("Failed to invalidate - unexpected error (%d).\n", rc); > + return -EIO; > + } > + > + fadump_conf->dump_active = 0; > + fdm_active = NULL; > + return 0; > +} > + > +#define RTAS_FADUMP_GPR_MASK 0xffffff0000000000 > +static inline int rtas_fadump_gpr_index(u64 id) > +{ > + int i = -1; > + char str[3]; > + > + if ((id & RTAS_FADUMP_GPR_MASK) == fadump_str_to_u64("GPR")) { > + /* get the digits at the end */ > + id &= ~RTAS_FADUMP_GPR_MASK; > + id >>= 24; > + str[2] = '\0'; > + str[1] = id & 0xff; > + str[0] = (id >> 8) & 0xff; > + if (kstrtoint(str, 10, &i)) > + i = -EINVAL; > + if (i > 31) > + i = -1; > + } > + return i; > +} > + > +void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) > +{ > + int i; > + > + i = rtas_fadump_gpr_index(reg_id); > + if (i >= 0) > + regs->gpr[i] = (unsigned long)reg_val; > + else if (reg_id == fadump_str_to_u64("NIA")) > + regs->nip = (unsigned long)reg_val; > + else if (reg_id == fadump_str_to_u64("MSR")) > + regs->msr = (unsigned long)reg_val; > + else if (reg_id == fadump_str_to_u64("CTR")) > + regs->ctr = 
(unsigned long)reg_val; > + else if (reg_id == fadump_str_to_u64("LR")) > + regs->link = (unsigned long)reg_val; > + else if (reg_id == fadump_str_to_u64("XER")) > + regs->xer = (unsigned long)reg_val; > + else if (reg_id == fadump_str_to_u64("CR")) > + regs->ccr = (unsigned long)reg_val; > + else if (reg_id == fadump_str_to_u64("DAR")) > + regs->dar = (unsigned long)reg_val; > + else if (reg_id == fadump_str_to_u64("DSISR")) > + regs->dsisr = (unsigned long)reg_val; > +} > + > +static struct rtas_fadump_reg_entry* > +rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry, > + struct pt_regs *regs) > +{ > + memset(regs, 0, sizeof(struct pt_regs)); > + > + while (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND")) { > + rtas_fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id), > + be64_to_cpu(reg_entry->reg_value)); > + reg_entry++; > + } > + reg_entry++; > + return reg_entry; > +} > + > +/* > + * Read CPU state dump data and convert it into ELF notes. > + * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be > + * used to access the data to allow for additional fields to be added without > + * affecting compatibility. Each list of registers for a CPU starts with > + * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, > + * 8 Byte ASCII identifier and 8 Byte register value. The register entry > + * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part > + * of register value. For more details refer to PAPR document. > + * > + * Only for the crashing cpu we ignore the CPU dump data and get exact > + * state from fadump crash info structure populated by first kernel at the > + * time of crash. 
> + */ > +static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf) > +{ > + struct rtas_fadump_reg_save_area_header *reg_header; > + struct rtas_fadump_reg_entry *reg_entry; > + struct fadump_crash_info_header *fdh = NULL; > + void *vaddr; > + unsigned long addr; > + u32 num_cpus, *note_buf; > + struct pt_regs regs; > + int i, rc = 0, cpu = 0; > + > + addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address); > + vaddr = __va(addr); > + > + reg_header = vaddr; > + if (be64_to_cpu(reg_header->magic_number) != > + fadump_str_to_u64("REGSAVE")) { > + pr_err("Unable to read register save area.\n"); > + return -ENOENT; > + } > + > + pr_debug("--------CPU State Data------------\n"); > + pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number)); > + pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset)); > + > + vaddr += be32_to_cpu(reg_header->num_cpu_offset); > + num_cpus = be32_to_cpu(*((__be32 *)(vaddr))); > + pr_debug("NumCpus : %u\n", num_cpus); > + vaddr += sizeof(u32); > + reg_entry = (struct rtas_fadump_reg_entry *)vaddr; > + > + /* Allocate buffer to hold cpu crash notes. 
*/ > + fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); > + fadump_conf->cpu_notes_buf_size = > + PAGE_ALIGN(fadump_conf->cpu_notes_buf_size); > + note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size); > + if (!note_buf) { > + pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n", > + fadump_conf->cpu_notes_buf_size); > + return -ENOMEM; > + } > + fadump_conf->cpu_notes_buf = __pa(note_buf); > + > + pr_debug("Allocated buffer for cpu notes of size %ld at %p\n", > + (num_cpus * sizeof(note_buf_t)), note_buf); > + > + if (fadump_conf->fadumphdr_addr) > + fdh = __va(fadump_conf->fadumphdr_addr); > + > + for (i = 0; i < num_cpus; i++) { > + if (be64_to_cpu(reg_entry->reg_id) != > + fadump_str_to_u64("CPUSTRT")) { > + pr_err("Unable to read CPU state data\n"); > + rc = -ENOENT; > + goto error_out; > + } > + /* Lower 4 bytes of reg_value contains logical cpu id */ > + cpu = (be64_to_cpu(reg_entry->reg_value) & > + RTAS_FADUMP_CPU_ID_MASK); > + if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) { > + RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); > + continue; > + } > + pr_debug("Reading register data for cpu %d...\n", cpu); > + if (fdh && fdh->crashing_cpu == cpu) { > + regs = fdh->regs; > + note_buf = fadump_regs_to_elf_notes(note_buf, &regs); > + RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); > + } else { > + reg_entry++; > + reg_entry = rtas_fadump_read_regs(reg_entry, &regs); > + note_buf = fadump_regs_to_elf_notes(note_buf, &regs); > + } > + } > + final_note(note_buf); > + > + if (fdh) { > + pr_debug("Updating elfcore header (%llx) with cpu notes\n", > + fdh->elfcorehdr_addr); > + fadump_update_elfcore_header(fadump_conf, > + __va(fdh->elfcorehdr_addr)); > + } > + return 0; > + > +error_out: > + fadump_cpu_notes_buf_free((ulong)__va(fadump_conf->cpu_notes_buf), > + fadump_conf->cpu_notes_buf_size); > + fadump_conf->cpu_notes_buf = 0; > + fadump_conf->cpu_notes_buf_size = 0; > + return rc; > + > } > > /* > @@ -189,15 +396,62 @@ static int 
rtas_fadump_invalidate_fadump(struct fw_dump *fadump_conf) > */ > static int __init rtas_fadump_process_fadump(struct fw_dump *fadump_conf) > { > - return -EINVAL; > + struct fadump_crash_info_header *fdh; > + int rc = 0; > + > + if (!fdm_active || !fadump_conf->fadumphdr_addr) > + return -EINVAL; > + > + /* Check if the dump data is valid. */ > + if ((be16_to_cpu(fdm_active->header.dump_status_flag) == > + RTAS_FADUMP_ERROR_FLAG) || > + (fdm_active->cpu_state_data.error_flags != 0) || > + (fdm_active->rmr_region.error_flags != 0)) { > + pr_err("Dump taken by platform is not valid\n"); > + return -EINVAL; > + } > + if ((fdm_active->rmr_region.bytes_dumped != > + fdm_active->rmr_region.source_len) || > + !fdm_active->cpu_state_data.bytes_dumped) { > + pr_err("Dump taken by platform is incomplete\n"); > + return -EINVAL; > + } > + > + /* Validate the fadump crash info header */ > + fdh = __va(fadump_conf->fadumphdr_addr); > + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { > + pr_err("Crash info header is not valid.\n"); > + return -EINVAL; > + } > + > + if (!fdm_active->cpu_state_data.bytes_dumped) > + return -EINVAL; > + > + rc = rtas_fadump_build_cpu_notes(fadump_conf); > + if (rc) > + return rc; > + > + /* > + * We are done validating dump info and elfcore header is now ready > + * to be exported. set elfcorehdr_addr so that vmcore module will > + * export the elfcore header through '/proc/vmcore'. 
> + */ > + elfcorehdr_addr = fdh->elfcorehdr_addr; > + > + return 0; > } > > static void rtas_fadump_region_show(struct fw_dump *fadump_conf, > struct seq_file *m) > { > - const struct rtas_fadump_mem_struct *fdm_ptr = &fdm; > + const struct rtas_fadump_mem_struct *fdm_ptr; > const struct rtas_fadump_section *cpu_data_section; > > + if (fdm_active) > + fdm_ptr = fdm_active; > + else > + fdm_ptr = &fdm; > + > cpu_data_section = &(fdm_ptr->cpu_state_data); > seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", > be64_to_cpu(cpu_data_section->destination_address), > @@ -219,6 +473,12 @@ static void rtas_fadump_region_show(struct fw_dump *fadump_conf, > seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", > be64_to_cpu(fdm_ptr->rmr_region.source_len), > be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped)); > + > + /* Dump is active. Show reserved area start address. */ > + if (fdm_active) { > + seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n", > + fadump_conf->reserve_dump_area_start); > + } > } > > static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh, > @@ -228,6 +488,7 @@ static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh, > rtas_os_term((char *)msg); > } > > + > static struct fadump_ops rtas_fadump_ops = { > .init_fadump_mem_struct = rtas_fadump_init_mem_struct, > .register_fadump = rtas_fadump_register_fadump, > @@ -258,6 +519,17 @@ int __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, ulong node) > fadump_conf->fadump_platform = FADUMP_PLATFORM_PSERIES; > fadump_conf->fadump_supported = 1; > > + /* > + * The 'ibm,kernel-dump' rtas node is present only if there is > + * dump data waiting for us. 
> + */ > + fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL); > + if (fdm_active) { > + pr_info("Firmware-assisted dump is active.\n"); > + fadump_conf->dump_active = 1; > + rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active)); > + } > + > /* Get the sizes required to store dump data for the firmware provided > * dump sections. > * For each dump section type supported, a 32bit cell which defines >
diff --git a/arch/powerpc/kernel/fadump-common.h b/arch/powerpc/kernel/fadump-common.h index 273247d..0231a0b 100644 --- a/arch/powerpc/kernel/fadump-common.h +++ b/arch/powerpc/kernel/fadump-common.h @@ -100,6 +100,12 @@ struct fw_dump { /* cmd line option during boot */ unsigned long reserve_bootvar; + /* + * Start address of preserve area. This memory is reserved + * permanently (production or capture kernel) for FADump. + */ + unsigned long preserv_area_start; + unsigned long cpu_state_data_size; unsigned long hpte_region_size; unsigned long boot_memory_size; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 650ebf8..e995db1 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -28,15 +28,12 @@ #include <asm/debugfs.h> #include <asm/page.h> #include <asm/prom.h> -#include <asm/rtas.h> #include <asm/fadump.h> #include <asm/setup.h> #include "fadump-common.h" -#include "../platforms/pseries/rtas-fadump.h" static struct fw_dump fw_dump; -static const struct rtas_fadump_mem_struct *fdm_active; static DEFINE_MUTEX(fadump_mutex); struct fad_crash_memory_ranges *crash_memory_ranges; @@ -111,22 +108,13 @@ static int __init fadump_cma_init(void) { return 1; } int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) { - int ret; - - if (depth != 1 || strcmp(uname, "rtas") != 0) + if (depth != 1) return 0; - ret = rtas_fadump_dt_scan(&fw_dump, node); + if (strcmp(uname, "rtas") == 0) + return rtas_fadump_dt_scan(&fw_dump, node); - /* - * The 'ibm,kernel-dump' rtas node is present only if there is - * dump data waiting for us. - */ - fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL); - if (fdm_active) - fw_dump.dump_active = 1; - - return ret; + return 0; } /* @@ -308,9 +296,7 @@ int __init fadump_reserve_mem(void) * If dump is active then we have already calculated the size during * first kernel. 
*/ - if (fdm_active) - fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len); - else { + if (!fw_dump.dump_active) { fw_dump.boot_memory_size = fadump_calculate_reserve_size(); #ifdef CONFIG_CMA if (!fw_dump.nocma) @@ -320,6 +306,7 @@ int __init fadump_reserve_mem(void) #endif } + size = get_fadump_area_size(); if (memory_limit) memory_boundary = memory_limit; else @@ -346,15 +333,10 @@ int __init fadump_reserve_mem(void) size = memory_boundary - base; fadump_reserve_crash_area(base, size); - fw_dump.fadumphdr_addr = - be64_to_cpu(fdm_active->rmr_region.destination_address) + - be64_to_cpu(fdm_active->rmr_region.source_len); - pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr); + pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr); fw_dump.reserve_dump_area_start = base; fw_dump.reserve_dump_area_size = size; } else { - size = get_fadump_area_size(); - /* * Reserve memory at an offset closer to bottom of the RAM to * minimize the impact of memory hot-remove operation. 
We can't @@ -469,218 +451,6 @@ void crash_fadump(struct pt_regs *regs, const char *str) fw_dump.ops->fadump_trigger(fdh, str); } -#define GPR_MASK 0xffffff0000000000 -static inline int fadump_gpr_index(u64 id) -{ - int i = -1; - char str[3]; - - if ((id & GPR_MASK) == fadump_str_to_u64("GPR")) { - /* get the digits at the end */ - id &= ~GPR_MASK; - id >>= 24; - str[2] = '\0'; - str[1] = id & 0xff; - str[0] = (id >> 8) & 0xff; - sscanf(str, "%d", &i); - if (i > 31) - i = -1; - } - return i; -} - -static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id, - u64 reg_val) -{ - int i; - - i = fadump_gpr_index(reg_id); - if (i >= 0) - regs->gpr[i] = (unsigned long)reg_val; - else if (reg_id == fadump_str_to_u64("NIA")) - regs->nip = (unsigned long)reg_val; - else if (reg_id == fadump_str_to_u64("MSR")) - regs->msr = (unsigned long)reg_val; - else if (reg_id == fadump_str_to_u64("CTR")) - regs->ctr = (unsigned long)reg_val; - else if (reg_id == fadump_str_to_u64("LR")) - regs->link = (unsigned long)reg_val; - else if (reg_id == fadump_str_to_u64("XER")) - regs->xer = (unsigned long)reg_val; - else if (reg_id == fadump_str_to_u64("CR")) - regs->ccr = (unsigned long)reg_val; - else if (reg_id == fadump_str_to_u64("DAR")) - regs->dar = (unsigned long)reg_val; - else if (reg_id == fadump_str_to_u64("DSISR")) - regs->dsisr = (unsigned long)reg_val; -} - -static struct rtas_fadump_reg_entry* -fadump_read_registers(struct rtas_fadump_reg_entry *reg_entry, struct pt_regs *regs) -{ - memset(regs, 0, sizeof(struct pt_regs)); - - while (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND")) { - fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id), - be64_to_cpu(reg_entry->reg_value)); - reg_entry++; - } - reg_entry++; - return reg_entry; -} - -/* - * Read CPU state dump data and convert it into ELF notes. - * The CPU dump starts with magic number "REGSAVE". 
NumCpusOffset should be - * used to access the data to allow for additional fields to be added without - * affecting compatibility. Each list of registers for a CPU starts with - * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, - * 8 Byte ASCII identifier and 8 Byte register value. The register entry - * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part - * of register value. For more details refer to PAPR document. - * - * Only for the crashing cpu we ignore the CPU dump data and get exact - * state from fadump crash info structure populated by first kernel at the - * time of crash. - */ -static int __init fadump_build_cpu_notes(const struct rtas_fadump_mem_struct *fdm) -{ - struct rtas_fadump_reg_save_area_header *reg_header; - struct rtas_fadump_reg_entry *reg_entry; - struct fadump_crash_info_header *fdh = NULL; - void *vaddr; - unsigned long addr; - u32 num_cpus, *note_buf; - struct pt_regs regs; - int i, rc = 0, cpu = 0; - - if (!fdm->cpu_state_data.bytes_dumped) - return -EINVAL; - - addr = be64_to_cpu(fdm->cpu_state_data.destination_address); - vaddr = __va(addr); - - reg_header = vaddr; - if (be64_to_cpu(reg_header->magic_number) != - fadump_str_to_u64("REGSAVE")) { - printk(KERN_ERR "Unable to read register save area.\n"); - return -ENOENT; - } - pr_debug("--------CPU State Data------------\n"); - pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number)); - pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset)); - - vaddr += be32_to_cpu(reg_header->num_cpu_offset); - num_cpus = be32_to_cpu(*((__be32 *)(vaddr))); - pr_debug("NumCpus : %u\n", num_cpus); - vaddr += sizeof(u32); - reg_entry = (struct rtas_fadump_reg_entry *)vaddr; - - /* Allocate buffer to hold cpu crash notes. 
*/ - fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); - fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size); - note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size); - if (!note_buf) { - printk(KERN_ERR "Failed to allocate 0x%lx bytes for " - "cpu notes buffer\n", fw_dump.cpu_notes_buf_size); - return -ENOMEM; - } - fw_dump.cpu_notes_buf = __pa(note_buf); - - pr_debug("Allocated buffer for cpu notes of size %ld at %p\n", - (num_cpus * sizeof(note_buf_t)), note_buf); - - if (fw_dump.fadumphdr_addr) - fdh = __va(fw_dump.fadumphdr_addr); - - for (i = 0; i < num_cpus; i++) { - if (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUSTRT")) { - printk(KERN_ERR "Unable to read CPU state data\n"); - rc = -ENOENT; - goto error_out; - } - /* Lower 4 bytes of reg_value contains logical cpu id */ - cpu = be64_to_cpu(reg_entry->reg_value) & RTAS_FADUMP_CPU_ID_MASK; - if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) { - RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); - continue; - } - pr_debug("Reading register data for cpu %d...\n", cpu); - if (fdh && fdh->crashing_cpu == cpu) { - regs = fdh->regs; - note_buf = fadump_regs_to_elf_notes(note_buf, &regs); - RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); - } else { - reg_entry++; - reg_entry = fadump_read_registers(reg_entry, &regs); - note_buf = fadump_regs_to_elf_notes(note_buf, &regs); - } - } - final_note(note_buf); - - if (fdh) { - addr = fdh->elfcorehdr_addr; - pr_debug("Updating elfcore header(%lx) with cpu notes\n", addr); - fadump_update_elfcore_header(&fw_dump, (char *)__va(addr)); - } - return 0; - -error_out: - fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf), - fw_dump.cpu_notes_buf_size); - fw_dump.cpu_notes_buf = 0; - fw_dump.cpu_notes_buf_size = 0; - return rc; - -} - -/* - * Validate and process the dump data stored by firmware before exporting - * it through '/proc/vmcore'.
- */ -static int __init process_fadump(const struct rtas_fadump_mem_struct *fdm_active) -{ - struct fadump_crash_info_header *fdh; - int rc = 0; - - if (!fdm_active || !fw_dump.fadumphdr_addr) - return -EINVAL; - - /* Check if the dump data is valid. */ - if ((be16_to_cpu(fdm_active->header.dump_status_flag) == RTAS_FADUMP_ERROR_FLAG) || - (fdm_active->cpu_state_data.error_flags != 0) || - (fdm_active->rmr_region.error_flags != 0)) { - printk(KERN_ERR "Dump taken by platform is not valid\n"); - return -EINVAL; - } - if ((fdm_active->rmr_region.bytes_dumped != - fdm_active->rmr_region.source_len) || - !fdm_active->cpu_state_data.bytes_dumped) { - printk(KERN_ERR "Dump taken by platform is incomplete\n"); - return -EINVAL; - } - - /* Validate the fadump crash info header */ - fdh = __va(fw_dump.fadumphdr_addr); - if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { - printk(KERN_ERR "Crash info header is not valid.\n"); - return -EINVAL; - } - - rc = fadump_build_cpu_notes(fdm_active); - if (rc) - return rc; - - /* - * We are done validating dump info and elfcore header is now ready - * to be exported. set elfcorehdr_addr so that vmcore module will - * export the elfcore header through '/proc/vmcore'. 
- */ - elfcorehdr_addr = fdh->elfcorehdr_addr; - - return 0; -} - static void free_crash_memory_ranges(void) { kfree(crash_memory_ranges); @@ -970,7 +740,6 @@ static unsigned long init_fadump_header(unsigned long addr) if (!addr) return 0; - fw_dump.fadumphdr_addr = addr; fdh = __va(addr); addr += sizeof(struct fadump_crash_info_header); @@ -1014,39 +783,12 @@ static int register_fadump(void) return fw_dump.ops->register_fadump(&fw_dump); } -static int fadump_invalidate_dump(const struct rtas_fadump_mem_struct *fdm) -{ - int rc = 0; - unsigned int wait_time; - - pr_debug("Invalidating firmware-assisted dump registration\n"); - - /* TODO: Add upper time limit for the delay */ - do { - rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, - FADUMP_INVALIDATE, fdm, - sizeof(struct rtas_fadump_mem_struct)); - - wait_time = rtas_busy_delay_time(rc); - if (wait_time) - mdelay(wait_time); - } while (wait_time); - - if (rc) { - pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc); - return rc; - } - fw_dump.dump_active = 0; - fdm_active = NULL; - return 0; -} - void fadump_cleanup(void) { /* Invalidate the registration only if dump is active. */ if (fw_dump.dump_active) { - /* pass the same memory dump structure provided by platform */ - fadump_invalidate_dump(fdm_active); + pr_debug("Invalidating firmware-assisted dump registration\n"); + fw_dump.ops->invalidate_fadump(&fw_dump); } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. 
*/ fw_dump.ops->unregister_fadump(&fw_dump); @@ -1132,7 +874,7 @@ static void fadump_invalidate_release_mem(void) return; } - destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address); + destination_address = fw_dump.preserv_area_start; fadump_cleanup(); mutex_unlock(&fadump_mutex); @@ -1158,6 +900,7 @@ static void fadump_invalidate_release_mem(void) fw_dump.cpu_notes_buf = 0; fw_dump.cpu_notes_buf_size = 0; } + /* Initialize the kernel dump memory structure for FAD registration. */ fw_dump.ops->init_fadump_mem_struct(&fw_dump); } @@ -1210,7 +953,7 @@ static ssize_t fadump_register_store(struct kobject *kobj, int ret = 0; int input = -1; - if (!fw_dump.fadump_enabled || fdm_active) + if (!fw_dump.fadump_enabled || fw_dump.dump_active) return -EPERM; if (kstrtoint(buf, 0, &input)) @@ -1223,7 +966,9 @@ static ssize_t fadump_register_store(struct kobject *kobj, if (fw_dump.dump_registered == 0) { goto unlock_out; } + /* Un-register Firmware-assisted dump */ + pr_debug("Un-register firmware-assisted dump\n"); fw_dump.ops->unregister_fadump(&fw_dump); break; case 1: @@ -1246,63 +991,13 @@ static ssize_t fadump_register_store(struct kobject *kobj, static int fadump_region_show(struct seq_file *m, void *private) { - const struct rtas_fadump_mem_struct *fdm_ptr; - if (!fw_dump.fadump_enabled) return 0; mutex_lock(&fadump_mutex); - if (fdm_active) - fdm_ptr = fdm_active; - else { - mutex_unlock(&fadump_mutex); - fw_dump.ops->fadump_region_show(&fw_dump, m); - return 0; - } + fw_dump.ops->fadump_region_show(&fw_dump, m); + mutex_unlock(&fadump_mutex); - seq_printf(m, - "CPU : [%#016llx-%#016llx] %#llx bytes, " - "Dumped: %#llx\n", - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address), - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) + - be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1, - be64_to_cpu(fdm_ptr->cpu_state_data.source_len), - be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped)); - seq_printf(m, - "HPTE: [%#016llx-%#016llx] 
%#llx bytes, " - "Dumped: %#llx\n", - be64_to_cpu(fdm_ptr->hpte_region.destination_address), - be64_to_cpu(fdm_ptr->hpte_region.destination_address) + - be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1, - be64_to_cpu(fdm_ptr->hpte_region.source_len), - be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped)); - seq_printf(m, - "DUMP: [%#016llx-%#016llx] %#llx bytes, " - "Dumped: %#llx\n", - be64_to_cpu(fdm_ptr->rmr_region.destination_address), - be64_to_cpu(fdm_ptr->rmr_region.destination_address) + - be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1, - be64_to_cpu(fdm_ptr->rmr_region.source_len), - be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped)); - - if (!fdm_active || - (fw_dump.reserve_dump_area_start == - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address))) - goto out; - - /* Dump is active. Show reserved memory region. */ - seq_printf(m, - " : [%#016llx-%#016llx] %#llx bytes, " - "Dumped: %#llx\n", - (unsigned long long)fw_dump.reserve_dump_area_start, - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1, - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - - fw_dump.reserve_dump_area_start, - be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - - fw_dump.reserve_dump_area_start); -out: - if (fdm_active) - mutex_unlock(&fadump_mutex); return 0; } @@ -1373,12 +1068,13 @@ int __init setup_fadump(void) * if dump process fails then invalidate the registration * and release memory before proceeding for re-registration. */ - if (process_fadump(fdm_active) < 0) + if (fw_dump.ops->process_fadump(&fw_dump) < 0) fadump_invalidate_release_mem(); } /* Initialize the kernel dump memory structure for FAD registration. 
*/ else if (fw_dump.reserve_dump_area_size) fw_dump.ops->init_fadump_mem_struct(&fw_dump); + fadump_init_files(); return 1; diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index 790a37d..7ce84f8 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -31,6 +31,7 @@ #include "rtas-fadump.h" static struct rtas_fadump_mem_struct fdm; +static const struct rtas_fadump_mem_struct *fdm_active; static void rtas_fadump_update_config(struct fw_dump *fadump_conf, const struct rtas_fadump_mem_struct *fdm) @@ -40,6 +41,23 @@ static void rtas_fadump_update_config(struct fw_dump *fadump_conf, fadump_conf->fadumphdr_addr = (fadump_conf->boot_mem_dest_addr + fadump_conf->boot_memory_size); + + /* Start address of preserve area (permanent reservation) */ + fadump_conf->preserv_area_start = + be64_to_cpu(fdm->cpu_state_data.destination_address); + pr_debug("Preserve area start address: 0x%lx\n", + fadump_conf->preserv_area_start); +} + +/* + * This function is called in the capture kernel to get configuration details + * setup in the first kernel and passed to the f/w. 
+ */ +static void rtas_fadump_get_config(struct fw_dump *fadump_conf, + const struct rtas_fadump_mem_struct *fdm) +{ + fadump_conf->boot_memory_size = be64_to_cpu(fdm->rmr_region.source_len); + rtas_fadump_update_config(fadump_conf, fdm); } static ulong rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf) @@ -180,7 +198,196 @@ static int rtas_fadump_unregister_fadump(struct fw_dump *fadump_conf) static int rtas_fadump_invalidate_fadump(struct fw_dump *fadump_conf) { - return -EIO; + int rc; + unsigned int wait_time; + + /* TODO: Add upper time limit for the delay */ + do { + rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1, + NULL, FADUMP_INVALIDATE, fdm_active, + sizeof(struct rtas_fadump_mem_struct)); + + wait_time = rtas_busy_delay_time(rc); + if (wait_time) + mdelay(wait_time); + } while (wait_time); + + if (rc) { + pr_err("Failed to invalidate - unexpected error (%d).\n", rc); + return -EIO; + } + + fadump_conf->dump_active = 0; + fdm_active = NULL; + return 0; +} + +#define RTAS_FADUMP_GPR_MASK 0xffffff0000000000 +static inline int rtas_fadump_gpr_index(u64 id) +{ + int i = -1; + char str[3]; + + if ((id & RTAS_FADUMP_GPR_MASK) == fadump_str_to_u64("GPR")) { + /* get the digits at the end */ + id &= ~RTAS_FADUMP_GPR_MASK; + id >>= 24; + str[2] = '\0'; + str[1] = id & 0xff; + str[0] = (id >> 8) & 0xff; + if (kstrtoint(str, 10, &i)) + i = -EINVAL; + if (i > 31) + i = -1; + } + return i; +} + +void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) +{ + int i; + + i = rtas_fadump_gpr_index(reg_id); + if (i >= 0) + regs->gpr[i] = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("NIA")) + regs->nip = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("MSR")) + regs->msr = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("CTR")) + regs->ctr = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("LR")) + regs->link = (unsigned long)reg_val; + else if (reg_id == 
fadump_str_to_u64("XER")) + regs->xer = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("CR")) + regs->ccr = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("DAR")) + regs->dar = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("DSISR")) + regs->dsisr = (unsigned long)reg_val; +} + +static struct rtas_fadump_reg_entry* +rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry, + struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + + while (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND")) { + rtas_fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id), + be64_to_cpu(reg_entry->reg_value)); + reg_entry++; + } + reg_entry++; + return reg_entry; +} + +/* + * Read CPU state dump data and convert it into ELF notes. + * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be + * used to access the data to allow for additional fields to be added without + * affecting compatibility. Each list of registers for a CPU starts with + * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, + * 8 Byte ASCII identifier and 8 Byte register value. The register entry + * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part + * of register value. For more details refer to PAPR document. + * + * Only for the crashing cpu we ignore the CPU dump data and get exact + * state from fadump crash info structure populated by first kernel at the + * time of crash. 
+ */ +static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf) +{ + struct rtas_fadump_reg_save_area_header *reg_header; + struct rtas_fadump_reg_entry *reg_entry; + struct fadump_crash_info_header *fdh = NULL; + void *vaddr; + unsigned long addr; + u32 num_cpus, *note_buf; + struct pt_regs regs; + int i, rc = 0, cpu = 0; + + addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address); + vaddr = __va(addr); + + reg_header = vaddr; + if (be64_to_cpu(reg_header->magic_number) != + fadump_str_to_u64("REGSAVE")) { + pr_err("Unable to read register save area.\n"); + return -ENOENT; + } + + pr_debug("--------CPU State Data------------\n"); + pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number)); + pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset)); + + vaddr += be32_to_cpu(reg_header->num_cpu_offset); + num_cpus = be32_to_cpu(*((__be32 *)(vaddr))); + pr_debug("NumCpus : %u\n", num_cpus); + vaddr += sizeof(u32); + reg_entry = (struct rtas_fadump_reg_entry *)vaddr; + + /* Allocate buffer to hold cpu crash notes. 
*/ + fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); + fadump_conf->cpu_notes_buf_size = + PAGE_ALIGN(fadump_conf->cpu_notes_buf_size); + note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size); + if (!note_buf) { + pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n", + fadump_conf->cpu_notes_buf_size); + return -ENOMEM; + } + fadump_conf->cpu_notes_buf = __pa(note_buf); + + pr_debug("Allocated buffer for cpu notes of size %ld at %p\n", + (num_cpus * sizeof(note_buf_t)), note_buf); + + if (fadump_conf->fadumphdr_addr) + fdh = __va(fadump_conf->fadumphdr_addr); + + for (i = 0; i < num_cpus; i++) { + if (be64_to_cpu(reg_entry->reg_id) != + fadump_str_to_u64("CPUSTRT")) { + pr_err("Unable to read CPU state data\n"); + rc = -ENOENT; + goto error_out; + } + /* Lower 4 bytes of reg_value contains logical cpu id */ + cpu = (be64_to_cpu(reg_entry->reg_value) & + RTAS_FADUMP_CPU_ID_MASK); + if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) { + RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); + continue; + } + pr_debug("Reading register data for cpu %d...\n", cpu); + if (fdh && fdh->crashing_cpu == cpu) { + regs = fdh->regs; + note_buf = fadump_regs_to_elf_notes(note_buf, &regs); + RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); + } else { + reg_entry++; + reg_entry = rtas_fadump_read_regs(reg_entry, &regs); + note_buf = fadump_regs_to_elf_notes(note_buf, &regs); + } + } + final_note(note_buf); + + if (fdh) { + pr_debug("Updating elfcore header (%llx) with cpu notes\n", + fdh->elfcorehdr_addr); + fadump_update_elfcore_header(fadump_conf, + __va(fdh->elfcorehdr_addr)); + } + return 0; + +error_out: + fadump_cpu_notes_buf_free((ulong)__va(fadump_conf->cpu_notes_buf), + fadump_conf->cpu_notes_buf_size); + fadump_conf->cpu_notes_buf = 0; + fadump_conf->cpu_notes_buf_size = 0; + return rc; + } /* @@ -189,15 +396,62 @@ static int rtas_fadump_invalidate_fadump(struct fw_dump *fadump_conf) */ static int __init rtas_fadump_process_fadump(struct fw_dump
*fadump_conf) { - return -EINVAL; + struct fadump_crash_info_header *fdh; + int rc = 0; + + if (!fdm_active || !fadump_conf->fadumphdr_addr) + return -EINVAL; + + /* Check if the dump data is valid. */ + if ((be16_to_cpu(fdm_active->header.dump_status_flag) == + RTAS_FADUMP_ERROR_FLAG) || + (fdm_active->cpu_state_data.error_flags != 0) || + (fdm_active->rmr_region.error_flags != 0)) { + pr_err("Dump taken by platform is not valid\n"); + return -EINVAL; + } + if ((fdm_active->rmr_region.bytes_dumped != + fdm_active->rmr_region.source_len) || + !fdm_active->cpu_state_data.bytes_dumped) { + pr_err("Dump taken by platform is incomplete\n"); + return -EINVAL; + } + + /* Validate the fadump crash info header */ + fdh = __va(fadump_conf->fadumphdr_addr); + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + pr_err("Crash info header is not valid.\n"); + return -EINVAL; + } + + if (!fdm_active->cpu_state_data.bytes_dumped) + return -EINVAL; + + rc = rtas_fadump_build_cpu_notes(fadump_conf); + if (rc) + return rc; + + /* + * We are done validating dump info and elfcore header is now ready + * to be exported. set elfcorehdr_addr so that vmcore module will + * export the elfcore header through '/proc/vmcore'. 
+ */ + elfcorehdr_addr = fdh->elfcorehdr_addr; + + return 0; } static void rtas_fadump_region_show(struct fw_dump *fadump_conf, struct seq_file *m) { - const struct rtas_fadump_mem_struct *fdm_ptr = &fdm; + const struct rtas_fadump_mem_struct *fdm_ptr; const struct rtas_fadump_section *cpu_data_section; + if (fdm_active) + fdm_ptr = fdm_active; + else + fdm_ptr = &fdm; + cpu_data_section = &(fdm_ptr->cpu_state_data); seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", be64_to_cpu(cpu_data_section->destination_address), @@ -219,6 +473,12 @@ static void rtas_fadump_region_show(struct fw_dump *fadump_conf, seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", be64_to_cpu(fdm_ptr->rmr_region.source_len), be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped)); + + /* Dump is active. Show reserved area start address. */ + if (fdm_active) { + seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n", + fadump_conf->reserve_dump_area_start); + } } static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh, @@ -228,6 +488,7 @@ static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh, rtas_os_term((char *)msg); } + static struct fadump_ops rtas_fadump_ops = { .init_fadump_mem_struct = rtas_fadump_init_mem_struct, .register_fadump = rtas_fadump_register_fadump, @@ -258,6 +519,17 @@ int __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, ulong node) fadump_conf->fadump_platform = FADUMP_PLATFORM_PSERIES; fadump_conf->fadump_supported = 1; + /* + * The 'ibm,kernel-dump' rtas node is present only if there is + * dump data waiting for us. + */ + fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL); + if (fdm_active) { + pr_info("Firmware-assisted dump is active.\n"); + fadump_conf->dump_active = 1; + rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active)); + } + /* Get the sizes required to store dump data for the firmware provided * dump sections. 
* For each dump section type supported, a 32bit cell which defines
Move code that supports processing the crashed kernel's memory preserved by firmware to platform specific callback functions. Signed-off-by: Hari Bathini <hbathini@linux.ibm.com> --- arch/powerpc/kernel/fadump-common.h | 6 arch/powerpc/kernel/fadump.c | 340 +------------------------- arch/powerpc/platforms/pseries/rtas-fadump.c | 278 +++++++++++++++++++++ 3 files changed, 299 insertions(+), 325 deletions(-)