Message ID | 20211124095500.98656-2-ganeshgr@linux.ibm.com (mailing list archive) |
---|---|
State | Rejected, archived |
Headers | show |
Series | [v3,1/2] powerpc/mce: Avoid using irq_work_queue() in realmode | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/github-powerpc_selftests | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_ppctests | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_kernel_qemu | success | Successfully ran 24 jobs. |
snowpatch_ozlabs/github-powerpc_sparse | success | Successfully ran 4 jobs. |
snowpatch_ozlabs/github-powerpc_clang | success | Successfully ran 7 jobs. |
Excerpts from Ganesh Goudar's message of November 24, 2021 7:55 pm: > Now that we are no longer switching on the mmu in realmode > mce handler, Revert the commit 4ff753feab02("powerpc/pseries: > Avoid using addr_to_pfn in real mode") partially, which > introduced functions mce_handle_err_virtmode/realmode() to > separate mce handler code which needed translation to enabled. > > Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> > --- > arch/powerpc/platforms/pseries/ras.c | 122 +++++++++++---------------- > 1 file changed, 49 insertions(+), 73 deletions(-) > > diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c > index 8613f9cc5798..62e1519b8355 100644 > --- a/arch/powerpc/platforms/pseries/ras.c > +++ b/arch/powerpc/platforms/pseries/ras.c > @@ -511,58 +511,17 @@ int pSeries_system_reset_exception(struct pt_regs *regs) > return 0; /* need to perform reset */ > } > > -static int mce_handle_err_realmode(int disposition, u8 error_type) > -{ > -#ifdef CONFIG_PPC_BOOK3S_64 > - if (disposition == RTAS_DISP_NOT_RECOVERED) { > - switch (error_type) { > - case MC_ERROR_TYPE_ERAT: > - flush_erat(); > - disposition = RTAS_DISP_FULLY_RECOVERED; > - break; > - case MC_ERROR_TYPE_SLB: > - /* > - * Store the old slb content in paca before flushing. > - * Print this when we go to virtual mode. > - * There are chances that we may hit MCE again if there > - * is a parity error on the SLB entry we trying to read > - * for saving. Hence limit the slb saving to single > - * level of recursion. > - */ > - if (local_paca->in_mce == 1) > - slb_save_contents(local_paca->mce_faulty_slbs); > - flush_and_reload_slb(); > - disposition = RTAS_DISP_FULLY_RECOVERED; > - break; > - default: > - break; > - } > - } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { > - /* Platform corrected itself but could be degraded */ > - pr_err("MCE: limited recovery, system may be degraded\n"); > - disposition = RTAS_DISP_FULLY_RECOVERED; > - } > -#endif > - return disposition; > -} > - > -static int mce_handle_err_virtmode(struct pt_regs *regs, > - struct rtas_error_log *errp, > - struct pseries_mc_errorlog *mce_log, > - int disposition) > +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) > { > struct mce_error_info mce_err = { 0 }; > + unsigned long eaddr = 0, paddr = 0; > + struct pseries_errorlog *pseries_log; > + struct pseries_mc_errorlog *mce_log; > + int disposition = rtas_error_disposition(errp); > int initiator = rtas_error_initiator(errp); > int severity = rtas_error_severity(errp); > - unsigned long eaddr = 0, paddr = 0; > u8 error_type, err_sub_type; > > - if (!mce_log) > - goto out; > - > - error_type = mce_log->error_type; > - err_sub_type = rtas_mc_error_sub_type(mce_log); > - > if (initiator == RTAS_INITIATOR_UNKNOWN) > mce_err.initiator = MCE_INITIATOR_UNKNOWN; > else if (initiator == RTAS_INITIATOR_CPU) > @@ -588,6 +547,8 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, > mce_err.severity = MCE_SEV_SEVERE; > else if (severity == RTAS_SEVERITY_ERROR) > mce_err.severity = MCE_SEV_SEVERE; > + else if (severity == RTAS_SEVERITY_FATAL) > + mce_err.severity = MCE_SEV_FATAL; > else > mce_err.severity = MCE_SEV_FATAL; > What's this hunk for? > @@ -599,7 +560,18 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, > mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; > mce_err.error_class = MCE_ECLASS_UNKNOWN; > > - switch (error_type) { > + if (!rtas_error_extended(errp)) > + goto out; > + > + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); > + if (!pseries_log) > + goto out; > + > + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; > + error_type = mce_log->error_type; > + err_sub_type = rtas_mc_error_sub_type(mce_log); > + > + switch (mce_log->error_type) { > case MC_ERROR_TYPE_UE: > mce_err.error_type = MCE_ERROR_TYPE_UE; > mce_common_process_ue(regs, &mce_err); > @@ -692,41 +664,45 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, > mce_err.error_type = MCE_ERROR_TYPE_DCACHE; > break; > case MC_ERROR_TYPE_I_CACHE: > - mce_err.error_type = MCE_ERROR_TYPE_ICACHE; > + mce_err.error_type = MCE_ERROR_TYPE_DCACHE; > break; And this one. Doesn't look right. > case MC_ERROR_TYPE_UNKNOWN: > default: > mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; > break; > } > + > +#ifdef CONFIG_PPC_BOOK3S_64 > + if (disposition == RTAS_DISP_NOT_RECOVERED) { > + switch (error_type) { > + case MC_ERROR_TYPE_SLB: > + case MC_ERROR_TYPE_ERAT: > + /* > + * Store the old slb content in paca before flushing. > + * Print this when we go to virtual mode. > + * There are chances that we may hit MCE again if there > + * is a parity error on the SLB entry we trying to read > + * for saving. Hence limit the slb saving to single > + * level of recursion. > + */ > + if (local_paca->in_mce == 1) > + slb_save_contents(local_paca->mce_faulty_slbs); > + flush_and_reload_slb(); > + disposition = RTAS_DISP_FULLY_RECOVERED; > + break; > + default: > + break; > + } > + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { > + /* Platform corrected itself but could be degraded */ > + pr_err("MCE: limited recovery, system may be degraded\n"); > + disposition = RTAS_DISP_FULLY_RECOVERED; > + } I would prefer if you just keep the mce_handle_err_realmode function (can rename it if you want). It's actually changed a bit since the patch being reverted so we don't want to undo that. Thanks, Nick
On 11/24/21 18:40, Nicholas Piggin wrote: > Excerpts from Ganesh Goudar's message of November 24, 2021 7:55 pm: >> Now that we are no longer switching on the mmu in realmode >> mce handler, Revert the commit 4ff753feab02("powerpc/pseries: >> Avoid using addr_to_pfn in real mode") partially, which >> introduced functions mce_handle_err_virtmode/realmode() to >> separate mce handler code which needed translation to enabled. >> >> Signed-off-by: Ganesh Goudar<ganeshgr@linux.ibm.com> >> --- >> arch/powerpc/platforms/pseries/ras.c | 122 +++++++++++---------------- >> 1 file changed, 49 insertions(+), 73 deletions(-) >> >> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c >> index 8613f9cc5798..62e1519b8355 100644 >> --- a/arch/powerpc/platforms/pseries/ras.c >> +++ b/arch/powerpc/platforms/pseries/ras.c >> @@ -511,58 +511,17 @@ int pSeries_system_reset_exception(struct pt_regs *regs) >> return 0; /* need to perform reset */ >> } >> >> -static int mce_handle_err_realmode(int disposition, u8 error_type) >> -{ >> -#ifdef CONFIG_PPC_BOOK3S_64 >> - if (disposition == RTAS_DISP_NOT_RECOVERED) { >> - switch (error_type) { >> - case MC_ERROR_TYPE_ERAT: >> - flush_erat(); >> - disposition = RTAS_DISP_FULLY_RECOVERED; >> - break; >> - case MC_ERROR_TYPE_SLB: >> - /* >> - * Store the old slb content in paca before flushing. >> - * Print this when we go to virtual mode. >> - * There are chances that we may hit MCE again if there >> - * is a parity error on the SLB entry we trying to read >> - * for saving. Hence limit the slb saving to single >> - * level of recursion. >> - */ >> - if (local_paca->in_mce == 1) >> - slb_save_contents(local_paca->mce_faulty_slbs); >> - flush_and_reload_slb(); >> - disposition = RTAS_DISP_FULLY_RECOVERED; >> - break; >> - default: >> - break; >> - } >> - } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { >> - /* Platform corrected itself but could be degraded */ >> - pr_err("MCE: limited recovery, system may be degraded\n"); >> - disposition = RTAS_DISP_FULLY_RECOVERED; >> - } >> -#endif >> - return disposition; >> -} >> - >> -static int mce_handle_err_virtmode(struct pt_regs *regs, >> - struct rtas_error_log *errp, >> - struct pseries_mc_errorlog *mce_log, >> - int disposition) >> +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) >> { >> struct mce_error_info mce_err = { 0 }; >> + unsigned long eaddr = 0, paddr = 0; >> + struct pseries_errorlog *pseries_log; >> + struct pseries_mc_errorlog *mce_log; >> + int disposition = rtas_error_disposition(errp); >> int initiator = rtas_error_initiator(errp); >> int severity = rtas_error_severity(errp); >> - unsigned long eaddr = 0, paddr = 0; >> u8 error_type, err_sub_type; >> >> - if (!mce_log) >> - goto out; >> - >> - error_type = mce_log->error_type; >> - err_sub_type = rtas_mc_error_sub_type(mce_log); >> - >> if (initiator == RTAS_INITIATOR_UNKNOWN) >> mce_err.initiator = MCE_INITIATOR_UNKNOWN; >> else if (initiator == RTAS_INITIATOR_CPU) >> @@ -588,6 +547,8 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, >> mce_err.severity = MCE_SEV_SEVERE; >> else if (severity == RTAS_SEVERITY_ERROR) >> mce_err.severity = MCE_SEV_SEVERE; >> + else if (severity == RTAS_SEVERITY_FATAL) >> + mce_err.severity = MCE_SEV_FATAL; >> else >> mce_err.severity = MCE_SEV_FATAL; >> > What's this hunk for? > >> @@ -599,7 +560,18 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, >> mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; >> mce_err.error_class = MCE_ECLASS_UNKNOWN; >> >> - switch (error_type) { >> + if (!rtas_error_extended(errp)) >> + goto out; >> + >> + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); >> + if (!pseries_log) >> + goto out; >> + >> + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; >> + error_type = mce_log->error_type; >> + err_sub_type = rtas_mc_error_sub_type(mce_log); >> + >> + switch (mce_log->error_type) { >> case MC_ERROR_TYPE_UE: >> mce_err.error_type = MCE_ERROR_TYPE_UE; >> mce_common_process_ue(regs, &mce_err); >> @@ -692,41 +664,45 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, >> mce_err.error_type = MCE_ERROR_TYPE_DCACHE; >> break; >> case MC_ERROR_TYPE_I_CACHE: >> - mce_err.error_type = MCE_ERROR_TYPE_ICACHE; >> + mce_err.error_type = MCE_ERROR_TYPE_DCACHE; >> break; > And this one. Doesn't look right. > >> case MC_ERROR_TYPE_UNKNOWN: >> default: >> mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; >> break; >> } >> + >> +#ifdef CONFIG_PPC_BOOK3S_64 >> + if (disposition == RTAS_DISP_NOT_RECOVERED) { >> + switch (error_type) { >> + case MC_ERROR_TYPE_SLB: >> + case MC_ERROR_TYPE_ERAT: >> + /* >> + * Store the old slb content in paca before flushing. >> + * Print this when we go to virtual mode. >> + * There are chances that we may hit MCE again if there >> + * is a parity error on the SLB entry we trying to read >> + * for saving. Hence limit the slb saving to single >> + * level of recursion. >> + */ >> + if (local_paca->in_mce == 1) >> + slb_save_contents(local_paca->mce_faulty_slbs); >> + flush_and_reload_slb(); >> + disposition = RTAS_DISP_FULLY_RECOVERED; >> + break; >> + default: >> + break; >> + } >> + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { >> + /* Platform corrected itself but could be degraded */ >> + pr_err("MCE: limited recovery, system may be degraded\n"); >> + disposition = RTAS_DISP_FULLY_RECOVERED; >> + } > I would prefer if you just keep the mce_handle_err_realmode function > (can rename it if you want). It's actually changed a bit since the > patch being reverted so we don't want to undo that. Ok, I will leave it as is for now, we can change it later. > > Thanks, > Nick
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 8613f9cc5798..62e1519b8355 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -511,58 +511,17 @@ int pSeries_system_reset_exception(struct pt_regs *regs) return 0; /* need to perform reset */ } -static int mce_handle_err_realmode(int disposition, u8 error_type) -{ -#ifdef CONFIG_PPC_BOOK3S_64 - if (disposition == RTAS_DISP_NOT_RECOVERED) { - switch (error_type) { - case MC_ERROR_TYPE_ERAT: - flush_erat(); - disposition = RTAS_DISP_FULLY_RECOVERED; - break; - case MC_ERROR_TYPE_SLB: - /* - * Store the old slb content in paca before flushing. - * Print this when we go to virtual mode. - * There are chances that we may hit MCE again if there - * is a parity error on the SLB entry we trying to read - * for saving. Hence limit the slb saving to single - * level of recursion. - */ - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - flush_and_reload_slb(); - disposition = RTAS_DISP_FULLY_RECOVERED; - break; - default: - break; - } - } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { - /* Platform corrected itself but could be degraded */ - pr_err("MCE: limited recovery, system may be degraded\n"); - disposition = RTAS_DISP_FULLY_RECOVERED; - } -#endif - return disposition; -} - -static int mce_handle_err_virtmode(struct pt_regs *regs, - struct rtas_error_log *errp, - struct pseries_mc_errorlog *mce_log, - int disposition) +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) { struct mce_error_info mce_err = { 0 }; + unsigned long eaddr = 0, paddr = 0; + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log; + int disposition = rtas_error_disposition(errp); int initiator = rtas_error_initiator(errp); int severity = rtas_error_severity(errp); - unsigned long eaddr = 0, paddr = 0; u8 error_type, err_sub_type; - if (!mce_log) - goto out; - - error_type = mce_log->error_type; - err_sub_type = rtas_mc_error_sub_type(mce_log); - if (initiator == RTAS_INITIATOR_UNKNOWN) mce_err.initiator = MCE_INITIATOR_UNKNOWN; else if (initiator == RTAS_INITIATOR_CPU) @@ -588,6 +547,8 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.severity = MCE_SEV_SEVERE; else if (severity == RTAS_SEVERITY_ERROR) mce_err.severity = MCE_SEV_SEVERE; + else if (severity == RTAS_SEVERITY_FATAL) + mce_err.severity = MCE_SEV_FATAL; else mce_err.severity = MCE_SEV_FATAL; @@ -599,7 +560,18 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; mce_err.error_class = MCE_ECLASS_UNKNOWN; - switch (error_type) { + if (!rtas_error_extended(errp)) + goto out; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = mce_log->error_type; + err_sub_type = rtas_mc_error_sub_type(mce_log); + + switch (mce_log->error_type) { case MC_ERROR_TYPE_UE: mce_err.error_type = MCE_ERROR_TYPE_UE; mce_common_process_ue(regs, &mce_err); @@ -692,41 +664,45 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.error_type = MCE_ERROR_TYPE_DCACHE; break; case MC_ERROR_TYPE_I_CACHE: - mce_err.error_type = MCE_ERROR_TYPE_ICACHE; + mce_err.error_type = MCE_ERROR_TYPE_DCACHE; break; case MC_ERROR_TYPE_UNKNOWN: default: mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; break; } + +#ifdef CONFIG_PPC_BOOK3S_64 + if (disposition == RTAS_DISP_NOT_RECOVERED) { + switch (error_type) { + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + /* + * Store the old slb content in paca before flushing. + * Print this when we go to virtual mode. + * There are chances that we may hit MCE again if there + * is a parity error on the SLB entry we trying to read + * for saving. Hence limit the slb saving to single + * level of recursion. + */ + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + flush_and_reload_slb(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + default: + break; + } + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { + /* Platform corrected itself but could be degraded */ + pr_err("MCE: limited recovery, system may be degraded\n"); + disposition = RTAS_DISP_FULLY_RECOVERED; + } +#endif out: save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, - &mce_err, regs->nip, eaddr, paddr); - return disposition; -} + &mce_err, regs->nip, eaddr, paddr); -static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) -{ - struct pseries_errorlog *pseries_log; - struct pseries_mc_errorlog *mce_log = NULL; - int disposition = rtas_error_disposition(errp); - unsigned long msr; - u8 error_type; - - if (!rtas_error_extended(errp)) - goto out; - - pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); - if (!pseries_log) - goto out; - - mce_log = (struct pseries_mc_errorlog *)pseries_log->data; - error_type = mce_log->error_type; - - disposition = mce_handle_err_realmode(disposition, error_type); -out: - disposition = mce_handle_err_virtmode(regs, errp, mce_log, - disposition); return disposition; }
Now that we are no longer switching on the mmu in realmode mce handler, Revert the commit 4ff753feab02("powerpc/pseries: Avoid using addr_to_pfn in real mode") partially, which introduced functions mce_handle_err_virtmode/realmode() to separate mce handler code which needed translation to enabled. Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> --- arch/powerpc/platforms/pseries/ras.c | 122 +++++++++++---------------- 1 file changed, 49 insertions(+), 73 deletions(-)