Message ID | 20150206025417.21378.22759.stgit@thinktux.in.ibm.com |
---|---|
State | Accepted |
Headers | show |
On 02/06/2015 08:24 AM, Ananth N Mavinakayanahalli wrote:
> We currently just log an error when we don't find an active PSI link
> 15 minutes after it went down. Add a PEL log, with sufficient severity
> so it gets pushed to the administrator.
>
> V2:
> Reset the timeout correctly to prevent error log flooding.
>
> Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>

Tested-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>

-Vasant

> ---
>  hw/psi.c | 12 +++++++++---
>  include/errorlog.h | 1 +
>  2 files changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/hw/psi.c b/hw/psi.c
> index 70403fd..b4804ff 100644
> --- a/hw/psi.c
> +++ b/hw/psi.c
> @@ -31,6 +31,7 @@
>  #include <i2c.h>
>  #include <timebase.h>
>  #include <platform.h>
> +#include <errorlog.h>
>
>  static LIST_HEAD(psis);
>  static u64 psi_link_timer;
> @@ -43,6 +44,10 @@ static void psi_activate_phb(struct psi *psi);
>
>  static struct lock psi_lock = LOCK_UNLOCKED;
>
> +DEFINE_LOG_ENTRY(OPAL_RC_PSI_TIMEOUT, OPAL_PLATFORM_ERR_EVT, OPAL_PSI,
> +		OPAL_PLATFORM_FIRMWARE,
> +		OPAL_UNRECOVERABLE_ERR_LOSS_OF_FUNCTION, OPAL_NA, NULL);
> +
>  void psi_set_link_polling(bool active)
>  {
>  	printf("PSI: %sing link polling\n",
> @@ -200,9 +205,10 @@ static void psi_link_poll(void *data __unused)
>  			now + secs_to_tb(PSI_LINK_RECOVERY_TIMEOUT);
>
>  	if (tb_compare(now, psi_link_timeout) == TB_AAFTERB) {
> -		prerror("PSI: Timed out looking for a PSI link\n");
> -
> -		/* Log error to the host from here */
> +		log_simple_error(&e_info(OPAL_RC_PSI_TIMEOUT),
> +			"PSI: Link timeout -- loss of FSP\n");
> +		/* Reset the link timeout and continue looking */
> +		psi_link_timeout = 0;
>  	}
>
>  	/* Poll every 10 seconds */
> diff --git a/include/errorlog.h b/include/errorlog.h
> index 7879b7b..1bcc03e 100644
> --- a/include/errorlog.h
> +++ b/include/errorlog.h
> @@ -268,6 +268,7 @@ enum opal_reasoncode {
>  	/* PSI */
>  	OPAL_RC_PSI_INIT = OPAL_PS | 0x10,
>  	OPAL_RC_PSI_IRQ_RESET = OPAL_PS | 0x11,
> +	OPAL_RC_PSI_TIMEOUT = OPAL_PS | 0X12,
>  	/* XSCOM */
>  	OPAL_RC_XSCOM_RW = OPAL_XS | 0x10,
>  	OPAL_RC_XSCOM_INDIRECT_RW = OPAL_XS | 0x11,
>
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
>
diff --git a/hw/psi.c b/hw/psi.c
index 70403fd..b4804ff 100644
--- a/hw/psi.c
+++ b/hw/psi.c
@@ -31,6 +31,7 @@
 #include <i2c.h>
 #include <timebase.h>
 #include <platform.h>
+#include <errorlog.h>
 
 static LIST_HEAD(psis);
 static u64 psi_link_timer;
@@ -43,6 +44,10 @@ static void psi_activate_phb(struct psi *psi);
 
 static struct lock psi_lock = LOCK_UNLOCKED;
 
+DEFINE_LOG_ENTRY(OPAL_RC_PSI_TIMEOUT, OPAL_PLATFORM_ERR_EVT, OPAL_PSI,
+		OPAL_PLATFORM_FIRMWARE,
+		OPAL_UNRECOVERABLE_ERR_LOSS_OF_FUNCTION, OPAL_NA, NULL);
+
 void psi_set_link_polling(bool active)
 {
 	printf("PSI: %sing link polling\n",
@@ -200,9 +205,10 @@ static void psi_link_poll(void *data __unused)
 			now + secs_to_tb(PSI_LINK_RECOVERY_TIMEOUT);
 
 	if (tb_compare(now, psi_link_timeout) == TB_AAFTERB) {
-		prerror("PSI: Timed out looking for a PSI link\n");
-
-		/* Log error to the host from here */
+		log_simple_error(&e_info(OPAL_RC_PSI_TIMEOUT),
+			"PSI: Link timeout -- loss of FSP\n");
+		/* Reset the link timeout and continue looking */
+		psi_link_timeout = 0;
 	}
 
 	/* Poll every 10 seconds */
diff --git a/include/errorlog.h b/include/errorlog.h
index 7879b7b..1bcc03e 100644
--- a/include/errorlog.h
+++ b/include/errorlog.h
@@ -268,6 +268,7 @@ enum opal_reasoncode {
 	/* PSI */
 	OPAL_RC_PSI_INIT = OPAL_PS | 0x10,
 	OPAL_RC_PSI_IRQ_RESET = OPAL_PS | 0x11,
+	OPAL_RC_PSI_TIMEOUT = OPAL_PS | 0X12,
 	/* XSCOM */
 	OPAL_RC_XSCOM_RW = OPAL_XS | 0x10,
 	OPAL_RC_XSCOM_INDIRECT_RW = OPAL_XS | 0x11,
We currently just log an error when we don't find an active PSI link
15 minutes after it went down. Add a PEL log, with sufficient severity
so it gets pushed to the administrator.

V2:
Reset the timeout correctly to prevent error log flooding.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
---
 hw/psi.c | 12 +++++++++---
 include/errorlog.h | 1 +
 2 files changed, 10 insertions(+), 3 deletions(-)