Message ID | 1470348870-18714-1-git-send-email-vipin@linux.vnet.ibm.com |
---|---|
State | Superseded |
Headers | show |
Please refer to v2 of this patch. It removes the duplicate manufacturing-mode detection that this version placed under lpc_init. On Friday 05 August 2016 03:44 AM, Vipin K Parashar wrote: > High volume of SYNC errors onto LPC bus cause degraded system > performance and are likely due to bad hardware present onto system. > Thus once LPC SYNC errors cross a certain threshold, OPAL should log > them onto BMC as unrecoverable errors in manufacturing mode. This > will help manufacturing screen bad parts, causing such errors. > > Cc: stable > Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com> > --- > core/platform.c | 8 +++++++- > hw/lpc.c | 32 +++++++++++++++++++++++++++----- > include/errorlog.h | 1 + > include/platform.h | 2 ++ > 4 files changed, 37 insertions(+), 6 deletions(-) > > diff --git a/core/platform.c b/core/platform.c > index de6e406..9730f8d 100644 > --- a/core/platform.c > +++ b/core/platform.c > @@ -24,6 +24,7 @@ > #include <xscom.h> > #include <errorlog.h> > > +bool mfg_mode; > struct platform platform; > > DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC, > @@ -124,8 +125,13 @@ void probe_platform(void) > struct platform *platforms = &__platforms_start; > unsigned int i; > > - platform = generic_platform; > + /* Detect Manufacturing mode */ > + if (dt_find_property(dt_root, "ibm,manufacturing-mode")) { > + printf("PLAT: Manufacturing mode ON\n"); > + mfg_mode = true; > + } > > + platform = generic_platform; > for (i = 0; &platforms[i] < &__platforms_end; i++) { > if (platforms[i].probe && platforms[i].probe()) { > platform = platforms[i]; > diff --git a/hw/lpc.c b/hw/lpc.c > index 32cb7b1..4b76b4d 100644 > --- a/hw/lpc.c > +++ b/hw/lpc.c > @@ -25,6 +25,7 @@ > #include <timebase.h> > #include <errorlog.h> > #include <opal-api.h> > +#include <platform.h> > > //#define DBG_IRQ(fmt...) prerror(fmt) > #define DBG_IRQ(fmt...) 
do { } while(0) > @@ -41,6 +42,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC, > OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL, > OPAL_NA); > > +DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC_PERF, OPAL_PLATFORM_ERR_EVT, OPAL_LPC, > + OPAL_MISC_SUBSYSTEM, OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF, > + OPAL_NA); > + > #define ECCB_CTL 0 /* b0020 -> b00200 */ > #define ECCB_STAT 2 /* b0022 -> b00210 */ > #define ECCB_DATA 3 /* b0023 -> b00218 */ > @@ -110,6 +115,9 @@ DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC, > LPC_HC_IRQ_BM_TAR_ERR) > #define LPC_HC_ERROR_ADDRESS 0x40 > > + > +#define LPC_BUS_DEGRADED_PERF_THRESHOLD 5 > + > struct lpc_client_entry { > struct list_node node; > const struct lpc_client *clt; > @@ -662,8 +670,10 @@ static void lpc_dispatch_reset(struct proc_chip *chip) > static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs) > { > int rc; > + struct opal_err_info *info; > const char *sync_err = "Unknown LPC error"; > uint32_t err_addr; > + static int lpc_bus_err_count; > > /* Write back to clear error interrupts, we clear SerIRQ later > * as they are handled as level interrupts > @@ -690,13 +700,19 @@ static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs) > > rc = opb_read(chip, lpc_reg_opb_base + LPC_HC_ERROR_ADDRESS, > &err_addr, 4); > + > + lpc_bus_err_count++; > + if (mfg_mode && (lpc_bus_err_count > LPC_BUS_DEGRADED_PERF_THRESHOLD)) > + info = &e_info(OPAL_RC_LPC_SYNC_PERF); > + else > + info = &e_info(OPAL_RC_LPC_SYNC); > + > if (rc) > - log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s " > - "Error address: Unknown\n", sync_err); > + log_simple_error(info, "%s Error address: Unknown\n", > + sync_err); > else > - log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s " > - "Error address: 0x%08x\n", > - sync_err, err_addr); > + log_simple_error(info, "%s Error address: 0x%08x\n", > + sync_err, err_addr); > } > > static void lpc_dispatch_ser_irqs(struct proc_chip *chip, uint32_t irqs, > 
@@ -869,6 +885,12 @@ void lpc_init(void) > prlog(PR_NOTICE, "Default bus on chip %d\n", > lpc_default_chip_id); > > + /* Detect Manufacturing mode */ > + if (dt_find_property(dt_root, "ibm,manufacturing-mode")) { > + prlog(PR_INFO, "Manufacturing mode ON\n"); > + mfg_mode = true; > + } > + > if (has_lpc) { > opal_register(OPAL_LPC_WRITE, opal_lpc_write, 5); > opal_register(OPAL_LPC_READ, opal_lpc_read, 5); > diff --git a/include/errorlog.h b/include/errorlog.h > index f89eac9..247198b 100644 > --- a/include/errorlog.h > +++ b/include/errorlog.h > @@ -266,6 +266,7 @@ enum opal_reasoncode { > OPAL_RC_LPC_READ = OPAL_SRC_COMPONENT_LPC | 0x10, > OPAL_RC_LPC_WRITE = OPAL_SRC_COMPONENT_LPC | 0x11, > OPAL_RC_LPC_SYNC = OPAL_SRC_COMPONENT_LPC | 0x12, > + OPAL_RC_LPC_SYNC_PERF = OPAL_SRC_COMPONENT_LPC | 0x13, > /* OP_PANEL */ > OPAL_RC_PANEL_WRITE = OPAL_SRC_COMPONENT_OP_PANEL | 0x10, > /* PSI */ > diff --git a/include/platform.h b/include/platform.h > index 062a941..a2c2fee 100644 > --- a/include/platform.h > +++ b/include/platform.h > @@ -175,6 +175,8 @@ extern struct platform __platforms_end; > > extern struct platform platform; > > +extern bool mfg_mode; > + > #define DECLARE_PLATFORM(name)\ > static const struct platform __used __section(".platforms") name ##_platform >
diff --git a/core/platform.c b/core/platform.c index de6e406..9730f8d 100644 --- a/core/platform.c +++ b/core/platform.c @@ -24,6 +24,7 @@ #include <xscom.h> #include <errorlog.h> +bool mfg_mode; struct platform platform; DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC, @@ -124,8 +125,13 @@ void probe_platform(void) struct platform *platforms = &__platforms_start; unsigned int i; - platform = generic_platform; + /* Detect Manufacturing mode */ + if (dt_find_property(dt_root, "ibm,manufacturing-mode")) { + printf("PLAT: Manufacturing mode ON\n"); + mfg_mode = true; + } + platform = generic_platform; for (i = 0; &platforms[i] < &__platforms_end; i++) { if (platforms[i].probe && platforms[i].probe()) { platform = platforms[i]; diff --git a/hw/lpc.c b/hw/lpc.c index 32cb7b1..4b76b4d 100644 --- a/hw/lpc.c +++ b/hw/lpc.c @@ -25,6 +25,7 @@ #include <timebase.h> #include <errorlog.h> #include <opal-api.h> +#include <platform.h> //#define DBG_IRQ(fmt...) prerror(fmt) #define DBG_IRQ(fmt...) 
do { } while(0) @@ -41,6 +42,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC, OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL, OPAL_NA); +DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC_PERF, OPAL_PLATFORM_ERR_EVT, OPAL_LPC, + OPAL_MISC_SUBSYSTEM, OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF, + OPAL_NA); + #define ECCB_CTL 0 /* b0020 -> b00200 */ #define ECCB_STAT 2 /* b0022 -> b00210 */ #define ECCB_DATA 3 /* b0023 -> b00218 */ @@ -110,6 +115,9 @@ DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC, LPC_HC_IRQ_BM_TAR_ERR) #define LPC_HC_ERROR_ADDRESS 0x40 + +#define LPC_BUS_DEGRADED_PERF_THRESHOLD 5 + struct lpc_client_entry { struct list_node node; const struct lpc_client *clt; @@ -662,8 +670,10 @@ static void lpc_dispatch_reset(struct proc_chip *chip) static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs) { int rc; + struct opal_err_info *info; const char *sync_err = "Unknown LPC error"; uint32_t err_addr; + static int lpc_bus_err_count; /* Write back to clear error interrupts, we clear SerIRQ later * as they are handled as level interrupts @@ -690,13 +700,19 @@ static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs) rc = opb_read(chip, lpc_reg_opb_base + LPC_HC_ERROR_ADDRESS, &err_addr, 4); + + lpc_bus_err_count++; + if (mfg_mode && (lpc_bus_err_count > LPC_BUS_DEGRADED_PERF_THRESHOLD)) + info = &e_info(OPAL_RC_LPC_SYNC_PERF); + else + info = &e_info(OPAL_RC_LPC_SYNC); + if (rc) - log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s " - "Error address: Unknown\n", sync_err); + log_simple_error(info, "%s Error address: Unknown\n", + sync_err); else - log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s " - "Error address: 0x%08x\n", - sync_err, err_addr); + log_simple_error(info, "%s Error address: 0x%08x\n", + sync_err, err_addr); } static void lpc_dispatch_ser_irqs(struct proc_chip *chip, uint32_t irqs, @@ -869,6 +885,12 @@ void lpc_init(void) prlog(PR_NOTICE, "Default bus on chip %d\n", lpc_default_chip_id); + /* 
Detect Manufacturing mode */ + if (dt_find_property(dt_root, "ibm,manufacturing-mode")) { + prlog(PR_INFO, "Manufacturing mode ON\n"); + mfg_mode = true; + } + if (has_lpc) { opal_register(OPAL_LPC_WRITE, opal_lpc_write, 5); opal_register(OPAL_LPC_READ, opal_lpc_read, 5); diff --git a/include/errorlog.h b/include/errorlog.h index f89eac9..247198b 100644 --- a/include/errorlog.h +++ b/include/errorlog.h @@ -266,6 +266,7 @@ enum opal_reasoncode { OPAL_RC_LPC_READ = OPAL_SRC_COMPONENT_LPC | 0x10, OPAL_RC_LPC_WRITE = OPAL_SRC_COMPONENT_LPC | 0x11, OPAL_RC_LPC_SYNC = OPAL_SRC_COMPONENT_LPC | 0x12, + OPAL_RC_LPC_SYNC_PERF = OPAL_SRC_COMPONENT_LPC | 0x13, /* OP_PANEL */ OPAL_RC_PANEL_WRITE = OPAL_SRC_COMPONENT_OP_PANEL | 0x10, /* PSI */ diff --git a/include/platform.h b/include/platform.h index 062a941..a2c2fee 100644 --- a/include/platform.h +++ b/include/platform.h @@ -175,6 +175,8 @@ extern struct platform __platforms_end; extern struct platform platform; +extern bool mfg_mode; + #define DECLARE_PLATFORM(name)\ static const struct platform __used __section(".platforms") name ##_platform
A high volume of SYNC errors on the LPC bus causes degraded system performance and is likely due to bad hardware present on the system. Thus, once LPC SYNC errors cross a certain threshold, OPAL should log them to the BMC as unrecoverable errors in manufacturing mode. This will help manufacturing screen out the bad parts causing such errors. Cc: stable Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com> --- core/platform.c | 8 +++++++- hw/lpc.c | 32 +++++++++++++++++++++++++++----- include/errorlog.h | 1 + include/platform.h | 2 ++ 4 files changed, 37 insertions(+), 6 deletions(-)