diff mbox

lpc: Log LPC SYNC errors as unrecoverable ones for manufacturing

Message ID 1470348870-18714-1-git-send-email-vipin@linux.vnet.ibm.com
State Superseded
Headers show

Commit Message

Vipin K Parashar Aug. 4, 2016, 10:14 p.m. UTC
A high volume of SYNC errors on the LPC bus causes degraded system
performance and is likely due to bad hardware present in the system.
Thus, once LPC SYNC errors cross a certain threshold, OPAL should log
them to the BMC as unrecoverable errors in manufacturing mode. This
will help manufacturing screen out the bad parts causing such errors.

Cc: stable
Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com>
---
 core/platform.c    |  8 +++++++-
 hw/lpc.c           | 32 +++++++++++++++++++++++++++-----
 include/errorlog.h |  1 +
 include/platform.h |  2 ++
 4 files changed, 37 insertions(+), 6 deletions(-)

Comments

Vipin K Parashar Aug. 4, 2016, 11 p.m. UTC | #1
Please refer to v2 of this patch instead.

It removes the duplicate manufacturing-mode detection that was placed in lpc_init.


On Friday 05 August 2016 03:44 AM, Vipin K Parashar wrote:
> A high volume of SYNC errors on the LPC bus causes degraded system
> performance and is likely due to bad hardware present in the system.
> Thus, once LPC SYNC errors cross a certain threshold, OPAL should log
> them to the BMC as unrecoverable errors in manufacturing mode. This
> will help manufacturing screen out the bad parts causing such errors.
>
> Cc: stable
> Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com>
> ---
>   core/platform.c    |  8 +++++++-
>   hw/lpc.c           | 32 +++++++++++++++++++++++++++-----
>   include/errorlog.h |  1 +
>   include/platform.h |  2 ++
>   4 files changed, 37 insertions(+), 6 deletions(-)
>
> diff --git a/core/platform.c b/core/platform.c
> index de6e406..9730f8d 100644
> --- a/core/platform.c
> +++ b/core/platform.c
> @@ -24,6 +24,7 @@
>   #include <xscom.h>
>   #include <errorlog.h>
>
> +bool mfg_mode;
>   struct platform	platform;
>
>   DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
> @@ -124,8 +125,13 @@ void probe_platform(void)
>   	struct platform *platforms = &__platforms_start;
>   	unsigned int i;
>
> -	platform = generic_platform;
> +	/* Detect Manufacturing mode */
> +	if (dt_find_property(dt_root, "ibm,manufacturing-mode")) {
> +		printf("PLAT: Manufacturing mode ON\n");
> +		mfg_mode = true;
> +	}
>
> +	platform = generic_platform;
>   	for (i = 0; &platforms[i] < &__platforms_end; i++) {
>   		if (platforms[i].probe && platforms[i].probe()) {
>   			platform = platforms[i];
> diff --git a/hw/lpc.c b/hw/lpc.c
> index 32cb7b1..4b76b4d 100644
> --- a/hw/lpc.c
> +++ b/hw/lpc.c
> @@ -25,6 +25,7 @@
>   #include <timebase.h>
>   #include <errorlog.h>
>   #include <opal-api.h>
> +#include <platform.h>
>
>   //#define DBG_IRQ(fmt...) prerror(fmt)
>   #define DBG_IRQ(fmt...) do { } while(0)
> @@ -41,6 +42,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC,
>   		 OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL,
>   		 OPAL_NA);
>
> +DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC_PERF, OPAL_PLATFORM_ERR_EVT, OPAL_LPC,
> +		 OPAL_MISC_SUBSYSTEM, OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF,
> +		 OPAL_NA);
> +
>   #define ECCB_CTL	0 /* b0020 -> b00200 */
>   #define ECCB_STAT	2 /* b0022 -> b00210 */
>   #define ECCB_DATA	3 /* b0023 -> b00218 */
> @@ -110,6 +115,9 @@ DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC,
>   	LPC_HC_IRQ_BM_TAR_ERR)
>   #define LPC_HC_ERROR_ADDRESS	0x40
>
> +
> +#define	LPC_BUS_DEGRADED_PERF_THRESHOLD		5
> +
>   struct lpc_client_entry {
>   	struct list_node node;
>   	const struct lpc_client *clt;
> @@ -662,8 +670,10 @@ static void lpc_dispatch_reset(struct proc_chip *chip)
>   static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs)
>   {
>   	int rc;
> +	struct opal_err_info *info;
>   	const char *sync_err = "Unknown LPC error";
>   	uint32_t err_addr;
> +	static int lpc_bus_err_count;
>
>   	/* Write back to clear error interrupts, we clear SerIRQ later
>   	 * as they are handled as level interrupts
> @@ -690,13 +700,19 @@ static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs)
>
>   	rc = opb_read(chip, lpc_reg_opb_base + LPC_HC_ERROR_ADDRESS,
>   		      &err_addr, 4);
> +
> +	lpc_bus_err_count++;
> +	if (mfg_mode && (lpc_bus_err_count > LPC_BUS_DEGRADED_PERF_THRESHOLD))
> +		info = &e_info(OPAL_RC_LPC_SYNC_PERF);
> +	else
> +		info = &e_info(OPAL_RC_LPC_SYNC);
> +
>   	if (rc)
> -		log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s "
> -			"Error address: Unknown\n", sync_err);
> +		log_simple_error(info, "%s Error address: Unknown\n",
> +					sync_err);
>   	else
> -		log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s "
> -			"Error address: 0x%08x\n",
> -			sync_err, err_addr);
> +		log_simple_error(info, "%s Error address: 0x%08x\n",
> +					sync_err, err_addr);
>   }
>
>   static void lpc_dispatch_ser_irqs(struct proc_chip *chip, uint32_t irqs,
> @@ -869,6 +885,12 @@ void lpc_init(void)
>   		prlog(PR_NOTICE, "Default bus on chip %d\n",
>   					lpc_default_chip_id);
>
> +	/* Detect Manufacturing mode */
> +	if (dt_find_property(dt_root, "ibm,manufacturing-mode")) {
> +		prlog(PR_INFO, "Manufacturing mode ON\n");
> +		mfg_mode = true;
> +	}
> +
>   	if (has_lpc) {
>   		opal_register(OPAL_LPC_WRITE, opal_lpc_write, 5);
>   		opal_register(OPAL_LPC_READ, opal_lpc_read, 5);
> diff --git a/include/errorlog.h b/include/errorlog.h
> index f89eac9..247198b 100644
> --- a/include/errorlog.h
> +++ b/include/errorlog.h
> @@ -266,6 +266,7 @@ enum opal_reasoncode {
>   	OPAL_RC_LPC_READ	    = OPAL_SRC_COMPONENT_LPC | 0x10,
>   	OPAL_RC_LPC_WRITE	    = OPAL_SRC_COMPONENT_LPC | 0x11,
>   	OPAL_RC_LPC_SYNC	    = OPAL_SRC_COMPONENT_LPC | 0x12,
> +	OPAL_RC_LPC_SYNC_PERF	    = OPAL_SRC_COMPONENT_LPC | 0x13,
>   /* OP_PANEL */
>   	OPAL_RC_PANEL_WRITE	    = OPAL_SRC_COMPONENT_OP_PANEL | 0x10,
>   /* PSI */
> diff --git a/include/platform.h b/include/platform.h
> index 062a941..a2c2fee 100644
> --- a/include/platform.h
> +++ b/include/platform.h
> @@ -175,6 +175,8 @@ extern struct platform __platforms_end;
>
>   extern struct platform	platform;
>
> +extern bool mfg_mode;
> +
>   #define DECLARE_PLATFORM(name)\
>   static const struct platform __used __section(".platforms") name ##_platform
>
diff mbox

Patch

diff --git a/core/platform.c b/core/platform.c
index de6e406..9730f8d 100644
--- a/core/platform.c
+++ b/core/platform.c
@@ -24,6 +24,7 @@ 
 #include <xscom.h>
 #include <errorlog.h>
 
+bool mfg_mode;
 struct platform	platform;
 
 DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
@@ -124,8 +125,13 @@  void probe_platform(void)
 	struct platform *platforms = &__platforms_start;
 	unsigned int i;
 
-	platform = generic_platform;
+	/* Detect Manufacturing mode */
+	if (dt_find_property(dt_root, "ibm,manufacturing-mode")) {
+		printf("PLAT: Manufacturing mode ON\n");
+		mfg_mode = true;
+	}
 
+	platform = generic_platform;
 	for (i = 0; &platforms[i] < &__platforms_end; i++) {
 		if (platforms[i].probe && platforms[i].probe()) {
 			platform = platforms[i];
diff --git a/hw/lpc.c b/hw/lpc.c
index 32cb7b1..4b76b4d 100644
--- a/hw/lpc.c
+++ b/hw/lpc.c
@@ -25,6 +25,7 @@ 
 #include <timebase.h>
 #include <errorlog.h>
 #include <opal-api.h>
+#include <platform.h>
 
 //#define DBG_IRQ(fmt...) prerror(fmt)
 #define DBG_IRQ(fmt...) do { } while(0)
@@ -41,6 +42,10 @@  DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC,
 		 OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL,
 		 OPAL_NA);
 
+DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC_PERF, OPAL_PLATFORM_ERR_EVT, OPAL_LPC,
+		 OPAL_MISC_SUBSYSTEM, OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF,
+		 OPAL_NA);
+
 #define ECCB_CTL	0 /* b0020 -> b00200 */
 #define ECCB_STAT	2 /* b0022 -> b00210 */
 #define ECCB_DATA	3 /* b0023 -> b00218 */
@@ -110,6 +115,9 @@  DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC,
 	LPC_HC_IRQ_BM_TAR_ERR)
 #define LPC_HC_ERROR_ADDRESS	0x40
 
+
+#define	LPC_BUS_DEGRADED_PERF_THRESHOLD		5
+
 struct lpc_client_entry {
 	struct list_node node;
 	const struct lpc_client *clt;
@@ -662,8 +670,10 @@  static void lpc_dispatch_reset(struct proc_chip *chip)
 static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs)
 {
 	int rc;
+	struct opal_err_info *info;
 	const char *sync_err = "Unknown LPC error";
 	uint32_t err_addr;
+	static int lpc_bus_err_count;
 
 	/* Write back to clear error interrupts, we clear SerIRQ later
 	 * as they are handled as level interrupts
@@ -690,13 +700,19 @@  static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs)
 
 	rc = opb_read(chip, lpc_reg_opb_base + LPC_HC_ERROR_ADDRESS,
 		      &err_addr, 4);
+
+	lpc_bus_err_count++;
+	if (mfg_mode && (lpc_bus_err_count > LPC_BUS_DEGRADED_PERF_THRESHOLD))
+		info = &e_info(OPAL_RC_LPC_SYNC_PERF);
+	else
+		info = &e_info(OPAL_RC_LPC_SYNC);
+
 	if (rc)
-		log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s "
-			"Error address: Unknown\n", sync_err);
+		log_simple_error(info, "%s Error address: Unknown\n",
+					sync_err);
 	else
-		log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s "
-			"Error address: 0x%08x\n",
-			sync_err, err_addr);
+		log_simple_error(info, "%s Error address: 0x%08x\n",
+					sync_err, err_addr);
 }
 
 static void lpc_dispatch_ser_irqs(struct proc_chip *chip, uint32_t irqs,
@@ -869,6 +885,12 @@  void lpc_init(void)
 		prlog(PR_NOTICE, "Default bus on chip %d\n",
 					lpc_default_chip_id);
 
+	/* Detect Manufacturing mode */
+	if (dt_find_property(dt_root, "ibm,manufacturing-mode")) {
+		prlog(PR_INFO, "Manufacturing mode ON\n");
+		mfg_mode = true;
+	}
+
 	if (has_lpc) {
 		opal_register(OPAL_LPC_WRITE, opal_lpc_write, 5);
 		opal_register(OPAL_LPC_READ, opal_lpc_read, 5);
diff --git a/include/errorlog.h b/include/errorlog.h
index f89eac9..247198b 100644
--- a/include/errorlog.h
+++ b/include/errorlog.h
@@ -266,6 +266,7 @@  enum opal_reasoncode {
 	OPAL_RC_LPC_READ	    = OPAL_SRC_COMPONENT_LPC | 0x10,
 	OPAL_RC_LPC_WRITE	    = OPAL_SRC_COMPONENT_LPC | 0x11,
 	OPAL_RC_LPC_SYNC	    = OPAL_SRC_COMPONENT_LPC | 0x12,
+	OPAL_RC_LPC_SYNC_PERF	    = OPAL_SRC_COMPONENT_LPC | 0x13,
 /* OP_PANEL */
 	OPAL_RC_PANEL_WRITE	    = OPAL_SRC_COMPONENT_OP_PANEL | 0x10,
 /* PSI */
diff --git a/include/platform.h b/include/platform.h
index 062a941..a2c2fee 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -175,6 +175,8 @@  extern struct platform __platforms_end;
 
 extern struct platform	platform;
 
+extern bool mfg_mode;
+
 #define DECLARE_PLATFORM(name)\
 static const struct platform __used __section(".platforms") name ##_platform