diff mbox

[v3] hw/xscom: Reset XSCOM engine after finite number of retries when busy

Message ID 201605311139.u4VBdR4h016885@mx0a-001b2d01.pphosted.com
State Superseded
Headers show

Commit Message

Vipin K Parashar May 31, 2016, 11:39 a.m. UTC
OPAL retries XSCOM read/write operations forever till it succeeds.
This can cause XSCOM ops to hang forever when XSCOM engine remains
busy for some reason. Changed it to retry XSCOM operations only
XSCOM_BUSY_MAX_RETRIES number of times instead of retrying forever.
Also added logic to reset XSCOM engine after XSCOM_BUSY_RESET_THRESHOLD
number of retries to unblock it when it remains busy.

Cc: stable # 9c2d82394fd2 ("xscom: Return OPAL_WRONG_STATE on XSCOM ops..")
Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com>
Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
---
Changes in v3:
 - Added delay of 10ms after XSCOM engine reset when XSCOM is found busy.
 - Modified 'if' condition to check return value of xscom_handle_error().

Changes in v2:
 - Changed newly added macro names to slightly more intuitive names.
   Used XSCOM_BUSY_MAX_RETRIES to signify total retries allowed if
   XSCOM remains busy and XSCOM_BUSY_RESET_THRESHOLD to hold threshold
   count for resetting XSCOM before retrying XSCOM operation again.

 hw/xscom.c         | 76 ++++++++++++++++++++++++++++++++++++++++++------------
 include/errorlog.h |  1 +
 include/xscom.h    |  6 +++++
 3 files changed, 66 insertions(+), 17 deletions(-)

Comments

Mahesh J Salgaonkar June 3, 2016, 5:46 a.m. UTC | #1
On 2016-05-31 17:09:01 Tue, Vipin K Parashar wrote:
> OPAL retries XSCOM read/write operations forever till it succeeds.
> This can cause XSCOM ops to hang forever when XSCOM engine remains
> busy for some reason. Changed it to retry XSCOM operations only
> XSCOM_BUSY_MAX_RETRIES number of times instead of retrying forever.
> Also added logic to reset XSCOM engine after XSCOM_BUSY_RESET_THRESHOLD
> number of retries to unblock it when it remains busy.
> 
> Cc: stable # 9c2d82394fd2 ("xscom: Return OPAL_WRONG_STATE on XSCOM ops..")
> Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com>
> Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
> ---
> Changes in v3:
>  - Added delay of 10ms after XSCOM engine reset when XSCOM is found busy.
>  - Modified 'if' condition to check return value of xscom_handle_error().
> 
> Changes in v2:
>  - Changed newly added macro names to slightly more intuitive names.
>    Used XSCOM_BUSY_MAX_RETRIES to signify total retries allowed if
>    XSCOM remains busy and XSCOM_BUSY_RESET_THRESHOLD to hold threshold
>    count for resetting XSCOM before retrying XSCOM operation again.
> 
>  hw/xscom.c         | 76 ++++++++++++++++++++++++++++++++++++++++++------------
>  include/errorlog.h |  1 +
>  include/xscom.h    |  6 +++++
>  3 files changed, 66 insertions(+), 17 deletions(-)
> 
> diff --git a/hw/xscom.c b/hw/xscom.c
> index 84f72f5..2649a50 100644
> --- a/hw/xscom.c
> +++ b/hw/xscom.c
> @@ -23,6 +23,7 @@
>  #include <centaur.h>
>  #include <errorlog.h>
>  #include <opal-api.h>
> +#include <timebase.h>
>  
>  /* Mask of bits to clear in HMER before an access */
>  #define HMER_CLR_MASK	(~(SPR_HMER_XSCOM_FAIL | \
> @@ -41,6 +42,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_XSCOM_RESET, OPAL_PLATFORM_ERR_EVT, OPAL_XSCOM,
>  		OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_GENERAL,
>  		OPAL_NA);
>  
> +DEFINE_LOG_ENTRY(OPAL_RC_XSCOM_BUSY, OPAL_PLATFORM_ERR_EVT, OPAL_XSCOM,
> +		OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_GENERAL,
> +		OPAL_NA);
> +
>  /* xscom details to trigger xstop */
>  static struct {
>  	uint64_t addr;
> @@ -118,18 +123,46 @@ static void xscom_reset(uint32_t gcid)
>  	 */
>  }
>  
> -static int xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
> -			       bool is_write)
> +static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
> +			      bool is_write, int64_t retries)
>  {
> +	struct timespec ts;
>  	unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer);
>  
>  	/* XXX Figure out error codes from doc and error
>  	 * recovery procedures
>  	 */
>  	switch(stat) {
> -	/* XSCOM blocked, just retry */
> +	/*
> +	 * XSCOM engine is blocked, need to retry. Reset XSCOM engine
> +	 * after crossing retry threshold before retrying again.
> +	 */
>  	case 1:
> +		if (retries && !(retries  % XSCOM_BUSY_RESET_THRESHOLD)) {
> +			prlog(PR_NOTICE, "XSCOM: Busy even after %d retries, "
> +				"resetting XSCOM now. Total retries  = %lld\n",
> +				XSCOM_BUSY_RESET_THRESHOLD, retries);
> +			xscom_reset(gcid);
> +
> +			/*
> +			 * Its observed that sometimes immediate retry of
> +			 * XSCOM operation returns wrong data. Adding a
> +			 * delay for XSCOM reset to be effective. Delay of
> +			 * 10 ms is found to be working fine experimentally.
> +			 */

I understand that today we are not sure about how to verify whether xscom reset
has succeeded or not and hence this patch adds the delay. But I think we
should add FIXME comment here so that in future we can fix this
properly by talking to hardware folks to find out alternate way to
verify xscom reset completion status.

> +			ts.tv_sec = 0;
> +			ts.tv_nsec = 10 * 1000;
> +			nanosleep_nopoll(&ts, NULL);
> +		}
> +
> +		/* Log error if we have retried enough and its still busy */
> +		if (retries == XSCOM_BUSY_MAX_RETRIES)
> +			log_simple_error(&e_info(OPAL_RC_XSCOM_BUSY),
> +				"XSCOM: %s-busy error gcid=0x%x pcb_addr=0x%x "
> +				"stat=0x%x\n", is_write ? "write" : "read",
> +				gcid, pcb_addr, stat);
>  		return OPAL_BUSY;
> +
>  	/* CPU is asleep, don't retry */
>  	case 2:
>  		return OPAL_WRONG_STATE;
> @@ -177,15 +210,16 @@ static bool xscom_gcid_ok(uint32_t gcid)
>   */
>  static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
>  {
> +	int i;
>  	uint64_t hmer;
> -	int64_t ret;
> +	int64_t ret, retries = 0;
>  
>  	if (!xscom_gcid_ok(gcid)) {
>  		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
>  		return OPAL_PARAMETER;
>  	}
>  
> -	for (;;) {
> +	for (i = 0; i <= XSCOM_BUSY_MAX_RETRIES; i++) {
>  		/* Clear status bits in HMER (HMER is special
>  		 * writing to it *ands* bits
>  		 */
> @@ -199,27 +233,31 @@ static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
>  
>  		/* Check for error */
>  		if (!(hmer & SPR_HMER_XSCOM_FAIL))
> -			break;
> +			return OPAL_SUCCESS;
>  
>  		/* Handle error and possibly eventually retry */
> -		ret = xscom_handle_error(hmer, gcid, pcb_addr, false);
> -		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
> -			return ret;
> +		ret = xscom_handle_error(hmer, gcid, pcb_addr, false, retries);
> +		if (ret != OPAL_BUSY)
> +			break;
> +		retries++;

I think we can get rid off extra variable here 'retries'. Just pass
variable 'i' to xscom_handle_error() which anyway gets incremented as
part of for loop.

>  	}
> -	return OPAL_SUCCESS;
> +
> +	prerror("XSCOM: Read failed, ret =  %lld\n", ret);
> +	return ret;
>  }
>  
>  static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
>  {
> +	int i;
>  	uint64_t hmer;
> -	int64_t ret;
> +	int64_t ret, retries = 0;
>  
>  	if (!xscom_gcid_ok(gcid)) {
>  		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
>  		return OPAL_PARAMETER;
>  	}
>  
> -	for (;;) {
> +	for (i = 0; i <= XSCOM_BUSY_MAX_RETRIES; i++) {
>  		/* Clear status bits in HMER (HMER is special
>  		 * writing to it *ands* bits
>  		 */
> @@ -233,14 +271,18 @@ static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
>  
>  		/* Check for error */
>  		if (!(hmer & SPR_HMER_XSCOM_FAIL))
> -			break;
> +			return OPAL_SUCCESS;
>  
>  		/* Handle error and possibly eventually retry */
> -		ret = xscom_handle_error(hmer, gcid, pcb_addr, true);
> -		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
> -			return ret;
> +		ret = xscom_handle_error(hmer, gcid, pcb_addr, true, retries);
> +		if (ret == OPAL_BUSY)
> +			retries++;
> +		else
> +			break;

Same here..


Thanks,
-Mahesh.
Vaidyanathan Srinivasan June 3, 2016, 5:59 a.m. UTC | #2
* Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> [2016-06-03 11:16:30]:

> On 2016-05-31 17:09:01 Tue, Vipin K Parashar wrote:

[snip]

> I understand that today we are not sure about how to verify whether xscom reset
> has succeeded or not and hence this patch adds the delay. But I think we
> should add FIXME comment here so that in future we can fix this
> properly by talking to hardware folks to find out alternate way to
> verify xscom reset completion status.

Yes, we could add FIXME in comment until we can figure out the exact
cycle checks from hardware team. But this is a very very rare recovery
case/scenario and should not hold-up OPAL under normal operation.
 
> > +			ts.tv_sec = 0;
> > +			ts.tv_nsec = 10 * 1000;
> > +			nanosleep_nopoll(&ts, NULL);
> > +		}
> > +
> > +		/* Log error if we have retried enough and its still busy */
> > +		if (retries == XSCOM_BUSY_MAX_RETRIES)
> > +			log_simple_error(&e_info(OPAL_RC_XSCOM_BUSY),
> > +				"XSCOM: %s-busy error gcid=0x%x pcb_addr=0x%x "
> > +				"stat=0x%x\n", is_write ? "write" : "read",
> > +				gcid, pcb_addr, stat);
> >  		return OPAL_BUSY;
> > +
> >  	/* CPU is asleep, don't retry */
> >  	case 2:
> >  		return OPAL_WRONG_STATE;
> > @@ -177,15 +210,16 @@ static bool xscom_gcid_ok(uint32_t gcid)
> >   */
> >  static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
> >  {
> > +	int i;
> >  	uint64_t hmer;
> > -	int64_t ret;
> > +	int64_t ret, retries = 0;
> >  
> >  	if (!xscom_gcid_ok(gcid)) {
> >  		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
> >  		return OPAL_PARAMETER;
> >  	}
> >  
> > -	for (;;) {
> > +	for (i = 0; i <= XSCOM_BUSY_MAX_RETRIES; i++) {

        for (retries = 0; retries <= XSCOM_BUSY_MAX_RETRIES; retries++)

> >  		/* Clear status bits in HMER (HMER is special
> >  		 * writing to it *ands* bits
> >  		 */
> > @@ -199,27 +233,31 @@ static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
> >  
> >  		/* Check for error */
> >  		if (!(hmer & SPR_HMER_XSCOM_FAIL))
> > -			break;
> > +			return OPAL_SUCCESS;
> >  
> >  		/* Handle error and possibly eventually retry */
> > -		ret = xscom_handle_error(hmer, gcid, pcb_addr, false);
> > -		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
> > -			return ret;
> > +		ret = xscom_handle_error(hmer, gcid, pcb_addr, false, retries);
> > +		if (ret != OPAL_BUSY)
> > +			break;
> > +		retries++;
> 
> I think we can get rid off extra variable here 'retries'. Just pass
> variable 'i' to xscom_handle_error() which anyway gets incremented as
> part of for loop.

Good idea. How about keep the name retries, and remove i :)

--Vaidy
Vipin K Parashar June 6, 2016, 6:21 a.m. UTC | #3
Thanks!! Mahesh and Vaidy for review.
I will sent out new revision addressing review comments.

Regards,
Vipin

On Friday 03 June 2016 11:29 AM, Vaidyanathan Srinivasan wrote:
> * Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> [2016-06-03 11:16:30]:
>
>> On 2016-05-31 17:09:01 Tue, Vipin K Parashar wrote:
> [snip]
>
>> I understand that today we are not sure about how to verify whether xscom reset
>> has succeeded or not and hence this patch adds the delay. But I think we
>> should add FIXME comment here so that in future we can fix this
>> properly by talking to hardware folks to find out alternate way to
>> verify xscom reset completion status.
> Yes, we could add FIXME in comment until we can figure out the exact
> cycle checks from hardware team. But this is a very very rare recovery
> case/scenario and should not hold-up OPAL under normal operation.
>
>>> +			ts.tv_sec = 0;
>>> +			ts.tv_nsec = 10 * 1000;
>>> +			nanosleep_nopoll(&ts, NULL);
>>> +		}
>>> +
>>> +		/* Log error if we have retried enough and its still busy */
>>> +		if (retries == XSCOM_BUSY_MAX_RETRIES)
>>> +			log_simple_error(&e_info(OPAL_RC_XSCOM_BUSY),
>>> +				"XSCOM: %s-busy error gcid=0x%x pcb_addr=0x%x "
>>> +				"stat=0x%x\n", is_write ? "write" : "read",
>>> +				gcid, pcb_addr, stat);
>>>   		return OPAL_BUSY;
>>> +
>>>   	/* CPU is asleep, don't retry */
>>>   	case 2:
>>>   		return OPAL_WRONG_STATE;
>>> @@ -177,15 +210,16 @@ static bool xscom_gcid_ok(uint32_t gcid)
>>>    */
>>>   static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
>>>   {
>>> +	int i;
>>>   	uint64_t hmer;
>>> -	int64_t ret;
>>> +	int64_t ret, retries = 0;
>>>   
>>>   	if (!xscom_gcid_ok(gcid)) {
>>>   		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
>>>   		return OPAL_PARAMETER;
>>>   	}
>>>   
>>> -	for (;;) {
>>> +	for (i = 0; i <= XSCOM_BUSY_MAX_RETRIES; i++) {
>          for (retries = 0; retries <= XSCOM_BUSY_MAX_RETRIES; retries++)
>
>>>   		/* Clear status bits in HMER (HMER is special
>>>   		 * writing to it *ands* bits
>>>   		 */
>>> @@ -199,27 +233,31 @@ static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
>>>   
>>>   		/* Check for error */
>>>   		if (!(hmer & SPR_HMER_XSCOM_FAIL))
>>> -			break;
>>> +			return OPAL_SUCCESS;
>>>   
>>>   		/* Handle error and possibly eventually retry */
>>> -		ret = xscom_handle_error(hmer, gcid, pcb_addr, false);
>>> -		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
>>> -			return ret;
>>> +		ret = xscom_handle_error(hmer, gcid, pcb_addr, false, retries);
>>> +		if (ret != OPAL_BUSY)
>>> +			break;
>>> +		retries++;
>> I think we can get rid off extra variable here 'retries'. Just pass
>> variable 'i' to xscom_handle_error() which anyway gets incremented as
>> part of for loop.
> Good idea. How about keep the name retries, and remove i :)
>
> --Vaidy
diff mbox

Patch

diff --git a/hw/xscom.c b/hw/xscom.c
index 84f72f5..2649a50 100644
--- a/hw/xscom.c
+++ b/hw/xscom.c
@@ -23,6 +23,7 @@ 
 #include <centaur.h>
 #include <errorlog.h>
 #include <opal-api.h>
+#include <timebase.h>
 
 /* Mask of bits to clear in HMER before an access */
 #define HMER_CLR_MASK	(~(SPR_HMER_XSCOM_FAIL | \
@@ -41,6 +42,10 @@  DEFINE_LOG_ENTRY(OPAL_RC_XSCOM_RESET, OPAL_PLATFORM_ERR_EVT, OPAL_XSCOM,
 		OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_GENERAL,
 		OPAL_NA);
 
+DEFINE_LOG_ENTRY(OPAL_RC_XSCOM_BUSY, OPAL_PLATFORM_ERR_EVT, OPAL_XSCOM,
+		OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_GENERAL,
+		OPAL_NA);
+
 /* xscom details to trigger xstop */
 static struct {
 	uint64_t addr;
@@ -118,18 +123,46 @@  static void xscom_reset(uint32_t gcid)
 	 */
 }
 
-static int xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
-			       bool is_write)
+static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
+			      bool is_write, int64_t retries)
 {
+	struct timespec ts;
 	unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer);
 
 	/* XXX Figure out error codes from doc and error
 	 * recovery procedures
 	 */
 	switch(stat) {
-	/* XSCOM blocked, just retry */
+	/*
+	 * XSCOM engine is blocked, need to retry. Reset XSCOM engine
+	 * after crossing retry threshold before retrying again.
+	 */
 	case 1:
+		if (retries && !(retries  % XSCOM_BUSY_RESET_THRESHOLD)) {
+			prlog(PR_NOTICE, "XSCOM: Busy even after %d retries, "
+				"resetting XSCOM now. Total retries  = %lld\n",
+				XSCOM_BUSY_RESET_THRESHOLD, retries);
+			xscom_reset(gcid);
+
+			/*
+			 * Its observed that sometimes immediate retry of
+			 * XSCOM operation returns wrong data. Adding a
+			 * delay for XSCOM reset to be effective. Delay of
+			 * 10 ms is found to be working fine experimentally.
+			 */
+			ts.tv_sec = 0;
+			ts.tv_nsec = 10 * 1000;
+			nanosleep_nopoll(&ts, NULL);
+		}
+
+		/* Log error if we have retried enough and its still busy */
+		if (retries == XSCOM_BUSY_MAX_RETRIES)
+			log_simple_error(&e_info(OPAL_RC_XSCOM_BUSY),
+				"XSCOM: %s-busy error gcid=0x%x pcb_addr=0x%x "
+				"stat=0x%x\n", is_write ? "write" : "read",
+				gcid, pcb_addr, stat);
 		return OPAL_BUSY;
+
 	/* CPU is asleep, don't retry */
 	case 2:
 		return OPAL_WRONG_STATE;
@@ -177,15 +210,16 @@  static bool xscom_gcid_ok(uint32_t gcid)
  */
 static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
 {
+	int i;
 	uint64_t hmer;
-	int64_t ret;
+	int64_t ret, retries = 0;
 
 	if (!xscom_gcid_ok(gcid)) {
 		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
 		return OPAL_PARAMETER;
 	}
 
-	for (;;) {
+	for (i = 0; i <= XSCOM_BUSY_MAX_RETRIES; i++) {
 		/* Clear status bits in HMER (HMER is special
 		 * writing to it *ands* bits
 		 */
@@ -199,27 +233,31 @@  static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
 
 		/* Check for error */
 		if (!(hmer & SPR_HMER_XSCOM_FAIL))
-			break;
+			return OPAL_SUCCESS;
 
 		/* Handle error and possibly eventually retry */
-		ret = xscom_handle_error(hmer, gcid, pcb_addr, false);
-		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
-			return ret;
+		ret = xscom_handle_error(hmer, gcid, pcb_addr, false, retries);
+		if (ret != OPAL_BUSY)
+			break;
+		retries++;
 	}
-	return OPAL_SUCCESS;
+
+	prerror("XSCOM: Read failed, ret =  %lld\n", ret);
+	return ret;
 }
 
 static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
 {
+	int i;
 	uint64_t hmer;
-	int64_t ret;
+	int64_t ret, retries = 0;
 
 	if (!xscom_gcid_ok(gcid)) {
 		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
 		return OPAL_PARAMETER;
 	}
 
-	for (;;) {
+	for (i = 0; i <= XSCOM_BUSY_MAX_RETRIES; i++) {
 		/* Clear status bits in HMER (HMER is special
 		 * writing to it *ands* bits
 		 */
@@ -233,14 +271,18 @@  static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
 
 		/* Check for error */
 		if (!(hmer & SPR_HMER_XSCOM_FAIL))
-			break;
+			return OPAL_SUCCESS;
 
 		/* Handle error and possibly eventually retry */
-		ret = xscom_handle_error(hmer, gcid, pcb_addr, true);
-		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
-			return ret;
+		ret = xscom_handle_error(hmer, gcid, pcb_addr, true, retries);
+		if (ret == OPAL_BUSY)
+			retries++;
+		else
+			break;
 	}
-	return OPAL_SUCCESS;
+
+	prerror("XSCOM: Write failed, ret =  %lld\n", ret);
+	return ret;
 }
 
 /*
diff --git a/include/errorlog.h b/include/errorlog.h
index ed90dab..214aed2 100644
--- a/include/errorlog.h
+++ b/include/errorlog.h
@@ -275,6 +275,7 @@  enum opal_reasoncode {
 	OPAL_RC_XSCOM_RW		= OPAL_XS | 0x10,
 	OPAL_RC_XSCOM_INDIRECT_RW	= OPAL_XS | 0x11,
 	OPAL_RC_XSCOM_RESET		= OPAL_XS | 0x12,
+	OPAL_RC_XSCOM_BUSY		= OPAL_XS | 0x13,
 /* PCI */
 	OPAL_RC_PCI_INIT_SLOT   = OPAL_PC | 0x10,
 	OPAL_RC_PCI_ADD_SLOT    = OPAL_PC | 0x11,
diff --git a/include/xscom.h b/include/xscom.h
index 933af6a..1aee40e 100644
--- a/include/xscom.h
+++ b/include/xscom.h
@@ -167,6 +167,12 @@ 
 /* HB folks say: try 10 time for now */
 #define XSCOM_IND_MAX_RETRIES		10
 
+/* Max number of retries when XSCOM remains busy */
+#define XSCOM_BUSY_MAX_RETRIES		3000
+
+/* Retry count after which to reset XSCOM, if still busy */
+#define XSCOM_BUSY_RESET_THRESHOLD	1000
+
 /*
  * Error handling:
  *