diff mbox

[1/7] xscom: Return OPAL_WRONG_STATE on XSCOM ops if CPU is asleep

Message ID 1458027237-8926-2-git-send-email-ruscur@russell.cc
State Superseded
Headers show

Commit Message

Russell Currey March 15, 2016, 7:33 a.m. UTC
xscom_read and xscom_write return OPAL_SUCCESS if they worked, and
OPAL_HARDWARE if they didn't.  This doesn't provide information about why
the operation failed, such as if the CPU happens to be asleep.

This is specifically useful in error scanning, so if every CPU is being
scanned for errors, sleeping CPUs likely aren't the cause of failures.

So, return OPAL_WRONG_STATE in xscom_read and xscom_write if the CPU is
sleeping.

Signed-off-by: Russell Currey <ruscur@russell.cc>
---
This will potentially add some new functionality to the kernel.  I'm not
sure how that might be useful, but I don't think it can be harmful.
---
 doc/opal-api/opal-xscom-read-write-65-66.txt |  5 +++++
 hw/xscom.c                                   | 29 +++++++++++++++++-----------
 2 files changed, 23 insertions(+), 11 deletions(-)

Comments

Alistair Popple March 17, 2016, 4:58 a.m. UTC | #1
On Tue, 15 Mar 2016 18:33:51 Russell Currey wrote:
> xscom_read and xscom_write return OPAL_SUCCESS if they worked, and
> OPAL_HARDWARE if they didn't.  This doesn't provide information about why
> the operation failed, such as if the CPU happens to be asleep.
> 
> This is specifically useful in error scanning, so if every CPU is being
> scanned for errors, sleeping CPUs likely aren't the cause of failures.
> 
> So, return OPAL_WRONG_STATE in xscom_read and xscom_write if the CPU is
> sleeping.
> 
> Signed-off-by: Russell Currey <ruscur@russell.cc>
> ---
> This will potentially add some new functionality to the kernel.  I'm not
> sure how that might be useful, but I don't think it can be harmful.

I assume the kernel only checks for rc != 0 such that this minor OPAL API 
change doesn't actually result in any behaviour change for current and 
previous kernels?

Reviewed-by: Alistair Popple <alistair@popple.id.au>

> ---
>  doc/opal-api/opal-xscom-read-write-65-66.txt |  5 +++++
>  hw/xscom.c                                   | 29 
+++++++++++++++++-----------
>  2 files changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/doc/opal-api/opal-xscom-read-write-65-66.txt b/doc/opal-
api/opal-xscom-read-write-65-66.txt
> index 92916df..4ed0134 100644
> --- a/doc/opal-api/opal-xscom-read-write-65-66.txt
> +++ b/doc/opal-api/opal-xscom-read-write-65-66.txt
> @@ -10,3 +10,8 @@ each takes three parameters:
>  
>  int xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val)
>  int xscom_write(uint32_t partid, uint64_t pcb_addr, uint64_t val)
> +
> +Returns:
> +	OPAL_SUCCESS
> +	OPAL_HARDWARE if operation failed
> +	OPAL_WRONG_STATE if CPU is asleep
> diff --git a/hw/xscom.c b/hw/xscom.c
> index a7a1705..1c45cbc 100644
> --- a/hw/xscom.c
> +++ b/hw/xscom.c
> @@ -118,7 +118,7 @@ static void xscom_reset(uint32_t gcid)
>  	 */
>  }
>  
> -static bool xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t 
pcb_addr,
> +static int xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t 
pcb_addr,
>  			       bool is_write)
>  {
>  	unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer);
> @@ -129,7 +129,10 @@ static bool xscom_handle_error(uint64_t hmer, uint32_t 
gcid, uint32_t pcb_addr,
>  	switch(stat) {
>  	/* XSCOM blocked, just retry */
>  	case 1:
> -		return true;
> +		return OPAL_BUSY;
> +	/* CPU is asleep, don't retry */
> +	case 2:
> +		return OPAL_WRONG_STATE;
>  	}
>  
>  	/* XXX: Create error log entry ? */
> @@ -141,7 +144,7 @@ static bool xscom_handle_error(uint64_t hmer, uint32_t 
gcid, uint32_t pcb_addr,
>  	xscom_reset(gcid);
>  
>  	/* Non recovered ... just fail */
> -	return false;
> +	return OPAL_HARDWARE;
>  }
>  
>  static void xscom_handle_ind_error(uint64_t data, uint32_t gcid,
> @@ -175,6 +178,7 @@ static bool xscom_gcid_ok(uint32_t gcid)
>  static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
>  {
>  	uint64_t hmer;
> +	int64_t ret;
>  
>  	if (!xscom_gcid_ok(gcid)) {
>  		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
> @@ -197,16 +201,18 @@ static int __xscom_read(uint32_t gcid, uint32_t 
pcb_addr, uint64_t *val)
>  		if (!(hmer & SPR_HMER_XSCOM_FAIL))
>  			break;
>  
> -		/* Handle error and eventually retry */
> -		if (!xscom_handle_error(hmer, gcid, pcb_addr, false))
> -			return OPAL_HARDWARE;
> +		/* Handle error and possibly eventually retry */
> +		ret = xscom_handle_error(hmer, gcid, pcb_addr, false);
> +		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
> +			return ret;
>  	}
> -	return 0;
> +	return OPAL_SUCCESS;
>  }
>  
>  static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
>  {
>  	uint64_t hmer;
> +	int64_t ret;
>  
>  	if (!xscom_gcid_ok(gcid)) {
>  		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
> @@ -229,11 +235,12 @@ static int __xscom_write(uint32_t gcid, uint32_t 
pcb_addr, uint64_t val)
>  		if (!(hmer & SPR_HMER_XSCOM_FAIL))
>  			break;
>  
> -		/* Handle error and eventually retry */
> -		if (!xscom_handle_error(hmer, gcid, pcb_addr, true))
> -			return OPAL_HARDWARE;
> +		/* Handle error and possibly eventually retry */
> +		ret = xscom_handle_error(hmer, gcid, pcb_addr, true);
> +		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
> +			return ret;
>  	}
> -	return 0;
> +	return OPAL_SUCCESS;
>  }
>  
>  /*
>
Russell Currey March 17, 2016, 5:02 a.m. UTC | #2
On Thu, 2016-03-17 at 15:58 +1100, Alistair Popple wrote:
> On Tue, 15 Mar 2016 18:33:51 Russell Currey wrote:
> > 
> > xscom_read and xscom_write return OPAL_SUCCESS if they worked, and
> > OPAL_HARDWARE if they didn't.  This doesn't provide information about
> > why
> > the operation failed, such as if the CPU happens to be asleep.
> > 
> > This is specifically useful in error scanning, so if every CPU is being
> > scanned for errors, sleeping CPUs likely aren't the cause of failures.
> > 
> > So, return OPAL_WRONG_STATE in xscom_read and xscom_write if the CPU is
> > sleeping.
> > 
> > Signed-off-by: Russell Currey <ruscur@russell.cc>
> > ---
> > This will potentially add some new functionality to the kernel.  I'm
> > not
> > sure how that might be useful, but I don't think it can be harmful.
> I assume the kernel only checks for rc != 0 such that this minor OPAL
> API 
> change doesn't actually result in any behaviour change for current and 
> previous kernels?

Yeah, it ends up in opal_xscom_err_xlate, which treats non-zero as -EIO.
If something ever needed to know the CPU was asleep that could be changed
in future to return something different.

> 
> Reviewed-by: Alistair Popple <alistair@popple.id.au>
> 
> > 
> > ---
> >  doc/opal-api/opal-xscom-read-write-65-66.txt |  5 +++++
> >  hw/xscom.c                                   | 29 
> +++++++++++++++++-----------
> > 
> >  2 files changed, 23 insertions(+), 11 deletions(-)
> > 
> > diff --git a/doc/opal-api/opal-xscom-read-write-65-66.txt b/doc/opal-
> api/opal-xscom-read-write-65-66.txt
> > 
> > index 92916df..4ed0134 100644
> > --- a/doc/opal-api/opal-xscom-read-write-65-66.txt
> > +++ b/doc/opal-api/opal-xscom-read-write-65-66.txt
> > @@ -10,3 +10,8 @@ each takes three parameters:
> >  
> >  int xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val)
> >  int xscom_write(uint32_t partid, uint64_t pcb_addr, uint64_t val)
> > +
> > +Returns:
> > +	OPAL_SUCCESS
> > +	OPAL_HARDWARE if operation failed
> > +	OPAL_WRONG_STATE if CPU is asleep
> > diff --git a/hw/xscom.c b/hw/xscom.c
> > index a7a1705..1c45cbc 100644
> > --- a/hw/xscom.c
> > +++ b/hw/xscom.c
> > @@ -118,7 +118,7 @@ static void xscom_reset(uint32_t gcid)
> >  	 */
> >  }
> >  
> > -static bool xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t 
> pcb_addr,
> > 
> > +static int xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t 
> pcb_addr,
> > 
> >  			       bool is_write)
> >  {
> >  	unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer);
> > @@ -129,7 +129,10 @@ static bool xscom_handle_error(uint64_t hmer,
> > uint32_t 
> gcid, uint32_t pcb_addr,
> > 
> >  	switch(stat) {
> >  	/* XSCOM blocked, just retry */
> >  	case 1:
> > -		return true;
> > +		return OPAL_BUSY;
> > +	/* CPU is asleep, don't retry */
> > +	case 2:
> > +		return OPAL_WRONG_STATE;
> >  	}
> >  
> >  	/* XXX: Create error log entry ? */
> > @@ -141,7 +144,7 @@ static bool xscom_handle_error(uint64_t hmer,
> > uint32_t 
> gcid, uint32_t pcb_addr,
> > 
> >  	xscom_reset(gcid);
> >  
> >  	/* Non recovered ... just fail */
> > -	return false;
> > +	return OPAL_HARDWARE;
> >  }
> >  
> >  static void xscom_handle_ind_error(uint64_t data, uint32_t gcid,
> > @@ -175,6 +178,7 @@ static bool xscom_gcid_ok(uint32_t gcid)
> >  static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t
> > *val)
> >  {
> >  	uint64_t hmer;
> > +	int64_t ret;
> >  
> >  	if (!xscom_gcid_ok(gcid)) {
> >  		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__,
> > gcid);
> > @@ -197,16 +201,18 @@ static int __xscom_read(uint32_t gcid, uint32_t 
> pcb_addr, uint64_t *val)
> > 
> >  		if (!(hmer & SPR_HMER_XSCOM_FAIL))
> >  			break;
> >  
> > -		/* Handle error and eventually retry */
> > -		if (!xscom_handle_error(hmer, gcid, pcb_addr, false))
> > -			return OPAL_HARDWARE;
> > +		/* Handle error and possibly eventually retry */
> > +		ret = xscom_handle_error(hmer, gcid, pcb_addr, false);
> > +		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
> > +			return ret;
> >  	}
> > -	return 0;
> > +	return OPAL_SUCCESS;
> >  }
> >  
> >  static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t
> > val)
> >  {
> >  	uint64_t hmer;
> > +	int64_t ret;
> >  
> >  	if (!xscom_gcid_ok(gcid)) {
> >  		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__,
> > gcid);
> > @@ -229,11 +235,12 @@ static int __xscom_write(uint32_t gcid, uint32_t 
> pcb_addr, uint64_t val)
> > 
> >  		if (!(hmer & SPR_HMER_XSCOM_FAIL))
> >  			break;
> >  
> > -		/* Handle error and eventually retry */
> > -		if (!xscom_handle_error(hmer, gcid, pcb_addr, true))
> > -			return OPAL_HARDWARE;
> > +		/* Handle error and possibly eventually retry */
> > +		ret = xscom_handle_error(hmer, gcid, pcb_addr, true);
> > +		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
> > +			return ret;
> >  	}
> > -	return 0;
> > +	return OPAL_SUCCESS;
> >  }
> >  
> >  /*
> > 
>
diff mbox

Patch

diff --git a/doc/opal-api/opal-xscom-read-write-65-66.txt b/doc/opal-api/opal-xscom-read-write-65-66.txt
index 92916df..4ed0134 100644
--- a/doc/opal-api/opal-xscom-read-write-65-66.txt
+++ b/doc/opal-api/opal-xscom-read-write-65-66.txt
@@ -10,3 +10,8 @@  each takes three parameters:
 
 int xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val)
 int xscom_write(uint32_t partid, uint64_t pcb_addr, uint64_t val)
+
+Returns:
+	OPAL_SUCCESS
+	OPAL_HARDWARE if operation failed
+	OPAL_WRONG_STATE if CPU is asleep
diff --git a/hw/xscom.c b/hw/xscom.c
index a7a1705..1c45cbc 100644
--- a/hw/xscom.c
+++ b/hw/xscom.c
@@ -118,7 +118,7 @@  static void xscom_reset(uint32_t gcid)
 	 */
 }
 
-static bool xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
+static int xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
 			       bool is_write)
 {
 	unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer);
@@ -129,7 +129,10 @@  static bool xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
 	switch(stat) {
 	/* XSCOM blocked, just retry */
 	case 1:
-		return true;
+		return OPAL_BUSY;
+	/* CPU is asleep, don't retry */
+	case 2:
+		return OPAL_WRONG_STATE;
 	}
 
 	/* XXX: Create error log entry ? */
@@ -141,7 +144,7 @@  static bool xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
 	xscom_reset(gcid);
 
 	/* Non recovered ... just fail */
-	return false;
+	return OPAL_HARDWARE;
 }
 
 static void xscom_handle_ind_error(uint64_t data, uint32_t gcid,
@@ -175,6 +178,7 @@  static bool xscom_gcid_ok(uint32_t gcid)
 static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
 {
 	uint64_t hmer;
+	int64_t ret;
 
 	if (!xscom_gcid_ok(gcid)) {
 		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
@@ -197,16 +201,18 @@  static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
 		if (!(hmer & SPR_HMER_XSCOM_FAIL))
 			break;
 
-		/* Handle error and eventually retry */
-		if (!xscom_handle_error(hmer, gcid, pcb_addr, false))
-			return OPAL_HARDWARE;
+		/* Handle error and possibly eventually retry */
+		ret = xscom_handle_error(hmer, gcid, pcb_addr, false);
+		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
+			return ret;
 	}
-	return 0;
+	return OPAL_SUCCESS;
 }
 
 static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
 {
 	uint64_t hmer;
+	int64_t ret;
 
 	if (!xscom_gcid_ok(gcid)) {
 		prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
@@ -229,11 +235,12 @@  static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
 		if (!(hmer & SPR_HMER_XSCOM_FAIL))
 			break;
 
-		/* Handle error and eventually retry */
-		if (!xscom_handle_error(hmer, gcid, pcb_addr, true))
-			return OPAL_HARDWARE;
+		/* Handle error and possibly eventually retry */
+		ret = xscom_handle_error(hmer, gcid, pcb_addr, true);
+		if (ret == OPAL_HARDWARE || ret == OPAL_WRONG_STATE)
+			return ret;
 	}
-	return 0;
+	return OPAL_SUCCESS;
 }
 
 /*