diff mbox series

powerpc/xive: Fix bogus error code returned by OPAL

Message ID 156812362556.1866243.7399893138425681517.stgit@bahia.tls.ibm.com (mailing list archive)
State Changes Requested
Headers show
Series powerpc/xive: Fix bogus error code returned by OPAL | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch next (c317052c95bef1f977b023158e5aa929215f443d)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 31 lines checked

Commit Message

Greg Kurz Sept. 10, 2019, 1:53 p.m. UTC
There's a bug in skiboot that causes the OPAL_XIVE_ALLOCATE_IRQ call
to return the 32-bit value 0xffffffff when OPAL has run out of IRQs.
Unfortunately, OPAL return values are signed 64-bit entities and
errors are supposed to be negative. If that happens, the linux code
confusingly treats 0xffffffff as a valid IRQ number and panics at some
point.

A fix was recently merged in skiboot:

e97391ae2bb5 ("xive: fix return value of opal_xive_allocate_irq()")

but we need a workaround anyway to support older skiboots already
in the field.

Internally convert 0xffffffff to OPAL_RESOURCE which is the usual error
returned upon resource exhaustion.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 arch/powerpc/sysdev/xive/native.c |   13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

Comments

Cédric Le Goater Sept. 10, 2019, 1:59 p.m. UTC | #1
On 10/09/2019 15:53, Greg Kurz wrote:
> There's a bug in skiboot that causes the OPAL_XIVE_ALLOCATE_IRQ call
> to return the 32-bit value 0xffffffff when OPAL has run out of IRQs.
> Unfortunatelty, OPAL return values are signed 64-bit entities and
> errors are supposed to be negative. If that happens, the linux code
> confusingly treats 0xffffffff as a valid IRQ number and panics at some
> point.
> 
> A fix was recently merged in skiboot:
> 
> e97391ae2bb5 ("xive: fix return value of opal_xive_allocate_irq()")
> 
> but we need a workaround anyway to support older skiboots already
> on the field.
> 
> Internally convert 0xffffffff to OPAL_RESOURCE which is the usual error
> returned upon resource exhaustion.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>



Reviewed-by: Cédric Le Goater <clg@kaod.org>

Thanks,

C.

> ---
>  arch/powerpc/sysdev/xive/native.c |   13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
> index 37987c815913..c35583f84f9f 100644
> --- a/arch/powerpc/sysdev/xive/native.c
> +++ b/arch/powerpc/sysdev/xive/native.c
> @@ -231,6 +231,15 @@ static bool xive_native_match(struct device_node *node)
>  	return of_device_is_compatible(node, "ibm,opal-xive-vc");
>  }
>  
> +static int64_t opal_xive_allocate_irq_fixup(uint32_t chip_id)
> +{
> +	s64 irq = opal_xive_allocate_irq(chip_id);
> +
> +#define XIVE_ALLOC_NO_SPACE	0xffffffff /* No possible space */
> +	return
> +		irq == XIVE_ALLOC_NO_SPACE ? OPAL_RESOURCE : irq;
> +}
> +
>  #ifdef CONFIG_SMP
>  static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
>  {
> @@ -238,7 +247,7 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
>  
>  	/* Allocate an IPI and populate info about it */
>  	for (;;) {
> -		irq = opal_xive_allocate_irq(xc->chip_id);
> +		irq = opal_xive_allocate_irq_fixup(xc->chip_id);
>  		if (irq == OPAL_BUSY) {
>  			msleep(OPAL_BUSY_DELAY_MS);
>  			continue;
> @@ -259,7 +268,7 @@ u32 xive_native_alloc_irq(void)
>  	s64 rc;
>  
>  	for (;;) {
> -		rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP);
> +		rc = opal_xive_allocate_irq_fixup(OPAL_XIVE_ANY_CHIP);
>  		if (rc != OPAL_BUSY)
>  			break;
>  		msleep(OPAL_BUSY_DELAY_MS);
>
Michael Ellerman Sept. 11, 2019, 2:26 p.m. UTC | #2
Hi Greg,

Couple of comments ...

Greg Kurz <groug@kaod.org> writes:
> There's a bug in skiboot that causes the OPAL_XIVE_ALLOCATE_IRQ call
> to return the 32-bit value 0xffffffff when OPAL has run out of IRQs.
> Unfortunatelty, OPAL return values are signed 64-bit entities and
> errors are supposed to be negative. If that happens, the linux code
> confusingly treats 0xffffffff as a valid IRQ number and panics at some
> point.
>
> A fix was recently merged in skiboot:
>
> e97391ae2bb5 ("xive: fix return value of opal_xive_allocate_irq()")
>
> but we need a workaround anyway to support older skiboots already
> on the field.
  ^
  in
 
>
> Internally convert 0xffffffff to OPAL_RESOURCE which is the usual error
> returned upon resource exhaustion.

This should go to stable, any idea what versions it should go back to?
Probably whenever the xive code was introduced?

> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  arch/powerpc/sysdev/xive/native.c |   13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
> index 37987c815913..c35583f84f9f 100644
> --- a/arch/powerpc/sysdev/xive/native.c
> +++ b/arch/powerpc/sysdev/xive/native.c
> @@ -231,6 +231,15 @@ static bool xive_native_match(struct device_node *node)
>  	return of_device_is_compatible(node, "ibm,opal-xive-vc");
>  }
>  
> +static int64_t opal_xive_allocate_irq_fixup(uint32_t chip_id)
          ^                                    ^
          Can you use s64 here and u32 here ....

Instead of calling this opal_xive_allocate_irq_fixup() and relying on
all callers to call the fixup, can we rename the opal wrapper and leave
this function's name unchanged, eg:

-OPAL_CALL(opal_xive_allocate_irq,              OPAL_XIVE_ALLOCATE_IRQ);
+OPAL_CALL(opal_xive_allocate_irq_raw,          OPAL_XIVE_ALLOCATE_IRQ);


> +{
> +	s64 irq = opal_xive_allocate_irq(chip_id);
> +
> +#define XIVE_ALLOC_NO_SPACE	0xffffffff /* No possible space */
> +	return
> +		irq == XIVE_ALLOC_NO_SPACE ? OPAL_RESOURCE : irq;
> +}

I don't really like the #define and the weird indenting and so on, can
we instead do it like:

	/*
         * Old versions of skiboot can incorrectly return 0xffffffff to
         * indicate no space, fix it up here.
         */
	return irq == 0xffffffff ? OPAL_RESOURCE : irq;

cheers
Greg Kurz Sept. 11, 2019, 2:40 p.m. UTC | #3
On Thu, 12 Sep 2019 00:26:19 +1000
Michael Ellerman <mpe@ellerman.id.au> wrote:

> Hi Greg,
> 

Bom dia ! :)

> Couple of comments ...
> 
> Greg Kurz <groug@kaod.org> writes:
> > There's a bug in skiboot that causes the OPAL_XIVE_ALLOCATE_IRQ call
> > to return the 32-bit value 0xffffffff when OPAL has run out of IRQs.
> > Unfortunatelty, OPAL return values are signed 64-bit entities and
> > errors are supposed to be negative. If that happens, the linux code
> > confusingly treats 0xffffffff as a valid IRQ number and panics at some
> > point.
> >
> > A fix was recently merged in skiboot:
> >
> > e97391ae2bb5 ("xive: fix return value of opal_xive_allocate_irq()")
> >
> > but we need a workaround anyway to support older skiboots already
> > on the field.
>   ^
>   in
>  
> >
> > Internally convert 0xffffffff to OPAL_RESOURCE which is the usual error
> > returned upon resource exhaustion.
> 
> This should go to stable, any idea what versions it should go back to?
> Probably whenever the xive code was introduced?
> 

Yes I guess so. This would mean v4.12. I'll add the appropriate stable
tag before re-posting, and address all the other remarks of course.

> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >  arch/powerpc/sysdev/xive/native.c |   13 +++++++++++--
> >  1 file changed, 11 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
> > index 37987c815913..c35583f84f9f 100644
> > --- a/arch/powerpc/sysdev/xive/native.c
> > +++ b/arch/powerpc/sysdev/xive/native.c
> > @@ -231,6 +231,15 @@ static bool xive_native_match(struct device_node *node)
> >  	return of_device_is_compatible(node, "ibm,opal-xive-vc");
> >  }
> >  
> > +static int64_t opal_xive_allocate_irq_fixup(uint32_t chip_id)
>           ^                                    ^
>           Can you use s64 here and u32 here ....
> 
> Instead of calling this opal_xive_allocate_irq_fixup() and relying on
> all callers to call the fixup, can we rename the opal wrapper and leave
> this function's name unchanged, eg:
> 
> -OPAL_CALL(opal_xive_allocate_irq,              OPAL_XIVE_ALLOCATE_IRQ);
> +OPAL_CALL(opal_xive_allocate_irq_raw,          OPAL_XIVE_ALLOCATE_IRQ);
> 
> 
> > +{
> > +	s64 irq = opal_xive_allocate_irq(chip_id);
> > +
> > +#define XIVE_ALLOC_NO_SPACE	0xffffffff /* No possible space */
> > +	return
> > +		irq == XIVE_ALLOC_NO_SPACE ? OPAL_RESOURCE : irq;
> > +}
> 
> I don't really like the #define and the weird indenting and so on, can
> we instead do it like:
> 
> 	/*
>          * Old versions of skiboot can incorrectly return 0xffffffff to
>          * indicate no space, fix it up here.
>          */
> 	return irq == 0xffffffff ? OPAL_RESOURCE : irq;
> 
> cheers
diff mbox series

Patch

diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 37987c815913..c35583f84f9f 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -231,6 +231,15 @@  static bool xive_native_match(struct device_node *node)
 	return of_device_is_compatible(node, "ibm,opal-xive-vc");
 }
 
+static int64_t opal_xive_allocate_irq_fixup(uint32_t chip_id)
+{
+	s64 irq = opal_xive_allocate_irq(chip_id);
+
+#define XIVE_ALLOC_NO_SPACE	0xffffffff /* No possible space */
+	return
+		irq == XIVE_ALLOC_NO_SPACE ? OPAL_RESOURCE : irq;
+}
+
 #ifdef CONFIG_SMP
 static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
 {
@@ -238,7 +247,7 @@  static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
 
 	/* Allocate an IPI and populate info about it */
 	for (;;) {
-		irq = opal_xive_allocate_irq(xc->chip_id);
+		irq = opal_xive_allocate_irq_fixup(xc->chip_id);
 		if (irq == OPAL_BUSY) {
 			msleep(OPAL_BUSY_DELAY_MS);
 			continue;
@@ -259,7 +268,7 @@  u32 xive_native_alloc_irq(void)
 	s64 rc;
 
 	for (;;) {
-		rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP);
+		rc = opal_xive_allocate_irq_fixup(OPAL_XIVE_ANY_CHIP);
 		if (rc != OPAL_BUSY)
 			break;
 		msleep(OPAL_BUSY_DELAY_MS);