diff mbox series

[SRU,J:linux-bluefield,v1,1/1] UBUNTU: SAUCE: gpio-mlxbf3: During reboot test, ipmb driver fails to load intermittently

Message ID 20240520220104.3602-2-asmaa@nvidia.com
State New
Headers show
Series UBUNTU: SAUCE: gpio-mlxbf3: During reboot test, ipmb driver fails to load intermittently | expand

Commit Message

Asmaa Mnebhi May 20, 2024, 10:01 p.m. UTC
BugLink: https://bugs.launchpad.net/bugs/2066198

The ipmb driver failing to load is just the result of i2c-mlxbf
not receiving interrupts.
In fact, any driver dependent on the i2c-mlxbf driver will not work.

How to reproduce this issue?

- modprobe gpio-mlxbf3
- modprobe pwr-mlxbf
- modprobe mlxbf-gige -> this calls into the gpio driver which enables the PHY interrupt (gpio10)
- reboot linux
  -> graceful reboot does not remove modules so it doesn't disable the PHY interrupt via
     mlxbf3_gpio_irq_disable. Hence, the interrupt remains enabled.
- In anolis, we don't enforce the dependency between gpio-mlxbf3 and mlxbf-gige.
  So the next time linux boots and loads the driver in this order, we encounter the issue:
- modprobe mlxbf-gige. The gige driver uses polling in the case where it loads before the gpio
  driver. Note that the interrupt at GPIO10 is still enabled at this point so if the interrupt
  triggers, there is nothing to clear it.
- modprobe gpio-mlxbf3
- modprobe i2c-mlxbf. The interrupt wouldn't work here because it is shared with the gpio
  interrupts which was not cleared.

The solution is to add a shutdown function to the gpio driver to clear and disable
all interrupts.
Also make sure to clear the interrupt after disabling it in the disable irq function.

Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
Reviewed-by: David Thompson <davthompson@nvidia.com>
---
 drivers/gpio/gpio-mlxbf3.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

Comments

Asmaa Mnebhi May 23, 2024, 1:58 p.m. UTC | #1
Hi @Tim Gardner<mailto:tim.gardner@canonical.com> @Bartlomiej Zolnierkiewicz<mailto:bartlomiej.zolnierkiewicz@canonical.com>,



Could you please review this patch?



Thanks.
Asmaa



> -----Original Message-----

> From: Asmaa Mnebhi <asmaa@nvidia.com>

> Sent: Monday, May 20, 2024 6:01 PM

> To: kernel-team@lists.ubuntu.com

> Cc: Asmaa Mnebhi <asmaa@nvidia.com>; David Thompson

> <davthompson@nvidia.com>

> Subject: [SRU][J:linux-bluefield][PATCH v1 1/1] UBUNTU: SAUCE: gpio-mlxbf3:

> During reboot test, ipmb driver fails to load intermittently

>

> BugLink: https://bugs.launchpad.net/bugs/2066198

>

> The ipmb driver failing to load is just the result of i2c-mlxbf not receiving

> interrupts.

> In fact, any driver dependent on the i2c-mlxbf driver will not work.

>

> How to reproduce this issue?

>

> - modprobe gpio-mlxbf3

> - modprobe pwr-mlxbf

> - modprobe mlxbf-gige -> this calls into the gpio driver which enables the PHY

> interrupt (gpio10)

> - reboot linux

>   -> graceful reboot does not remove modules so it doesn't disable the PHY

> interrupt via

>      mlxbf3_gpio_irq_disable. Hence, the interrupt remains enabled.

> - In anolis, we don't enforce the dependency between gpio-mlxbf3 and mlxbf-

> gige.

>   So the next time linux boots and loads the driver in this order, we encounter

> the issue:

> - modprobe mlxbf-gige. The gige driver uses polling in the case where it loads

> before the gpio

>   driver. Note that the interrupt at GPIO10 is still enabled at this point so if the

> interrupt

>   triggers, there is nothing to clear it.

> - modprobe gpio-mlxbf3

> - modprobe i2c-mlxbf. The interrupt wouldn't work here because it is shared

> with the gpio

>   interrupts which was not cleared.

>

> The solution is to add a shutdown function to the gpio driver to clear and

> disable all interrupts.

> Also make sure to clear the interrupt after disabling it in the disable irq function.

>

> Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com<mailto:asmaa@nvidia.com>>

> Reviewed-by: David Thompson <davthompson@nvidia.com<mailto:davthompson@nvidia.com>>

> ---

>  drivers/gpio/gpio-mlxbf3.c | 14 ++++++++++++++

>  1 file changed, 14 insertions(+)

>

> diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index

> 9d1bd3f6e23f..1646325f1283 100644

> --- a/drivers/gpio/gpio-mlxbf3.c

> +++ b/drivers/gpio/gpio-mlxbf3.c

> @@ -39,6 +39,8 @@

>  #define MLXBF_GPIO_CAUSE_OR_EVTEN0        0x14

>  #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE      0x18

>

> +#define MLXBF_GPIO_CLR_ALL_INTS           0xFFFFFFFF

> +

>  struct mlxbf3_gpio_context {

>            struct gpio_chip gc;

>

> @@ -85,6 +87,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd)

>            val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0);

>            val &= ~BIT(offset);

>            writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0);

> +

> +         writel(BIT(offset), gs->gpio_cause_io +

> MLXBF_GPIO_CAUSE_OR_CLRCAUSE);

>            raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags);

>

>            gpiochip_disable_irq(gc, offset);

> @@ -267,6 +271,15 @@ static int mlxbf3_gpio_probe(struct platform_device

> *pdev)

>            return 0;

>  }

>

> +static void mlxbf3_gpio_shutdown(struct platform_device *pdev) {

> +         struct mlxbf3_gpio_context *gs = dev_get_drvdata(&pdev->dev);

> +

> +         /* Disable and clear all interrupts */

> +         writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0);

> +         writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io +

> +MLXBF_GPIO_CAUSE_OR_CLRCAUSE); }

> +

>  static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = {

>            { "MLNXBF33", 0 },

>            {}

> @@ -279,6 +292,7 @@ static struct platform_driver mlxbf3_gpio_driver = {

>                           .acpi_match_table = mlxbf3_gpio_acpi_match,

>            },

>            .probe    = mlxbf3_gpio_probe,

> +         .shutdown = mlxbf3_gpio_shutdown,

>  };

>  module_platform_driver(mlxbf3_gpio_driver);

>

> --

> 2.30.1
diff mbox series

Patch

diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c
index 9d1bd3f6e23f..1646325f1283 100644
--- a/drivers/gpio/gpio-mlxbf3.c
+++ b/drivers/gpio/gpio-mlxbf3.c
@@ -39,6 +39,8 @@ 
 #define MLXBF_GPIO_CAUSE_OR_EVTEN0        0x14
 #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE      0x18
 
+#define MLXBF_GPIO_CLR_ALL_INTS           0xFFFFFFFF
+
 struct mlxbf3_gpio_context {
 	struct gpio_chip gc;
 
@@ -85,6 +87,8 @@  static void mlxbf3_gpio_irq_disable(struct irq_data *irqd)
 	val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0);
 	val &= ~BIT(offset);
 	writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0);
+
+	writel(BIT(offset), gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE);
 	raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags);
 
 	gpiochip_disable_irq(gc, offset);
@@ -267,6 +271,15 @@  static int mlxbf3_gpio_probe(struct platform_device *pdev)
 	return 0;
 }
 
+static void mlxbf3_gpio_shutdown(struct platform_device *pdev)
+{
+	struct mlxbf3_gpio_context *gs = dev_get_drvdata(&pdev->dev);
+
+	/* Disable and clear all interrupts */
+	writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0);
+	writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE);
+}
+
 static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = {
 	{ "MLNXBF33", 0 },
 	{}
@@ -279,6 +292,7 @@  static struct platform_driver mlxbf3_gpio_driver = {
 		.acpi_match_table = mlxbf3_gpio_acpi_match,
 	},
 	.probe    = mlxbf3_gpio_probe,
+	.shutdown = mlxbf3_gpio_shutdown,
 };
 module_platform_driver(mlxbf3_gpio_driver);