Message ID | 20240520220104.3602-2-asmaa@nvidia.com |
---|---|
State | New |
Headers | show |
Series | UBUNTU: SAUCE: gpio-mlxbf3: During reboot test, ipmb driver fails to load intermittently | expand |
Hi @Tim Gardner<mailto:tim.gardner@canonical.com> @Bartlomiej Zolnierkiewicz<mailto:bartlomiej.zolnierkiewicz@canonical.com>, Could you please review this patch? Thanks. Asmaa > -----Original Message----- > From: Asmaa Mnebhi <asmaa@nvidia.com> > Sent: Monday, May 20, 2024 6:01 PM > To: kernel-team@lists.ubuntu.com > Cc: Asmaa Mnebhi <asmaa@nvidia.com>; David Thompson > <davthompson@nvidia.com> > Subject: [SRU][J:linux-bluefield][PATCH v1 1/1] UBUNTU: SAUCE: gpio-mlxbf3: > During reboot test, ipmb driver fails to load intermittently > > BugLink: https://bugs.launchpad.net/bugs/2066198 > > The ipmb driver failing to load is just the result of i2c-mlxbf not receiving > interrupts. > In fact, any driver dependent on the i2c-mlxbf driver will not work. > > How to reproduce this issue? > > - modprobe gpio-mlxbf3 > - modprobe pwr-mlxbf > - modprobe mlxbf-gige -> this calls into the gpio driver which enables the PHY > interrupt (gpio10) > - reboot linux > -> graceful reboot does not remove modules so it doesn't disable the PHY > interrupt via > mlxbf3_gpio_irq_disable. Hence, the interrupt remains enabled. > - In anolis, we don't enforce the dependency between gpio-mlxbf3 and mlxbf- > gige. > So the next time linux boots and loads the driver in this order, we encounter > the issue: > - modprobe mlxbf-gige. The gige driver uses polling in the case where it loads > before the gpio > driver. Note that the interrupt at GPIO10 is still enabled at this point so if the > interrupt > triggers, there is nothing to clear it. > - modprobe gpio-mlxbf3 > - modprobe i2c-mlxbf. The interrupt wouldn't work here because it is shared > with the gpio > interrupts which was not cleared. > > The solution is to add a shutdown function to the gpio driver to clear and > disable all interrupts. > Also make sure to clear the interrupt after disabling it in the disable irq function. > > Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com<mailto:asmaa@nvidia.com>> > Reviewed-by: David Thompson <davthompson@nvidia.com<mailto:davthompson@nvidia.com>> > --- > drivers/gpio/gpio-mlxbf3.c | 14 ++++++++++++++ > 1 file changed, 14 insertions(+) > > diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index > 9d1bd3f6e23f..1646325f1283 100644 > --- a/drivers/gpio/gpio-mlxbf3.c > +++ b/drivers/gpio/gpio-mlxbf3.c > @@ -39,6 +39,8 @@ > #define MLXBF_GPIO_CAUSE_OR_EVTEN0 0x14 > #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE 0x18 > > +#define MLXBF_GPIO_CLR_ALL_INTS 0xFFFFFFFF > + > struct mlxbf3_gpio_context { > struct gpio_chip gc; > > @@ -85,6 +87,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd) > val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); > val &= ~BIT(offset); > writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); > + > + writel(BIT(offset), gs->gpio_cause_io + > MLXBF_GPIO_CAUSE_OR_CLRCAUSE); > raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags); > > gpiochip_disable_irq(gc, offset); > @@ -267,6 +271,15 @@ static int mlxbf3_gpio_probe(struct platform_device > *pdev) > return 0; > } > > +static void mlxbf3_gpio_shutdown(struct platform_device *pdev) { > + struct mlxbf3_gpio_context *gs = dev_get_drvdata(&pdev->dev); > + > + /* Disable and clear all interrupts */ > + writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); > + writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + > +MLXBF_GPIO_CAUSE_OR_CLRCAUSE); } > + > static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = { > { "MLNXBF33", 0 }, > {} > @@ -279,6 +292,7 @@ static struct platform_driver mlxbf3_gpio_driver = { > .acpi_match_table = mlxbf3_gpio_acpi_match, > }, > .probe = mlxbf3_gpio_probe, > + .shutdown = mlxbf3_gpio_shutdown, > }; > module_platform_driver(mlxbf3_gpio_driver); > > -- > 2.30.1
diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index 9d1bd3f6e23f..1646325f1283 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -39,6 +39,8 @@ #define MLXBF_GPIO_CAUSE_OR_EVTEN0 0x14 #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE 0x18 +#define MLXBF_GPIO_CLR_ALL_INTS 0xFFFFFFFF + struct mlxbf3_gpio_context { struct gpio_chip gc; @@ -85,6 +87,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd) val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); val &= ~BIT(offset); writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + + writel(BIT(offset), gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags); gpiochip_disable_irq(gc, offset); @@ -267,6 +271,15 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) return 0; } +static void mlxbf3_gpio_shutdown(struct platform_device *pdev) +{ + struct mlxbf3_gpio_context *gs = dev_get_drvdata(&pdev->dev); + + /* Disable and clear all interrupts */ + writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); +} + static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = { { "MLNXBF33", 0 }, {} @@ -279,6 +292,7 @@ static struct platform_driver mlxbf3_gpio_driver = { .acpi_match_table = mlxbf3_gpio_acpi_match, }, .probe = mlxbf3_gpio_probe, + .shutdown = mlxbf3_gpio_shutdown, }; module_platform_driver(mlxbf3_gpio_driver);