diff mbox series

[SRU,F:linux-bluefield,v1,1/1] UBUNTU: SAUCE: mlxbf-gige: autonegotiation fails to complete on BF2

Message ID 20240418211046.30873-2-asmaa@nvidia.com
State New
Headers show
Series UBUNTU: SAUCE: mlxbf-gige: autonegotiation fails to complete on BF2 | expand

Commit Message

Asmaa Mnebhi April 18, 2024, 9:10 p.m. UTC
BugLink: https://bugs.launchpad.net/bugs/2062384

During their reboot test, QA found an intermittent issue where the OOB link is down.
The link is down because the KSZ9031 PHY fails to complete autonegotiation.
Even under "normal" circumstances where autonegotiation completes,
it takes an abnormally long time to do so (on average, at least 8 seconds).

The hardware team and Microchip are involved in debugging this, but the root cause is still unknown.
In the meantime, we need to provide a software workaround, since customers are starting to see this issue as well.

Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
Reviewed-by: David Thompson <davthompson@nvidia.com>
---
 .../mellanox/mlxbf_gige/mlxbf_gige_main.c         | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

Comments

Asmaa Mnebhi April 29, 2024, 7:31 p.m. UTC | #1
@Tim Gardner <tim.gardner@canonical.com>, @Bartlomiej Zolnierkiewicz <bartlomiej.zolnierkiewicz@canonical.com>: could you also review this for Focal, please?



Thanks.

Asmaa



++@Vladimir Sokolovsky <vlad@nvidia.com>



> -----Original Message-----

> From: Asmaa Mnebhi <asmaa@nvidia.com>

> Sent: Thursday, April 18, 2024 5:11 PM

> To: kernel-team@lists.ubuntu.com

> Cc: Asmaa Mnebhi <asmaa@nvidia.com>; David Thompson

> <davthompson@nvidia.com>

> Subject: [SRU][F:linux-bluefield][PATCH v1 1/1] UBUNTU: SAUCE: mlxbf-gige:

> autonegotiation fails to complete on BF2

>

> BugLink: https://bugs.launchpad.net/bugs/2062384

>

> During their reboot test, QA found an intermittent issue where the OOB link is

> down.

> The link is down because the KSZ9031 PHY fails to complete autonegotiation.

> Even under "normal" circumstances where autonegotiation completes, it takes

> an abnormal time to do so (on average, at least 8 seconds).

>

> Hence, the hardware team and Microchip are involved in this debug but the root

> cause is still unknown.

> In the meantime, we need to provide a software workaround since customers are

> starting to see this issue as well.

>

> Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>

> Reviewed-by: David Thompson <davthompson@nvidia.com>

> ---

>  .../mellanox/mlxbf_gige/mlxbf_gige_main.c         | 15 +++++++++++++++

>  1 file changed, 15 insertions(+)

>

> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c

> b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c

> index d322d9fb06ba..635009a209d5 100644

> --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c

> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c

> @@ -138,6 +138,7 @@ static int mlxbf_gige_open(struct net_device *netdev)  {

>            struct mlxbf_gige *priv = netdev_priv(netdev);

>            struct phy_device *phydev = netdev->phydev;

> +         u8 timeout = 10;

>            u64 control;

>            u64 int_en;

>            int err;

> @@ -163,6 +164,20 @@ static int mlxbf_gige_open(struct net_device *netdev)

>

>            phy_start(phydev);

>

> +         /* On BlueField-2 systems, the KSZ9031 PHY hardware could fail

> +         * to complete autonegotiation and so the link remains down.

> +         * The software workaround is to restart autonegotiation.

> +         */

> +         while (timeout) {

> +                       if (phy_aneg_done(phydev))

> +                                      break;

> +                       msleep(1000);

> +                       timeout--;

> +         };

> +

> +         if (timeout == 0)

> +                       phy_restart_aneg(phydev);

> +

>            err = mlxbf_gige_tx_init(priv);

>            if (err)

>                           goto free_irqs;

> --

> 2.30.1
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index d322d9fb06ba..635009a209d5 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -138,6 +138,7 @@  static int mlxbf_gige_open(struct net_device *netdev)
 {
 	struct mlxbf_gige *priv = netdev_priv(netdev);
 	struct phy_device *phydev = netdev->phydev;
+	u8 timeout = 10;
 	u64 control;
 	u64 int_en;
 	int err;
@@ -163,6 +164,20 @@  static int mlxbf_gige_open(struct net_device *netdev)
 
 	phy_start(phydev);
 
+	/* On BlueField-2 systems, the KSZ9031 PHY hardware could fail
+	 * to complete autonegotiation and so the link remains down.
+	 * The software workaround is to restart autonegotiation.
+	 */
+	while (timeout) {
+		if (phy_aneg_done(phydev))
+			break;
+		msleep(1000);
+		timeout--;
+	};
+
+	if (timeout == 0)
+		phy_restart_aneg(phydev);
+
 	err = mlxbf_gige_tx_init(priv);
 	if (err)
 		goto free_irqs;