diff mbox series

[iwl-net,v1,1/1] e1000e: avoid failing the system during pm_suspend

Message ID 20240806132348.880744-1-vitaly.lifshits@intel.com
State Accepted
Delegated to: Anthony Nguyen
Headers show
Series [iwl-net,v1,1/1] e1000e: avoid failing the system during pm_suspend | expand

Commit Message

Vitaly Lifshits Aug. 6, 2024, 1:23 p.m. UTC
Occasionally when the system goes into pm_suspend, the suspend might fail
due to a PHY access error on the network adapter. Previously, this would
have caused the whole system to fail to go to a low power state.
An example of this was reported in the following Bugzilla:
https://bugzilla.kernel.org/show_bug.cgi?id=205015

[ 1663.694828] e1000e 0000:00:19.0 eth0: Failed to disable ULP
[ 1664.731040] asix 2-3:1.0 eth1: link up, 100Mbps, full-duplex, lpa 0xC1E1
[ 1665.093513] e1000e 0000:00:19.0 eth0: Hardware Error
[ 1665.596760] e1000e 0000:00:19.0: pci_pm_resume+0x0/0x80 returned 0 after 2975399 usecs
and then the system never recovers from it, and all the following suspend failed due to this
[22909.393854] PM: pci_pm_suspend(): e1000e_pm_suspend+0x0/0x760 [e1000e] returns -2
[22909.393858] PM: dpm_run_callback(): pci_pm_suspend+0x0/0x160 returns -2
[22909.393861] PM: Device 0000:00:1f.6 failed to suspend async: error -2

This can be avoided by changing the return values of __e1000_shutdown and
e1000e_pm_suspend functions so that they always return 0 (success). This
is consistent with what other drivers do.
If the e1000e deiver encounters a hardware error during suspend, potential
side effects include slightly higher power draw or non-working wake on
LAN. This is preferred to a system-level suspend failure, and a warning
message is written to the system log, so that the user can be aware that
the LAN controller experienced a problem during suspend.

Suggested-by: Dima Ruinskiy <dima.ruinskiy@intel.com>
Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=205015
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

Comments

Tony Nguyen Aug. 8, 2024, 8:24 p.m. UTC | #1
On 8/6/2024 6:23 AM, Vitaly Lifshits wrote:
> Occasionally when the system goes into pm_suspend, the suspend might fail
> due to a PHY access error on the network adapter. Previously, this would
> have caused the whole system to fail to go to a low power state.
> An example of this was reported in the following Bugzilla:
> https://bugzilla.kernel.org/show_bug.cgi?id=205015
> 
> [ 1663.694828] e1000e 0000:00:19.0 eth0: Failed to disable ULP
> [ 1664.731040] asix 2-3:1.0 eth1: link up, 100Mbps, full-duplex, lpa 0xC1E1
> [ 1665.093513] e1000e 0000:00:19.0 eth0: Hardware Error
> [ 1665.596760] e1000e 0000:00:19.0: pci_pm_resume+0x0/0x80 returned 0 after 2975399 usecs
> and then the system never recovers from it, and all the following suspend failed due to this
> [22909.393854] PM: pci_pm_suspend(): e1000e_pm_suspend+0x0/0x760 [e1000e] returns -2
> [22909.393858] PM: dpm_run_callback(): pci_pm_suspend+0x0/0x160 returns -2
> [22909.393861] PM: Device 0000:00:1f.6 failed to suspend async: error -2
> 
> This can be avoided by changing the return values of __e1000_shutdown and
> e1000e_pm_suspend functions so that they always return 0 (success). This
> is consistent with what other drivers do.
> If the e1000e deiver encounters a hardware error during suspend, potential
> side effects include slightly higher power draw or non-working wake on
> LAN. This is preferred to a system-level suspend failure, and a warning
> message is written to the system log, so that the user can be aware that
> the LAN controller experienced a problem during suspend.
> 

Could you please provide a Fixes:

Thanks,
Tony

> Suggested-by: Dima Ruinskiy <dima.ruinskiy@intel.com>
> Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=205015
> ---
>   drivers/net/ethernet/intel/e1000e/netdev.c | 19 +++++++++++--------
>   1 file changed, 11 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 360ee26557f7..f103249b12fa 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -6671,8 +6671,10 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
>   		if (adapter->flags2 & FLAG2_HAS_PHY_WAKEUP) {
>   			/* enable wakeup by the PHY */
>   			retval = e1000_init_phy_wakeup(adapter, wufc);
> -			if (retval)
> -				return retval;
> +			if (retval) {
> +				e_err("Failed to enable wakeup\n");
> +				goto skip_phy_configurations;
> +			}
>   		} else {
>   			/* enable wakeup by the MAC */
>   			ew32(WUFC, wufc);
> @@ -6693,8 +6695,10 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
>   			 * or broadcast.
>   			 */
>   			retval = e1000_enable_ulp_lpt_lp(hw, !runtime);
> -			if (retval)
> -				return retval;
> +			if (retval) {
> +				e_err("Failed to enable ULP\n");
> +				goto skip_phy_configurations;
> +			}
>   		}
>   	}
>   
> @@ -6726,6 +6730,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
>   		hw->phy.ops.release(hw);
>   	}
>   
> +skip_phy_configurations:
>   	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
>   	 * would have already happened in close and is redundant.
>   	 */
> @@ -6968,15 +6973,13 @@ static int e1000e_pm_suspend(struct device *dev)
>   	e1000e_pm_freeze(dev);
>   
>   	rc = __e1000_shutdown(pdev, false);
> -	if (rc) {
> -		e1000e_pm_thaw(dev);
> -	} else {
> +	if (!rc) {
>   		/* Introduce S0ix implementation */
>   		if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS)
>   			e1000e_s0ix_entry_flow(adapter);
>   	}
>   
> -	return rc;
> +	return 0;
>   }
>   
>   static int e1000e_pm_resume(struct device *dev)
Mor Bar-Gabay Aug. 20, 2024, 7:11 a.m. UTC | #2
On 06/08/2024 16:23, Vitaly Lifshits wrote:
> Occasionally when the system goes into pm_suspend, the suspend might fail
> due to a PHY access error on the network adapter. Previously, this would
> have caused the whole system to fail to go to a low power state.
> An example of this was reported in the following Bugzilla:
> https://bugzilla.kernel.org/show_bug.cgi?id=205015
> 
> [ 1663.694828] e1000e 0000:00:19.0 eth0: Failed to disable ULP
> [ 1664.731040] asix 2-3:1.0 eth1: link up, 100Mbps, full-duplex, lpa 0xC1E1
> [ 1665.093513] e1000e 0000:00:19.0 eth0: Hardware Error
> [ 1665.596760] e1000e 0000:00:19.0: pci_pm_resume+0x0/0x80 returned 0 after 2975399 usecs
> and then the system never recovers from it, and all the following suspend failed due to this
> [22909.393854] PM: pci_pm_suspend(): e1000e_pm_suspend+0x0/0x760 [e1000e] returns -2
> [22909.393858] PM: dpm_run_callback(): pci_pm_suspend+0x0/0x160 returns -2
> [22909.393861] PM: Device 0000:00:1f.6 failed to suspend async: error -2
> 
> This can be avoided by changing the return values of __e1000_shutdown and
> e1000e_pm_suspend functions so that they always return 0 (success). This
> is consistent with what other drivers do.
> If the e1000e deiver encounters a hardware error during suspend, potential
> side effects include slightly higher power draw or non-working wake on
> LAN. This is preferred to a system-level suspend failure, and a warning
> message is written to the system log, so that the user can be aware that
> the LAN controller experienced a problem during suspend.
> 
> Suggested-by: Dima Ruinskiy <dima.ruinskiy@intel.com>
> Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=205015
> ---
>   drivers/net/ethernet/intel/e1000e/netdev.c | 19 +++++++++++--------
>   1 file changed, 11 insertions(+), 8 deletions(-)
> 
Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 360ee26557f7..f103249b12fa 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6671,8 +6671,10 @@  static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
 		if (adapter->flags2 & FLAG2_HAS_PHY_WAKEUP) {
 			/* enable wakeup by the PHY */
 			retval = e1000_init_phy_wakeup(adapter, wufc);
-			if (retval)
-				return retval;
+			if (retval) {
+				e_err("Failed to enable wakeup\n");
+				goto skip_phy_configurations;
+			}
 		} else {
 			/* enable wakeup by the MAC */
 			ew32(WUFC, wufc);
@@ -6693,8 +6695,10 @@  static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
 			 * or broadcast.
 			 */
 			retval = e1000_enable_ulp_lpt_lp(hw, !runtime);
-			if (retval)
-				return retval;
+			if (retval) {
+				e_err("Failed to enable ULP\n");
+				goto skip_phy_configurations;
+			}
 		}
 	}
 
@@ -6726,6 +6730,7 @@  static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
 		hw->phy.ops.release(hw);
 	}
 
+skip_phy_configurations:
 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
 	 * would have already happened in close and is redundant.
 	 */
@@ -6968,15 +6973,13 @@  static int e1000e_pm_suspend(struct device *dev)
 	e1000e_pm_freeze(dev);
 
 	rc = __e1000_shutdown(pdev, false);
-	if (rc) {
-		e1000e_pm_thaw(dev);
-	} else {
+	if (!rc) {
 		/* Introduce S0ix implementation */
 		if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS)
 			e1000e_s0ix_entry_flow(adapter);
 	}
 
-	return rc;
+	return 0;
 }
 
 static int e1000e_pm_resume(struct device *dev)