diff mbox series

[linux,dev-5.10,33/35] pmbus: (core) Add a one-shot retry in pmbus_set_page()

Message ID 20210308225419.46530-34-eajames@linux.ibm.com
State New
Headers show
Series Rainier and Everest system updates | expand

Commit Message

Eddie James March 8, 2021, 10:54 p.m. UTC
From: Andrew Jeffery <andrew@aj.id.au>

From extensive testing and tracing it was discovered that the MAX31785
occasionally fails to switch pages despite ACK'ing the PAGE PMBus data
write. I suspect this behaviour had been seen on other devices as well,
as pmbus_set_page() already read-back the freshly set value and errored
out if it wasn't what we requested.

In the case of the MAX31785 it was shown that a one-shot retry was
enough to get the PAGE write to stick if the inital command failed. To
improve robustness, only error out if the one-shot retry also fails to
stick.

OpenBMC-Staging-Count: 1
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Signed-off-by: Joel Stanley <joel@jms.id.au>
---
 drivers/hwmon/pmbus/pmbus_core.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

Comments

Andrei Kartashev March 9, 2021, 8:21 p.m. UTC | #1
Hi,
I have a similar patch in our local tree, but it adds retry in more
places. I had to add retries for all i2c_smbus_* operations because pf
communication to PSUs sometime very unstable.
Here is it:

From 7688b90c3e7e4986535a194a271509095534c3e7 Mon Sep 17 00:00:00 2001
From: Andrei Kartashev <a.kartashev@yadro.com>
Date: Tue, 9 Mar 2021 21:47:25 +0300
Subject: [PATCH] hwmon: (pmbus) Retry I2C request on failure

In real world I2C communication errors are possible. It was discovered
that pmbus read operation for some PSUs occasionally fails in random
places. For pmbus_set_page call there is already retry implemented, but
seems it is not enough.

Add retries for every i2c_smbus_read/i2c_smbus_write call to increase
robust.

Signed-off-by: Andrei Kartashev <a.kartashev@yadro.com>
---
 drivers/hwmon/pmbus/pmbus_core.c | 65 +++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index 60ea917936a7..d98b52950022 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -27,6 +27,7 @@
  */
 #define PMBUS_ATTR_ALLOC_SIZE	32
 #define PMBUS_NAME_SIZE		24
+#define I2C_RETRIES 3
 
 struct pmbus_sensor {
 	struct pmbus_sensor *next;
@@ -144,7 +145,7 @@ EXPORT_SYMBOL_GPL(pmbus_clear_cache);
 int pmbus_set_page(struct i2c_client *client, int page, int phase)
 {
 	struct pmbus_data *data = i2c_get_clientdata(client);
-	int rv;
+	int rv, rtr;
 
 	if (page < 0)
 		return 0;
@@ -154,18 +155,12 @@ int pmbus_set_page(struct i2c_client *client, int page, int phase)
 		dev_dbg(&client->dev, "Want page %u, %u cached\n", page,
 			data->currpage);
 
-		rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE, page);
-		if (rv < 0) {
+		for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
 			rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE,
 						       page);
-			dev_dbg(&client->dev,
-				"Failed to set page %u, performed one-shot retry %s: %d\n",
-				page, rv ? "and failed" : "with success", rv);
-			if (rv < 0)
-				return rv;
-		}
 
-		rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE);
+		for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+			rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE);
 		if (rv < 0)
 			return rv;
 
@@ -176,8 +171,9 @@ int pmbus_set_page(struct i2c_client *client, int page, int phase)
 
 	if (data->info->phases[page] && data->currphase != phase &&
 	    !(data->info->func[page] & PMBUS_PHASE_VIRTUAL)) {
-		rv = i2c_smbus_write_byte_data(client, PMBUS_PHASE,
-					       phase);
+		for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+			rv = i2c_smbus_write_byte_data(client, PMBUS_PHASE,
+						       phase);
 		if (rv)
 			return rv;
 	}
@@ -189,13 +185,15 @@ EXPORT_SYMBOL_GPL(pmbus_set_page);
 
 int pmbus_write_byte(struct i2c_client *client, int page, u8 value)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, 0xff);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_write_byte(client, value);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_write_byte(client, value);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_write_byte);
 
@@ -220,13 +218,15 @@ static int _pmbus_write_byte(struct i2c_client *client, int page, u8 value)
 int pmbus_write_word_data(struct i2c_client *client, int page, u8 reg,
 			  u16 word)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, 0xff);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_write_word_data(client, reg, word);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_write_word_data(client, reg, word);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_write_word_data);
 
@@ -302,13 +302,15 @@ EXPORT_SYMBOL_GPL(pmbus_update_fan);
 
 int pmbus_read_word_data(struct i2c_client *client, int page, int phase, u8 reg)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, phase);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_read_word_data(client, reg);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_read_word_data(client, reg);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_read_word_data);
 
@@ -361,25 +363,29 @@ static int __pmbus_read_word_data(struct i2c_client *client, int page, int reg)
 
 int pmbus_read_byte_data(struct i2c_client *client, int page, u8 reg)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, 0xff);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_read_byte_data(client, reg);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_read_byte_data(client, reg);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_read_byte_data);
 
 int pmbus_write_byte_data(struct i2c_client *client, int page, u8 reg, u8 value)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, 0xff);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_write_byte_data(client, reg, value);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_write_byte_data(client, reg, value);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_write_byte_data);
 
@@ -2193,7 +2199,7 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 			     struct pmbus_driver_info *info)
 {
 	struct device *dev = &client->dev;
-	int page, ret;
+	int page, ret, rtr;
 
 	/*
 	 * Some PMBus chips don't support PMBUS_STATUS_WORD, so try
@@ -2201,10 +2207,13 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 	 * Bail out if both registers are not supported.
 	 */
 	data->read_status = pmbus_read_status_word;
-	ret = i2c_smbus_read_word_data(client, PMBUS_STATUS_WORD);
+	for (rtr = 0, ret = -1; (rtr < I2C_RETRIES) && (ret < 0); rtr++)
+		ret = i2c_smbus_read_word_data(client, PMBUS_STATUS_WORD);
 	if (ret < 0 || ret == 0xffff) {
 		data->read_status = pmbus_read_status_byte;
-		ret = i2c_smbus_read_byte_data(client, PMBUS_STATUS_BYTE);
+		for (rtr = 0, ret = -1; (rtr < I2C_RETRIES) && (ret < 0); rtr++)
+			ret = i2c_smbus_read_byte_data(client,
+						       PMBUS_STATUS_BYTE);
 		if (ret < 0 || ret == 0xff) {
 			dev_err(dev, "PMBus status register not found\n");
 			return -ENODEV;
@@ -2214,7 +2223,8 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 	}
 
 	/* Enable PEC if the controller supports it */
-	ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
+	for (rtr = 0, ret = -1; (rtr < I2C_RETRIES) && (ret < 0); rtr++)
+		ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
 	if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK))
 		client->flags |= I2C_CLIENT_PEC;
 
@@ -2223,7 +2233,8 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 	 * faults, and we should not try it. Also, in that case, writes into
 	 * limit registers need to be disabled.
 	 */
-	ret = i2c_smbus_read_byte_data(client, PMBUS_WRITE_PROTECT);
+	for (rtr = 0, ret = -1; (rtr < I2C_RETRIES) && (ret < 0); rtr++)
+		ret = i2c_smbus_read_byte_data(client, PMBUS_WRITE_PROTECT);
 	if (ret > 0 && (ret & PB_WP_ANY))
 		data->flags |= PMBUS_WRITE_PROTECTED | PMBUS_SKIP_STATUS_CHECK;
Joel Stanley March 12, 2021, 12:35 a.m. UTC | #2
On Mon, 8 Mar 2021 at 22:56, Eddie James <eajames@linux.ibm.com> wrote:
>
> From: Andrew Jeffery <andrew@aj.id.au>
>
> From extensive testing and tracing it was discovered that the MAX31785
> occasionally fails to switch pages despite ACK'ing the PAGE PMBus data
> write. I suspect this behaviour had been seen on other devices as well,
> as pmbus_set_page() already read-back the freshly set value and errored
> out if it wasn't what we requested.
>
> In the case of the MAX31785 it was shown that a one-shot retry was
> enough to get the PAGE write to stick if the inital command failed. To
> improve robustness, only error out if the one-shot retry also fails to
> stick.
>
> OpenBMC-Staging-Count: 1
> Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
> Signed-off-by: Joel Stanley <joel@jms.id.au>

Andrew, please review the pmbus related changes and let me know how
you would like to proceed.

> ---
>  drivers/hwmon/pmbus/pmbus_core.c | 31 ++++++++++++++++++++-----------
>  1 file changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
> index 44c1a0a07509..dd4a09d18730 100644
> --- a/drivers/hwmon/pmbus/pmbus_core.c
> +++ b/drivers/hwmon/pmbus/pmbus_core.c
> @@ -151,25 +151,34 @@ int pmbus_set_page(struct i2c_client *client, int page, int phase)
>
>         if (!(data->info->func[page] & PMBUS_PAGE_VIRTUAL) &&
>             data->info->pages > 1 && page != data->currpage) {
> +               int i;
> +
>                 dev_dbg(&client->dev, "Want page %u, %u cached\n", page,
>                         data->currpage);
>
> -               rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE, page);
> -               if (rv < 0) {
> +               for (i = 0; i < 2; i++) {
>                         rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE,
>                                                        page);
> -                       dev_dbg(&client->dev,
> -                               "Failed to set page %u, performed one-shot retry %s: %d\n",
> -                               page, rv ? "and failed" : "with success", rv);
> +                       if (rv)
> +                               continue;
> +
> +                       rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE);
>                         if (rv < 0)
> -                               return rv;
> -               }
> +                               continue;
>
> -               rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE);
> -               if (rv < 0)
> -                       return rv;
> +                       /* Success, exit loop */
> +                       if (rv == page)
> +                               break;
> +
> +                       rv = i2c_smbus_read_byte_data(client, PMBUS_STATUS_CML);
> +                       if (rv < 0)
> +                               continue;
> +
> +                       if (rv & PB_CML_FAULT_INVALID_DATA)
> +                               return -EIO;
> +               }
>
> -               if (rv != page)
> +               if (i == 2)
>                         return -EIO;
>         }
>         data->currpage = page;
> --
> 2.27.0
>
diff mbox series

Patch

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index 44c1a0a07509..dd4a09d18730 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -151,25 +151,34 @@  int pmbus_set_page(struct i2c_client *client, int page, int phase)
 
 	if (!(data->info->func[page] & PMBUS_PAGE_VIRTUAL) &&
 	    data->info->pages > 1 && page != data->currpage) {
+		int i;
+
 		dev_dbg(&client->dev, "Want page %u, %u cached\n", page,
 			data->currpage);
 
-		rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE, page);
-		if (rv < 0) {
+		for (i = 0; i < 2; i++) {
 			rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE,
 						       page);
-			dev_dbg(&client->dev,
-				"Failed to set page %u, performed one-shot retry %s: %d\n",
-				page, rv ? "and failed" : "with success", rv);
+			if (rv)
+				continue;
+
+			rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE);
 			if (rv < 0)
-				return rv;
-		}
+				continue;
 
-		rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE);
-		if (rv < 0)
-			return rv;
+			/* Success, exit loop */
+			if (rv == page)
+				break;
+
+			rv = i2c_smbus_read_byte_data(client, PMBUS_STATUS_CML);
+			if (rv < 0)
+				continue;
+
+			if (rv & PB_CML_FAULT_INVALID_DATA)
+				return -EIO;
+		}
 
-		if (rv != page)
+		if (i == 2)
 			return -EIO;
 	}
 	data->currpage = page;