Message ID | 20220426154956.27205-3-eajames@linux.ibm.com |
---|---|
State | Accepted, archived |
Headers | show |
Series | fsi and hwmon (occ): Prevent occasional checksum failures | expand |
On Tue, 26 Apr 2022 at 15:50, Eddie James <eajames@linux.ibm.com> wrote: > > Due to the OCC communication design with a shared SRAM area, > checksum errors are expected due to corrupted buffer from OCC > communications with other system components. Therefore, retry > the command twice in the event of a checksum failure. > > Signed-off-by: Eddie James <eajames@linux.ibm.com> > Acked-by: Guenter Roeck <linux@roeck-us.net> > --- > drivers/hwmon/occ/p9_sbe.c | 15 +++++++++++---- > 1 file changed, 11 insertions(+), 4 deletions(-) > > diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c > index 49b13cc01073..e6ccef2af659 100644 > --- a/drivers/hwmon/occ/p9_sbe.c > +++ b/drivers/hwmon/occ/p9_sbe.c > @@ -14,6 +14,8 @@ > > #include "common.h" > > +#define OCC_CHECKSUM_RETRIES 3 > + > struct p9_sbe_occ { > struct occ occ; > bool sbe_error; > @@ -83,17 +85,22 @@ static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) > struct occ_response *resp = &occ->resp; > struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); > size_t resp_len = sizeof(*resp); > + int i; > int rc; > > - rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); > - if (rc < 0) { > + for (i = 0; i < OCC_CHECKSUM_RETRIES; ++i) { > + rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); > + if (rc >= 0) > + break; > if (resp_len) { > if (p9_sbe_occ_save_ffdc(ctx, resp, resp_len)) > sysfs_notify(&occ->bus_dev->kobj, NULL, > bin_attr_ffdc.attr.name); > - } > > - return rc; > + return rc; > + } > + if (rc != -EBADE) > + return rc; Future you might appreciate a comment above the EBADE check clarifying why that error is being special-cased. > } > > switch (resp->return_status) { > -- > 2.27.0 >
diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c index 49b13cc01073..e6ccef2af659 100644 --- a/drivers/hwmon/occ/p9_sbe.c +++ b/drivers/hwmon/occ/p9_sbe.c @@ -14,6 +14,8 @@ #include "common.h" +#define OCC_CHECKSUM_RETRIES 3 + struct p9_sbe_occ { struct occ occ; bool sbe_error; @@ -83,17 +85,22 @@ static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) struct occ_response *resp = &occ->resp; struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); size_t resp_len = sizeof(*resp); + int i; int rc; - rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); - if (rc < 0) { + for (i = 0; i < OCC_CHECKSUM_RETRIES; ++i) { + rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); + if (rc >= 0) + break; if (resp_len) { if (p9_sbe_occ_save_ffdc(ctx, resp, resp_len)) sysfs_notify(&occ->bus_dev->kobj, NULL, bin_attr_ffdc.attr.name); - } - return rc; + return rc; + } + if (rc != -EBADE) + return rc; } switch (resp->return_status) {