diff mbox series

[v2,1/2] mtd: spi-nand: Add read retry support

Message ID 20241114023528.181583-2-linchengming884@gmail.com
State New
Headers show
Series Add support for read retry | expand

Commit Message

Cheng Ming Lin Nov. 14, 2024, 2:35 a.m. UTC
From: Cheng Ming Lin <chengminglin@mxic.com.tw>

When the host ECC fails to correct the data error of NAND device,
there's a special read for data recovery method which host setups
for the next read retry mode and may recover the lost data by host
ECC again.

Signed-off-by: Cheng Ming Lin <chengminglin@mxic.com.tw>
---
 drivers/mtd/nand/spi/core.c | 35 +++++++++++++++++++++++++++++++++--
 include/linux/mtd/spinand.h | 14 ++++++++++++++
 2 files changed, 47 insertions(+), 2 deletions(-)

Comments

Miquel Raynal Nov. 18, 2024, 10:31 a.m. UTC | #1
Hi Cheng Ming,

On 14/11/2024 at 10:35:27 +08, Cheng Ming Lin <linchengming884@gmail.com> wrote:

> From: Cheng Ming Lin <chengminglin@mxic.com.tw>
>
> When the host ECC fails to correct the data error of NAND device,
> there's a special read for data recovery method which host setups

Here is a suggestion for rewording the second part of your commit log:

... which can be setup by the host for the next read. There are several
retry levels that can be attempted until the lost data is recovered or
definitely assumed lost.

> for the next read retry mode and may recover the lost data by host
> ECC again.
>
> Signed-off-by: Cheng Ming Lin <chengminglin@mxic.com.tw>
> ---
>  drivers/mtd/nand/spi/core.c | 35 +++++++++++++++++++++++++++++++++--
>  include/linux/mtd/spinand.h | 14 ++++++++++++++
>  2 files changed, 47 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
> index 4d76f9f71a0e..bd5339a308aa 100644
> --- a/drivers/mtd/nand/spi/core.c
> +++ b/drivers/mtd/nand/spi/core.c
> @@ -672,10 +672,14 @@ static int spinand_mtd_regular_page_read(struct mtd_info *mtd, loff_t from,
>  	struct spinand_device *spinand = mtd_to_spinand(mtd);
>  	struct nand_device *nand = mtd_to_nanddev(mtd);
>  	struct nand_io_iter iter;
> +	struct mtd_ecc_stats old_stats;

Reverse christmas tree is nicer :)

>  	bool disable_ecc = false;
>  	bool ecc_failed = false;
> +	unsigned int retry_mode = 0;
>  	int ret;
>  
> +	old_stats = mtd->ecc_stats;
> +
>  	if (ops->mode == MTD_OPS_RAW || !mtd->ooblayout)
>  		disable_ecc = true;
>  
> @@ -687,18 +691,43 @@ static int spinand_mtd_regular_page_read(struct mtd_info *mtd, loff_t from,
>  		if (ret)
>  			break;
>  
> +read_retry:
>  		ret = spinand_read_page(spinand, &iter.req);
>  		if (ret < 0 && ret != -EBADMSG)
>  			break;
>  
> -		if (ret == -EBADMSG)
> +		if (ret == -EBADMSG && spinand->set_read_retry) {
> +			if (spinand->read_retries && (++retry_mode < spinand->read_retries)) {
> +				ret = spinand->set_read_retry(spinand, retry_mode);
> +				if (ret < 0) {
> +					ecc_failed = true;
> +					break;

What is this break gonna do? You're not in a loop. I don't think breaks
have any effect on if blocks.

> +				}
> +
> +				/* Reset ecc_stats; retry */
> +				mtd->ecc_stats = old_stats;
> +				goto read_retry;
> +			} else {
> +				/* No more retry modes; real failure */
> +				ecc_failed = true;
> +			}
> +		} else if (ret == -EBADMSG) {
>  			ecc_failed = true;
> -		else
> +		} else {
>  			*max_bitflips = max_t(unsigned int, *max_bitflips, ret);
> +		}
>  

Thanks,
Miquèl
Cheng Ming Lin Nov. 22, 2024, 5:39 a.m. UTC | #2
Hi Miquel,

Miquel Raynal <miquel.raynal@bootlin.com> 於 2024年11月18日 週一 下午6:31寫道:
>
>
> Hi Cheng Ming,
>
> On 14/11/2024 at 10:35:27 +08, Cheng Ming Lin <linchengming884@gmail.com> wrote:
>
> > From: Cheng Ming Lin <chengminglin@mxic.com.tw>
> >
> > When the host ECC fails to correct the data error of NAND device,
> > there's a special read for data recovery method which host setups
>
> Here is a suggestion for rewording the second part of your commit log:
>
> ... which can be setup by the host for the next read. There are several
> retry levels that can be attempted until the lost data is recovered or
> definitely assumed lost.
>

Thank you for your suggestion. I will make adjustments based on it.

> > for the next read retry mode and may recover the lost data by host
> > ECC again.
> >
> > Signed-off-by: Cheng Ming Lin <chengminglin@mxic.com.tw>
> > ---
> >  drivers/mtd/nand/spi/core.c | 35 +++++++++++++++++++++++++++++++++--
> >  include/linux/mtd/spinand.h | 14 ++++++++++++++
> >  2 files changed, 47 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
> > index 4d76f9f71a0e..bd5339a308aa 100644
> > --- a/drivers/mtd/nand/spi/core.c
> > +++ b/drivers/mtd/nand/spi/core.c
> > @@ -672,10 +672,14 @@ static int spinand_mtd_regular_page_read(struct mtd_info *mtd, loff_t from,
> >       struct spinand_device *spinand = mtd_to_spinand(mtd);
> >       struct nand_device *nand = mtd_to_nanddev(mtd);
> >       struct nand_io_iter iter;
> > +     struct mtd_ecc_stats old_stats;
>
> Reverse christmas tree is nicer :)

Got it! I'll make the changes.

>
> >       bool disable_ecc = false;
> >       bool ecc_failed = false;
> > +     unsigned int retry_mode = 0;
> >       int ret;
> >
> > +     old_stats = mtd->ecc_stats;
> > +
> >       if (ops->mode == MTD_OPS_RAW || !mtd->ooblayout)
> >               disable_ecc = true;
> >
> > @@ -687,18 +691,43 @@ static int spinand_mtd_regular_page_read(struct mtd_info *mtd, loff_t from,
> >               if (ret)
> >                       break;
> >
> > +read_retry:
> >               ret = spinand_read_page(spinand, &iter.req);
> >               if (ret < 0 && ret != -EBADMSG)
> >                       break;
> >
> > -             if (ret == -EBADMSG)
> > +             if (ret == -EBADMSG && spinand->set_read_retry) {
> > +                     if (spinand->read_retries && (++retry_mode < spinand->read_retries)) {
> > +                             ret = spinand->set_read_retry(spinand, retry_mode);
> > +                             if (ret < 0) {
> > +                                     ecc_failed = true;
> > +                                     break;
>
> What is this break gonna do? You're not in a loop. I don't think breaks
> have any effect on if blocks.

There's a nanddev_io_for_each_page iteration above, so the break
was intended to terminate that loop. However, I realized that break
should be replaced with return ret because if the set feature operation
fails, it should return the error.

>
> > +                             }
> > +
> > +                             /* Reset ecc_stats; retry */
> > +                             mtd->ecc_stats = old_stats;
> > +                             goto read_retry;
> > +                     } else {
> > +                             /* No more retry modes; real failure */
> > +                             ecc_failed = true;
> > +                     }
> > +             } else if (ret == -EBADMSG) {
> >                       ecc_failed = true;
> > -             else
> > +             } else {
> >                       *max_bitflips = max_t(unsigned int, *max_bitflips, ret);
> > +             }
> >
>
> Thanks,
> Miquèl

Thanks,
Cheng Ming Lin
diff mbox series

Patch

diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 4d76f9f71a0e..bd5339a308aa 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -672,10 +672,14 @@  static int spinand_mtd_regular_page_read(struct mtd_info *mtd, loff_t from,
 	struct spinand_device *spinand = mtd_to_spinand(mtd);
 	struct nand_device *nand = mtd_to_nanddev(mtd);
 	struct nand_io_iter iter;
+	struct mtd_ecc_stats old_stats;
 	bool disable_ecc = false;
 	bool ecc_failed = false;
+	unsigned int retry_mode = 0;
 	int ret;
 
+	old_stats = mtd->ecc_stats;
+
 	if (ops->mode == MTD_OPS_RAW || !mtd->ooblayout)
 		disable_ecc = true;
 
@@ -687,18 +691,43 @@  static int spinand_mtd_regular_page_read(struct mtd_info *mtd, loff_t from,
 		if (ret)
 			break;
 
+read_retry:
 		ret = spinand_read_page(spinand, &iter.req);
 		if (ret < 0 && ret != -EBADMSG)
 			break;
 
-		if (ret == -EBADMSG)
+		if (ret == -EBADMSG && spinand->set_read_retry) {
+			if (spinand->read_retries && (++retry_mode < spinand->read_retries)) {
+				ret = spinand->set_read_retry(spinand, retry_mode);
+				if (ret < 0) {
+					ecc_failed = true;
+					break;
+				}
+
+				/* Reset ecc_stats; retry */
+				mtd->ecc_stats = old_stats;
+				goto read_retry;
+			} else {
+				/* No more retry modes; real failure */
+				ecc_failed = true;
+			}
+		} else if (ret == -EBADMSG) {
 			ecc_failed = true;
-		else
+		} else {
 			*max_bitflips = max_t(unsigned int, *max_bitflips, ret);
+		}
 
 		ret = 0;
 		ops->retlen += iter.req.datalen;
 		ops->oobretlen += iter.req.ooblen;
+
+		/* Reset to retry mode 0 */
+		if (retry_mode) {
+			retry_mode = 0;
+			ret = spinand->set_read_retry(spinand, retry_mode);
+			if (ret < 0)
+				break;
+		}
 	}
 
 	if (ecc_failed && !ret)
@@ -1268,6 +1297,8 @@  int spinand_match_and_init(struct spinand_device *spinand,
 		spinand->id.len = 1 + table[i].devid.len;
 		spinand->select_target = table[i].select_target;
 		spinand->set_cont_read = table[i].set_cont_read;
+		spinand->read_retries = table[i].read_retries;
+		spinand->set_read_retry = table[i].set_read_retry;
 
 		op = spinand_select_op_variant(spinand,
 					       info->op_variants.read_cache);
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 702e5fb13dae..bbfef90135f5 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -339,6 +339,8 @@  struct spinand_ondie_ecc_conf {
  * @select_target: function used to select a target/die. Required only for
  *		   multi-die chips
  * @set_cont_read: enable/disable continuous cached reads
+ * @read_retries: the number of read retry modes supported
+ * @set_read_retry: enable/disable read retry for data recovery
  *
  * Each SPI NAND manufacturer driver should have a spinand_info table
  * describing all the chips supported by the driver.
@@ -359,6 +361,9 @@  struct spinand_info {
 			     unsigned int target);
 	int (*set_cont_read)(struct spinand_device *spinand,
 			     bool enable);
+	unsigned int read_retries;
+	int (*set_read_retry)(struct spinand_device *spinand,
+			     unsigned int read_retry);
 };
 
 #define SPINAND_ID(__method, ...)					\
@@ -387,6 +392,10 @@  struct spinand_info {
 #define SPINAND_CONT_READ(__set_cont_read)				\
 	.set_cont_read = __set_cont_read,
 
+#define SPINAND_READ_RETRY(__read_retries, __set_read_retry)		\
+	.read_retries = __read_retries,				\
+	.set_read_retry = __set_read_retry,
+
 #define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants,	\
 		     __flags, ...)					\
 	{								\
@@ -436,6 +445,8 @@  struct spinand_dirmap {
  *		A per-transfer check must of course be done to ensure it is
  *		actually relevant to enable this feature.
  * @set_cont_read: Enable/disable the continuous read feature
+ * @read_retries: the number of read retry modes supported
+ * @set_read_retry: Enable/disable the read retry feature
  * @priv: manufacturer private data
  */
 struct spinand_device {
@@ -469,6 +480,9 @@  struct spinand_device {
 	bool cont_read_possible;
 	int (*set_cont_read)(struct spinand_device *spinand,
 			     bool enable);
+	unsigned int read_retries;
+	int (*set_read_retry)(struct spinand_device *spinand,
+			     unsigned int retry_mode);
 };
 
 /**