diff mbox series

[v2,2/2] ubi: Implement ioctl for detailed erase counters

Message ID 20241127091050.1254359-2-rickard.andersson@axis.com
State New
Headers show
Series [v2,1/2] ubi: Expose interface for detailed erase counters | expand

Commit Message

Rickard Andersson Nov. 27, 2024, 9:10 a.m. UTC
Currently, "max_ec" can be read from sysfs, which provides a limited
view of the flash device’s wear. In certain cases, such as bugs in
the wear-leveling algorithm, specific blocks can be worn down more
than others, resulting in uneven wear distribution. Also some use cases
can wear the erase blocks of the fastmap area more heavily than other
parts of flash.
Providing detailed erase counter values give a better understanding of
the overall flash wear and is needed to be able to calculate for example
expected life time.
There exists more detailed info in debugfs, but this information is
only available for debug builds.

Signed-off-by: Rickard Andersson <rickard.andersson@axis.com>
---
 drivers/mtd/ubi/cdev.c | 96 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

Comments

Richard Weinberger Dec. 3, 2024, 7:09 p.m. UTC | #1
Rickard,

----- Ursprüngliche Mail -----
> Von: "Rickard X Andersson" <rickard.andersson@axis.com>
> An: "richard" <richard@nod.at>, "chengzhihao1" <chengzhihao1@huawei.com>, "linux-mtd" <linux-mtd@lists.infradead.org>,
> "rickard314 andersson" <rickard314.andersson@gmail.com>
> CC: "Rickard X Andersson" <rickard.andersson@axis.com>, "kernel" <kernel@axis.com>
> Gesendet: Mittwoch, 27. November 2024 10:10:50
> Betreff: [PATCH v2 2/2] ubi: Implement ioctl for detailed erase counters

> Currently, "max_ec" can be read from sysfs, which provides a limited
> view of the flash device’s wear. In certain cases, such as bugs in
> the wear-leveling algorithm, specific blocks can be worn down more
> than others, resulting in uneven wear distribution. Also some use cases
> can wear the erase blocks of the fastmap area more heavily than other
> parts of flash.
> Providing detailed erase counter values give a better understanding of
> the overall flash wear and is needed to be able to calculate for example
> expected life time.
> There exists more detailed info in debugfs, but this information is
> only available for debug builds.
> 
> Signed-off-by: Rickard Andersson <rickard.andersson@axis.com>
> ---
> drivers/mtd/ubi/cdev.c | 96 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 96 insertions(+)
> 
> diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
> index 0d8f04cf03c5..681cf8526e2f 100644
> --- a/drivers/mtd/ubi/cdev.c
> +++ b/drivers/mtd/ubi/cdev.c
> @@ -828,6 +828,96 @@ static int rename_volumes(struct ubi_device *ubi,
> 	return err;
> }
> 
> +
> +static int ubi_get_ec_info(struct ubi_device *ubi, struct ubi_ecinfo_req __user
> *ureq)
> +{
> +	struct ubi_ecinfo_req req;
> +	struct ubi_wl_entry *wl;
> +	int i;
> +	int peb;
> +	int end_peb;
> +	int err = 0;
> +	int max_elem;
> +	int32_t *erase_counters;
> +
> +	/* Copy the input arguments */
> +	err = copy_from_user(&req, ureq, sizeof(struct ubi_ecinfo_req));
> +	if (err) {
> +		err = -EFAULT;
> +		goto out;
> +	}
> +
> +	/* Check input arguments */
> +	if (req.length <= 0 || req.start < 0) {
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	max_elem = req.length;
> +
> +	/* We can not read more than the total amount of PEBs */
> +	if (max_elem > ubi->peb_count)
> +		max_elem = ubi->peb_count;
> +
> +	/* Check for overflow */
> +	if (req.start + max_elem < req.start) {

You can use check_add_overflow() or a similar helper.

> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	erase_counters = kmalloc_array(max_elem,
> +				       sizeof(int32_t),
> +				       GFP_KERNEL);

I don't think this temporally buffer is needed.
Especially since the allocation is userspace controlled and unbound,
it can be a problem later.

> +	if (!erase_counters) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	end_peb = req.start + max_elem;
> +	if (end_peb > ubi->peb_count)
> +		end_peb = ubi->peb_count;
> +
> +	i = 0;
> +	for (peb = req.start; peb < end_peb; i++, peb++) {
> +		int ec;
> +
> +		if (ubi_io_is_bad(ubi, peb)) {
> +			erase_counters[i] = UBI_UNKNOWN;
> +			continue;
> +		}
> +
> +		spin_lock(&ubi->wl_lock);
> +
> +		wl = ubi->lookuptbl[peb];
> +		if (wl)
> +			ec = wl->ec;
> +		else
> +			ec = UBI_UNKNOWN;
> +
> +		spin_unlock(&ubi->wl_lock);
> +
> +		erase_counters[i] = ec;

My idea way using there something like __put_user() to directly write the
record to userspace memory.
With an access_ok() check before you can make sure that the whole destination
is within userspace.

Thanks,
//richard
Zhihao Cheng Dec. 4, 2024, 1:59 a.m. UTC | #2
在 2024/12/4 3:09, Richard Weinberger 写道:
> Rickard,
> 
> ----- Ursprüngliche Mail -----
>> Von: "Rickard X Andersson" <rickard.andersson@axis.com>
>> An: "richard" <richard@nod.at>, "chengzhihao1" <chengzhihao1@huawei.com>, "linux-mtd" <linux-mtd@lists.infradead.org>,
>> "rickard314 andersson" <rickard314.andersson@gmail.com>
>> CC: "Rickard X Andersson" <rickard.andersson@axis.com>, "kernel" <kernel@axis.com>
>> Gesendet: Mittwoch, 27. November 2024 10:10:50
>> Betreff: [PATCH v2 2/2] ubi: Implement ioctl for detailed erase counters
> 
>> Currently, "max_ec" can be read from sysfs, which provides a limited
>> view of the flash device’s wear. In certain cases, such as bugs in
>> the wear-leveling algorithm, specific blocks can be worn down more
>> than others, resulting in uneven wear distribution. Also some use cases
>> can wear the erase blocks of the fastmap area more heavily than other
>> parts of flash.
>> Providing detailed erase counter values give a better understanding of
>> the overall flash wear and is needed to be able to calculate for example
>> expected life time.
>> There exists more detailed info in debugfs, but this information is
>> only available for debug builds.
>>
>> Signed-off-by: Rickard Andersson <rickard.andersson@axis.com>
>> ---
>> drivers/mtd/ubi/cdev.c | 96 ++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 96 insertions(+)
>>
>> diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
>> index 0d8f04cf03c5..681cf8526e2f 100644
>> --- a/drivers/mtd/ubi/cdev.c
>> +++ b/drivers/mtd/ubi/cdev.c
>> @@ -828,6 +828,96 @@ static int rename_volumes(struct ubi_device *ubi,
>> 	return err;
>> }
>>
>> +
>> +static int ubi_get_ec_info(struct ubi_device *ubi, struct ubi_ecinfo_req __user
>> *ureq)
>> +{
>> +	struct ubi_ecinfo_req req;
>> +	struct ubi_wl_entry *wl;
>> +	int i;
>> +	int peb;
>> +	int end_peb;
>> +	int err = 0;
>> +	int max_elem;
>> +	int32_t *erase_counters;
>> +
>> +	/* Copy the input arguments */
>> +	err = copy_from_user(&req, ureq, sizeof(struct ubi_ecinfo_req));
>> +	if (err) {
>> +		err = -EFAULT;
>> +		goto out;

How about directly returning function?
>> +	}
>> +
>> +	/* Check input arguments */
>> +	if (req.length <= 0 || req.start < 0) {
>> +		err = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	max_elem = req.length;
>> +
>> +	/* We can not read more than the total amount of PEBs */
>> +	if (max_elem > ubi->peb_count)
>> +		max_elem = ubi->peb_count;
>> +
>> +	/* Check for overflow */
>> +	if (req.start + max_elem < req.start) {
> 
> You can use check_add_overflow() or a similar helper.
> 
>> +		err = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	erase_counters = kmalloc_array(max_elem,
>> +				       sizeof(int32_t),
>> +				       GFP_KERNEL);
> 
> I don't think this temporally buffer is needed.
> Especially since the allocation is userspace controlled and unbound,
> it can be a problem later.
> 
>> +	if (!erase_counters) {
>> +		err = -ENOMEM;
>> +		goto out;
>> +	}
>> +
>> +	end_peb = req.start + max_elem;
>> +	if (end_peb > ubi->peb_count)
>> +		end_peb = ubi->peb_count;
>> +
>> +	i = 0;
>> +	for (peb = req.start; peb < end_peb; i++, peb++) {
>> +		int ec;
>> +
>> +		if (ubi_io_is_bad(ubi, peb)) {
>> +			erase_counters[i] = UBI_UNKNOWN;
>> +			continue;
>> +		}
>> +
>> +		spin_lock(&ubi->wl_lock);
>> +
>> +		wl = ubi->lookuptbl[peb];
>> +		if (wl)
>> +			ec = wl->ec;
>> +		else
>> +			ec = UBI_UNKNOWN;
>> +
>> +		spin_unlock(&ubi->wl_lock);
>> +
>> +		erase_counters[i] = ec;
> 
> My idea way using there something like __put_user() to directly write the
> record to userspace memory.
> With an access_ok() check before you can make sure that the whole destination
> is within userspace.

Agree. Hi, Rickard. You can have a look at ioctl_fiemap -> ext4_fiemap 
-> iomap_fiemap -> iomap_to_fiemap -> fiemap_fill_next_extent, just like 
the suggestions from Richard. No extra memory allocations, copy results 
to userspace one by one.
> 
> Thanks,
> //richard
> 
> .
>
diff mbox series

Patch

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 0d8f04cf03c5..681cf8526e2f 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -828,6 +828,96 @@  static int rename_volumes(struct ubi_device *ubi,
 	return err;
 }
 
+
+static int ubi_get_ec_info(struct ubi_device *ubi, struct ubi_ecinfo_req __user *ureq)
+{
+	struct ubi_ecinfo_req req;
+	struct ubi_wl_entry *wl;
+	int i;
+	int peb;
+	int end_peb;
+	int err = 0;
+	int max_elem;
+	int32_t *erase_counters;
+
+	/* Copy the input arguments */
+	err = copy_from_user(&req, ureq, sizeof(struct ubi_ecinfo_req));
+	if (err) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	/* Check input arguments */
+	if (req.length <= 0 || req.start < 0) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	max_elem = req.length;
+
+	/* We can not read more than the total amount of PEBs */
+	if (max_elem > ubi->peb_count)
+		max_elem = ubi->peb_count;
+
+	/* Check for overflow */
+	if (req.start + max_elem < req.start) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	erase_counters = kmalloc_array(max_elem,
+				       sizeof(int32_t),
+				       GFP_KERNEL);
+	if (!erase_counters) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	end_peb = req.start + max_elem;
+	if (end_peb > ubi->peb_count)
+		end_peb = ubi->peb_count;
+
+	i = 0;
+	for (peb = req.start; peb < end_peb; i++, peb++) {
+		int ec;
+
+		if (ubi_io_is_bad(ubi, peb)) {
+			erase_counters[i] = UBI_UNKNOWN;
+			continue;
+		}
+
+		spin_lock(&ubi->wl_lock);
+
+		wl = ubi->lookuptbl[peb];
+		if (wl)
+			ec = wl->ec;
+		else
+			ec = UBI_UNKNOWN;
+
+		spin_unlock(&ubi->wl_lock);
+
+		erase_counters[i] = ec;
+	}
+
+	/* Return actual read length */
+	req.read_length = i;
+
+	/* Copy everything except erase counter array */
+	if (copy_to_user(ureq, &req, sizeof(struct ubi_ecinfo_req))) {
+		err = -EFAULT;
+		goto out_free;
+	}
+
+	/* Copy erase counter array */
+	if (copy_to_user(ureq->erase_counters, erase_counters, max_elem * sizeof(int32_t)))
+		err = -EFAULT;
+
+ out_free:
+	kfree(erase_counters);
+ out:
+	return err;
+}
+
 static long ubi_cdev_ioctl(struct file *file, unsigned int cmd,
 			   unsigned long arg)
 {
@@ -991,6 +1081,12 @@  static long ubi_cdev_ioctl(struct file *file, unsigned int cmd,
 		break;
 	}
 
+	case UBI_IOCECNFO:
+	{
+		err = ubi_get_ec_info(ubi, argp);
+		break;
+	}
+
 	default:
 		err = -ENOTTY;
 		break;