
[v2,3/7] mtd: mtd_raid: Init a new layer of MTD RAID

Message ID 1449909667-7759-4-git-send-email-yangds.fnst@cn.fujitsu.com
State Superseded

Commit Message

Dongsheng Yang Dec. 12, 2015, 8:41 a.m. UTC
The MTD RAID layer is a generic RAID layer in the MTD subsystem.
Different RAID levels can be implemented within this framework.
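
As an illustration only (not part of this patch), a multi-chip flash
driver could build a RAID1 device over two of its chips roughly as
below; mtd0/mtd1 stand in for whatever struct mtd_info instances the
driver already owns:

	struct mtd_info *subdevs[2] = { mtd0, mtd1 };
	int err;

	/* substripe_size == 0 falls back to the subdevice writesize */
	err = mtd_raid_create(MTD_RAID_LEVEL_RAID1, subdevs, 2, 0);
	if (err)
		pr_err("MTD RAID: failed to create raid1 device: %d\n", err);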

Signed-off-by: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
---
 Documentation/ioctl/ioctl-number.txt |    1 +
 drivers/mtd/Kconfig                  |    2 +
 drivers/mtd/Makefile                 |    1 +
 drivers/mtd/mtd_raid/Kconfig         |   12 +
 drivers/mtd/mtd_raid/Makefile        |    3 +
 drivers/mtd/mtd_raid/core.c          | 1103 ++++++++++++++++++++++++++++++++++
 drivers/mtd/mtd_raid/mtd_raid.h      |  273 +++++++++
 drivers/mtd/mtd_raid/raid_io.c       |  449 ++++++++++++++
 include/uapi/mtd/mtd-raid-user.h     |   33 +
 9 files changed, 1877 insertions(+)
 create mode 100644 drivers/mtd/mtd_raid/Kconfig
 create mode 100644 drivers/mtd/mtd_raid/Makefile
 create mode 100644 drivers/mtd/mtd_raid/core.c
 create mode 100644 drivers/mtd/mtd_raid/mtd_raid.h
 create mode 100644 drivers/mtd/mtd_raid/raid_io.c
 create mode 100644 include/uapi/mtd/mtd-raid-user.h

Patch

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index df1b25e..e13d60f 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -334,3 +334,4 @@  Code  Seq#(hex)	Include File		Comments
 0xF6	all	LTTng			Linux Trace Toolkit Next Generation
 					<mailto:mathieu.desnoyers@efficios.com>
 0xFD	all	linux/dm-ioctl.h
+0xFE	all	mtd/mtd-raid-user.h
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index a03ad29..63fcdbe 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -338,4 +338,6 @@  source "drivers/mtd/spi-nor/Kconfig"
 
 source "drivers/mtd/ubi/Kconfig"
 
+source "drivers/mtd/mtd_raid/Kconfig"
+
 endif # MTD
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 99bb9a1..1de2202 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -34,3 +34,4 @@  obj-y		+= chips/ lpddr/ maps/ devices/ nand/ onenand/ tests/
 
 obj-$(CONFIG_MTD_SPI_NOR)	+= spi-nor/
 obj-$(CONFIG_MTD_UBI)		+= ubi/
+obj-$(CONFIG_MTD_RAID)		+= mtd_raid/
diff --git a/drivers/mtd/mtd_raid/Kconfig b/drivers/mtd/mtd_raid/Kconfig
new file mode 100644
index 0000000..b70a68f
--- /dev/null
+++ b/drivers/mtd/mtd_raid/Kconfig
@@ -0,0 +1,12 @@ 
+menuconfig MTD_RAID
+	tristate "MTD RAID Support"
+	depends on MTD
+	help
+	  This is a module for MTD RAID. There are two ways to use
+	  it. One is for multi-chip flash drivers: a driver can call
+	  mtd_raid_create() to create a RAID device at the requested
+	  level. The other is for users: a userspace program can use
+	  the mtd_raid ioctl interface to create a RAID device from
+	  the flashes present in the system.
+
+	  More about raid:
+	  <https://en.wikipedia.org/wiki/Standard_RAID_levels>.
diff --git a/drivers/mtd/mtd_raid/Makefile b/drivers/mtd/mtd_raid/Makefile
new file mode 100644
index 0000000..517149b4
--- /dev/null
+++ b/drivers/mtd/mtd_raid/Makefile
@@ -0,0 +1,3 @@ 
+obj-$(CONFIG_MTD_RAID) += mtd_raid.o
+
+mtd_raid-y += ioctl.o raid_single.o core.o raid0.o raid1.o raid_io.o
diff --git a/drivers/mtd/mtd_raid/core.c b/drivers/mtd/mtd_raid/core.c
new file mode 100644
index 0000000..fe43611
--- /dev/null
+++ b/drivers/mtd/mtd_raid/core.c
@@ -0,0 +1,1103 @@ 
+/*
+ * Part of MTD RAID
+ *
+ * Copyright (C) 2015 Dongsheng Yang. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Authors: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
+ */
+
+/*
+ * TODO
+ * 	- merge requests
+ * 	- To support writev
+ * 	- raid10
+ * 	- raid5/6
+ *
+ * This is the core part of the MTD RAID layer. It implements the generic MTD
+ * callbacks; every RAID device shares the same generic MTD interfaces. These
+ * are only thin interfaces for each MTD operation, the real I/O against the
+ * individual flashes is handled by raid_io.c.
+ *
+ * There is a global list of all struct mtd_raid. It is a two-dimensional list:
+ * each RAID level has its own list that manages all mtd_raids of that level.
+ * When an MTD RAID device is created, it is registered on the corresponding
+ * list; when it is destroyed, it is unregistered from that list.
+ **/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/major.h>
+#include <linux/miscdevice.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <mtd/mtd-raid-user.h>
+
+#include "mtd_raid.h"
+
+/* We have a raid_list for each raid level. */
+struct raid_list {
+	int count;
+	struct list_head head;
+};
+
+/* To protect mtd_raid_list. */
+static spinlock_t mtd_raid_list_lock;
+static struct raid_list mtd_raid_list[MTD_RAID_LEVEL_MAX];
+
+int mtd_raid_list_init(void)
+{
+	int i = 0;
+
+	spin_lock_init(&mtd_raid_list_lock);
+	for (i = 0; i < MTD_RAID_LEVEL_MAX; i++)
+		INIT_LIST_HEAD(&mtd_raid_list[i].head);
+
+	return 0;
+}
+
+void mtd_raid_list_destroy(void)
+{
+	struct mtd_raid *mtd_raid, *next;
+	int i = 0;
+
+	for (i = 0; i < MTD_RAID_LEVEL_MAX; i++) {
+		list_for_each_entry_safe(mtd_raid, next, &mtd_raid_list[i].head, node) {
+			mtd_raid_destroy(mtd_raid);
+		}
+	}
+}
+
+int mtd_raid_list_register(enum mtd_raid_level raid_level, struct mtd_raid *mtd_raid)
+{
+	int raid_id = 0;
+
+	spin_lock(&mtd_raid_list_lock);
+	list_add_tail(&mtd_raid->node, &mtd_raid_list[raid_level].head);
+	raid_id = ++mtd_raid_list[raid_level].count;
+	spin_unlock(&mtd_raid_list_lock);
+
+	return raid_id;
+}
+
+struct mtd_raid *mtd_raid_list_get(int mtd_num)
+{
+	struct mtd_raid *raid;
+	int i = 0;
+
+	spin_lock(&mtd_raid_list_lock);
+	for (i = 0; i < MTD_RAID_LEVEL_MAX; i++) {
+		list_for_each_entry(raid, &mtd_raid_list[i].head, node) {
+			if (raid->mtd.index == mtd_num) {
+				spin_unlock(&mtd_raid_list_lock);
+				return raid;
+			}
+		}
+	}
+	spin_unlock(&mtd_raid_list_lock);
+
+	return NULL;
+}
+
+void mtd_raid_list_unregister(struct mtd_raid *mtd_raid)
+{
+	spin_lock(&mtd_raid_list_lock);
+	list_del(&mtd_raid->node);
+	spin_unlock(&mtd_raid_list_lock);
+}
+
+/* MTD interfaces */
+/* Check that the range addr..addr+len does not exceed the device's address space */
+int check_offs(struct mtd_info *mtd, loff_t addr, size_t len)
+{
+	/* Do not allow going outside the address space of the device */
+	if (addr < 0) {
+		pr_err("%s: From a negative address.\n",
+					__func__);
+		return -EINVAL;
+	}
+
+	if (addr > mtd->size) {
+		pr_err("%s: Start address beyond end of device.\n",
+					__func__);
+		return -EINVAL;
+	}
+
+	if (len > mtd->size - addr) {
+		pr_err("%s: Access past end of mtd device.\n",
+					__func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Stricter than check_offs(): it also checks that ofs and len
+ * are aligned to aligned_size.
+ **/
+int check_offs_aligned(struct mtd_info *mtd, loff_t ofs, uint64_t len,
+		       size_t aligned_size)
+{
+	int ret = 0;
+	loff_t tmp_ofs = ofs;
+	loff_t tmp_len = len;
+
+	/* Start address must be aligned to the given size */
+	if (do_div(tmp_ofs, aligned_size)) {
+		pr_err("%s: Unaligned address\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Length must be aligned to the given size */
+	if (do_div(tmp_len, aligned_size)) {
+		pr_err("%s: Length not aligned\n",
+					__func__);
+		return -EINVAL;
+	}
+
+	/* Do not allow going outside the address space of the device */
+	ret = check_offs(mtd, ofs, len);
+
+	return ret;
+}
+
+static int __raid_read_async(struct mtd_info *mtd, loff_t from, size_t len,
+			     size_t *retlen, u_char *buf)
+{
+	loff_t subdev_off;
+	int ret = 0, err = 0;
+	int devid, i_copy = 0;
+	size_t retsize, size;
+	struct mtd_raid_ctx *ctx; 
+	struct mtd_raid_dev *subdev;
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	struct mtd_raid_read_request *read_req;
+	struct mtd_raid_request *request;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ret = mtd_raid_ctx_init(ctx);
+	if (ret)
+		goto free;
+
+	while (len) {
+		err = raid->ops->logical_to_physical(raid, from, len, i_copy,
+						     &devid, &subdev_off, &size);
+		if (err) {
+			ret = err;
+			goto out;
+		}
+
+		subdev = &raid->devs[devid];
+		err = mtd_raid_dev_read(ctx, subdev, subdev_off, size, &retsize, buf);
+		if (unlikely(err)) {
+			ret = err;
+			goto out;
+		}
+
+		buf += retsize;
+		from += retsize;
+		len -= retsize;
+	}
+wait:
+	ret = mtd_raid_ctx_wait(ctx);
+	if (ret) {
+		if (!list_empty(&ctx->error_list)) {
+			request = list_first_entry(&ctx->error_list,
+							struct mtd_raid_request, node);
+			read_req = READ_REQUEST(request);
+			ret = read_req->retval;
+			goto out;
+		} else if (!list_empty(&ctx->failed_list)) {
+			ret =  -EBADMSG;
+		} else {
+			if (!ret)
+				ret = -EUCLEAN;
+		}
+	} else {
+		goto out;
+	}
+	
+	if (++i_copy >= raid->ncopies)
+		goto out;
+
+	ret = mtd_raid_ctx_retry(ctx, i_copy);
+	if (ret)
+		goto out;
+	goto wait;
+out:
+	mtd->ecc_stats.failed += ctx->failed;
+	mtd->ecc_stats.corrected += ctx->corrected;
+	/* Fill retlen */
+	*retlen = 0;
+	list_for_each_entry(request, &ctx->all_list, node_all) {
+		read_req = READ_REQUEST(request);
+		if (read_req->retval && !mtd_is_bitflip_or_eccerr(read_req->retval))
+			break;
+		*retlen += read_req->retlen;
+	}
+	mtd_raid_ctx_destroy(ctx);
+free:
+	kfree(ctx);
+
+	return ret;
+}
+
+static int __raid_read_sync(struct mtd_info *mtd, loff_t from, size_t len,
+			    size_t *retlen, u_char *buf)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	int ret = 0, err = 0;
+	int devid;
+	loff_t subdev_off;
+	size_t retsize, size;
+	struct mtd_info *subdev = NULL;
+
+	while (len) {
+		err = raid->ops->logical_to_physical(raid, from, len, 0,
+						     &devid, &subdev_off, &size);
+		if (err)
+			return err;
+
+		subdev = raid->devs[devid].mtd;
+		err = mtd_read(subdev, subdev_off, size, &retsize, buf);
+		/* Save information about bitflips! */
+		if (unlikely(err)) {
+			if (mtd_is_eccerr(err)) {
+				mtd->ecc_stats.failed++;
+				ret = err;
+			} else if (mtd_is_bitflip(err)) {
+				mtd->ecc_stats.corrected++;
+				/* Do not overwrite -EBADMSG !! */
+				if (!ret)
+					ret = err;
+			} else {
+				return err;
+			}
+		}
+
+		*retlen += retsize;
+		len -= retsize;
+		buf += retsize;
+		from += retsize;
+	}
+
+	return ret;
+}
+
+int mtd_raid_read(struct mtd_info *mtd, loff_t from, size_t len,
+		  size_t *retlen, u_char *buf)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+
+	if (check_offs(mtd, from, len))
+		return -EINVAL;
+
+	if (need_async_reading(raid, from, len))
+		return __raid_read_async(mtd, from, len, retlen, buf);
+
+	return __raid_read_sync(mtd, from, len, retlen, buf);
+}
+
+/* Interface for mtd->_write() */
+int mtd_raid_write(struct mtd_info *mtd, loff_t to, size_t len,
+		   size_t *retlen, const u_char *buf)
+{
+	int err = 0;
+	int i = 0;
+	int devid;
+	loff_t subdev_off;
+	size_t retsize, size;
+	struct mtd_info *subdev = NULL;
+	struct mtd_raid *raid = MTD_RAID(mtd);
+
+	if (!(mtd->flags & MTD_WRITEABLE))
+		return -EROFS;
+
+	if (check_offs(mtd, to, len))
+		return -EINVAL;
+
+	while (len) {
+		for (i = 0; i < raid->ncopies; i++) {
+			err = raid->ops->logical_to_physical(raid, to, len, i,
+							     &devid, &subdev_off, &size);
+			if (err)
+				goto out;
+
+			subdev = raid->devs[devid].mtd;
+			err = mtd_write(subdev, subdev_off, size, &retsize, buf);
+			if (unlikely(err))
+				goto out;
+		}
+
+		*retlen += retsize;
+		len -= retsize;
+		buf += retsize;
+		to += retsize;
+	}
+
+out:
+	return err;
+}
+
+int __raid_read_oob_async(struct mtd_info *mtd, loff_t from, size_t readlen,
+			  size_t oobsize, struct mtd_oob_ops *ops)
+{
+	size_t size;
+	loff_t subdev_off;
+	struct mtd_oob_ops devops = *ops;
+	struct mtd_raid_ctx *ctx; 
+	struct mtd_raid_dev *subdev;
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	struct mtd_raid_read_oob_request *read_oob_req;
+	struct mtd_raid_request *request;
+	int i_copy = 0, devid = 0, i = 0, ret = 0;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ret = mtd_raid_ctx_init(ctx);
+	if (ret)
+		goto free;
+
+	while (readlen > 0) {
+		ret = raid->ops->logical_to_physical(raid, from, readlen, i,
+						     &devid, &subdev_off, &size);
+		if (unlikely(ret))
+			goto out;
+
+		if (devops.datbuf) {
+			devops.len = size;
+		} else {
+			if (devops.ooblen > (size / mtd->writesize * oobsize))
+				devops.ooblen = size / mtd->writesize * oobsize;
+		}
+
+		/* Read data from subdev */
+		subdev = &raid->devs[devid];
+		ret = mtd_raid_dev_read_oob(ctx, subdev, subdev_off, size, &devops);
+		if (unlikely(ret))
+			goto out;
+
+		readlen -= size;
+		from += size;
+
+		if (devops.datbuf)
+			devops.datbuf += devops.len;
+
+		if (devops.oobbuf)
+			devops.oobbuf += devops.ooblen;
+	}
+
+wait:
+	ret = mtd_raid_ctx_wait(ctx);
+	if (ret) {
+		/* Not all request succeeded */
+		if (!list_empty(&ctx->error_list)) {
+			request = list_first_entry(&ctx->error_list,
+						   struct mtd_raid_request, node);
+			read_oob_req = READ_OOB_REQUEST(request);
+			ret = read_oob_req->retval;
+			goto out;
+		} else if (!list_empty(&ctx->failed_list)) {
+			ret =  -EBADMSG;
+		} else {
+			if (!ret)
+				ret = -EUCLEAN;
+		}
+	} else {
+		goto out;
+	}
+	
+	if (++i_copy >= raid->ncopies)
+		goto out;
+
+	ret = mtd_raid_ctx_retry(ctx, i_copy);
+	if (ret)
+		goto out;
+	goto wait;
+out:
+	mtd->ecc_stats.failed += ctx->failed;
+	mtd->ecc_stats.corrected += ctx->corrected;
+	/* Fill retlen */
+	ops->retlen = ops->oobretlen = 0;
+	list_for_each_entry(request, &ctx->all_list, node_all) {
+		read_oob_req = READ_OOB_REQUEST(request);
+		if (read_oob_req->retval && !mtd_is_bitflip_or_eccerr(read_oob_req->retval))
+			break;
+		ops->retlen += read_oob_req->ops.retlen;
+		ops->oobretlen += read_oob_req->ops.oobretlen;
+	}
+	mtd_raid_ctx_destroy(ctx);
+free:
+	kfree(ctx);
+
+	return ret;
+}
+
+int __raid_read_oob_sync(struct mtd_info *mtd, loff_t from, size_t readlen,
+			 size_t oobsize, struct mtd_oob_ops *ops)
+{
+	size_t size;
+	loff_t subdev_off;
+	struct mtd_oob_ops devops = *ops;
+	struct mtd_info *subdev;
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	int devid = 0, i = 0, ret = 0;
+
+	while (readlen > 0) {
+		ret = raid->ops->logical_to_physical(raid, from, readlen, i,
+						     &devid, &subdev_off, &size);
+		if (unlikely(ret))
+			goto out;
+
+		if (devops.datbuf) {
+			devops.len = size;
+		} else {
+			if (devops.ooblen > (size / mtd->writesize * oobsize))
+				devops.ooblen = size / mtd->writesize * oobsize;
+		}
+
+		/* Read data from subdev */
+		subdev = raid->devs[devid].mtd;
+		ret = mtd_read_oob(subdev, subdev_off, &devops);
+		if (ret)
+			break;
+
+		readlen -= size;
+		from += size;
+
+		if (devops.datbuf) {
+			devops.datbuf += devops.len;
+			ops->retlen += devops.len;
+		}
+
+		if (devops.oobbuf) {
+			devops.oobbuf += devops.ooblen;
+			ops->oobretlen += devops.ooblen;
+		}
+	}
+out:
+	return ret;
+}
+
+int mtd_raid_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	size_t oobsize, readlen = ops->len;
+
+	/* Check parameters for reading oob */
+	if (ops->datbuf && check_offs(mtd, from, ops->len))
+		return -EINVAL;
+
+	/* Get oobsize depending on mode */
+	if (ops->mode == MTD_OPS_AUTO_OOB)
+		oobsize = mtd->oobavail;
+	else
+		oobsize = mtd->oobsize;
+
+	/* Check ooboffs */
+	if (ops->ooboffs >= oobsize)
+		return -EINVAL;
+
+	/* Check len and from */
+	oobsize -= ops->ooboffs;
+	if (ops->datbuf) {
+		readlen = ops->len;
+	} else {
+		readlen = DIV_ROUND_UP(ops->ooblen, oobsize);
+		readlen *= mtd->writesize;
+	}
+
+	if (readlen > mtd->size - from)
+		return -EINVAL;
+
+	if (need_async_reading(raid, from, readlen))
+		return __raid_read_oob_async(mtd, from, readlen, oobsize, ops);
+
+	return __raid_read_oob_sync(mtd, from, readlen, oobsize, ops);
+}
+
+int mtd_raid_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	struct mtd_oob_ops devops = *ops;
+	uint64_t oobsize;
+	int devid = 0, i = 0, ret = 0;
+	loff_t subdev_off;
+	size_t size, writelen = ops->len;
+	struct mtd_info *subdev = NULL;
+
+	if (!(mtd->flags & MTD_WRITEABLE))
+		return -EROFS;
+
+	if (ops->datbuf && check_offs(mtd, to, ops->len))
+		return -EINVAL;
+
+	ops->retlen = ops->oobretlen = 0;
+
+	if (ops->mode == MTD_OPS_AUTO_OOB)
+		oobsize = mtd->oobavail;
+	else
+		oobsize = mtd->oobsize;
+
+	if (to < 0 || to > mtd->size)
+		return -EINVAL;
+
+	if (devops.ooboffs >= oobsize)
+		return -EINVAL;
+
+	oobsize -= devops.ooboffs;
+
+	if (devops.datbuf) {
+		writelen = devops.len;
+	} else {
+		writelen = DIV_ROUND_UP(devops.ooblen, oobsize);
+		writelen *= mtd->writesize;
+	}
+
+	if (writelen > mtd->size - to)
+		return -EINVAL;
+
+	while (writelen > 0) {
+		for (i = 0; i < raid->ncopies; i++) {
+			ret = raid->ops->logical_to_physical(raid, to, writelen, i,
+							     &devid, &subdev_off, &size);
+			if (unlikely(ret))
+				goto out;
+
+			if (devops.datbuf) {
+				devops.len = size;
+			} else {
+				if (devops.ooblen > (size / mtd->writesize * oobsize))
+					devops.ooblen = size / mtd->writesize * oobsize;
+			}
+
+			subdev = raid->devs[devid].mtd;
+			ret = mtd_write_oob(subdev, subdev_off, &devops);
+			if (ret)
+				goto out;
+
+			writelen -= size;
+			to += size;
+
+			if (devops.datbuf) {
+				devops.datbuf += devops.len;
+				ops->retlen += devops.len;
+			}
+
+			if (devops.oobbuf) {
+				devops.oobbuf += devops.ooblen;
+				ops->oobretlen += devops.ooblen;
+			}
+		}
+	}
+out:
+	return ret;
+}
+
+/* TODO: make this asynchronous and parallel */
+/* lblock means logical block */
+static int raid_erase_lblock(struct mtd_raid *raid, struct erase_info *instr)
+{
+	int devid;
+	size_t size;
+	loff_t subdev_off;
+	int i, icopy = 0;
+	int err = 0, ret = 0;
+	uint64_t addr, len;
+	struct mtd_raid_dev *subdev = NULL;
+
+	addr = instr->addr;
+	len = 0;
+	for (i = 0; i < raid->npebs_per_leb; i++) {
+		for (icopy = 0; icopy < raid->ncopies; icopy++) {
+			err = raid->ops->logical_to_physical(raid, addr, len, icopy, &devid, &subdev_off, &size);
+			if (err)
+				goto out;
+
+			subdev = &raid->devs[devid];
+			if (!(subdev->mtd->flags & MTD_WRITEABLE)) {
+				ret = -EROFS;
+				goto out;
+			}
+
+			instr->addr = subdev_off;
+
+			ret = mtd_raid_dev_erase(subdev, instr);
+			if (ret)
+				goto out;
+
+			if (instr->state != MTD_ERASE_DONE) {
+				ret = -EIO;
+				goto out;
+			}
+		}
+
+		addr += raid->substripe_size;
+	}
+
+out:
+	return ret;
+}
+
+int mtd_raid_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	struct erase_info *erase;
+	loff_t logical;
+	loff_t length;
+	int err = 0;
+
+	if (!(mtd->flags & MTD_WRITEABLE))
+		return -EROFS;
+
+	if (check_offs_aligned(mtd, instr->addr, instr->len, mtd->erasesize))
+		return -EINVAL;
+
+	instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+
+	erase = kmalloc(sizeof(struct erase_info), GFP_KERNEL);
+	if (!erase)
+		return -ENOMEM;
+
+	*erase = *instr;
+	logical = instr->addr;
+	length = instr->len;
+
+	err = 0;
+	while (length > 0) {
+		erase->addr = logical;
+		err = raid_erase_lblock(raid, erase);
+		if (err)
+			break;
+
+		logical += raid->mtd.erasesize;
+		length -= raid->mtd.erasesize;
+	}
+
+	instr->state = erase->state;
+	kfree(erase);
+
+	if (err)
+		return err;
+
+	if (instr->callback)
+		instr->callback(instr);
+
+	return 0;
+
+}
+
+void mtd_raid_sync(struct mtd_info *mtd)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	int i;
+
+	for (i = 0; i < raid->dev_count; i++) {
+		struct mtd_info *subdev = raid->devs[i].mtd;
+		mtd_sync(subdev);
+	}
+}
+
+int mtd_raid_suspend(struct mtd_info *mtd)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	int i, err = 0;
+
+	for (i = 0; i < raid->dev_count; i++) {
+		struct mtd_info *subdev = raid->devs[i].mtd;
+		if ((err = mtd_suspend(subdev)) < 0)
+			return err;
+	}
+	return err;
+}
+
+void mtd_raid_resume(struct mtd_info *mtd)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	int i;
+
+	for (i = 0; i < raid->dev_count; i++) {
+		struct mtd_info *subdev = raid->devs[i].mtd;
+		mtd_resume(subdev);
+	}
+}
+
+int mtd_raid_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	int ret = 0, err = 0, i = 0;
+	int devid;
+	loff_t subdev_off;
+	size_t size;
+	struct mtd_info *subdev = NULL;
+
+	while (len) {
+		for (i = 0; i < raid->ncopies; i++) {
+			err = raid->ops->logical_to_physical(raid, ofs, len, i, &devid, &subdev_off, &size);
+			if (err) {
+				ret = err;
+				goto out;
+			}
+
+			subdev = raid->devs[devid].mtd;
+			err = mtd_lock(subdev, subdev_off, size);
+			if (unlikely(err)) {
+				ret = err;
+				goto out;
+			}
+		}
+
+		len -= size;
+		ofs += size;
+	}
+out:
+	return ret;
+}
+
+int mtd_raid_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	int ret = 0, err = 0, i = 0;
+	int devid;
+	loff_t subdev_off;
+	size_t size;
+	struct mtd_info *subdev = NULL;
+
+	while (len) {
+		for (i = 0; i < raid->ncopies; i++) {
+			err = raid->ops->logical_to_physical(raid, ofs, len, i, &devid, &subdev_off, &size);
+			if (err) {
+				ret = err;
+				goto out;
+			}
+
+			subdev = raid->devs[devid].mtd;
+			err = mtd_unlock(subdev, subdev_off, size);
+			if (unlikely(err)) {
+				ret = err;
+				goto out;
+			}
+		}
+
+		len -= size;
+		ofs += size;
+	}
+
+out:
+	return ret;
+}
+
+int mtd_raid_block_isbad(struct mtd_info *mtd, loff_t ofs)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	int i = 0;
+	loff_t from = ofs, subdev_off;
+	size_t len;
+	int devid;
+	size_t size;
+	struct mtd_info *subdev = NULL;
+
+	if (!mtd_can_have_bb(raid->devs[0].mtd))
+		return 0;
+
+	ofs -= do_div(from, mtd->erasesize);
+	from = ofs;
+	len = mtd->erasesize;
+	while (len) {
+		for (i = 0; i < raid->ncopies; i++) {
+			raid->ops->logical_to_physical(raid, from, len, i, &devid, &subdev_off, &size);
+
+			subdev = raid->devs[devid].mtd;
+			if (mtd_block_isbad(subdev, subdev_off))
+				return 1;
+		}
+
+		len -= size;
+		from += size;
+	}
+
+	return 0;
+}
+
+int mtd_raid_block_markbad(struct mtd_info *mtd, loff_t ofs)
+{
+	struct mtd_raid *raid = MTD_RAID(mtd);
+	int i, err = 0;
+	loff_t from = ofs;
+	size_t len;
+	int devid;
+	loff_t subdev_off;
+	size_t size;
+	struct mtd_info *subdev = NULL;
+
+	ofs -= do_div(from, mtd->erasesize);
+	from = ofs;
+	len = mtd->erasesize;
+	while (len) {
+		for (i = 0; i < raid->ncopies; i++) {
+			err = raid->ops->logical_to_physical(raid, from, len, i, &devid, &subdev_off, &size);
+			if (err)
+				goto out;
+
+			subdev = raid->devs[devid].mtd;
+			err = mtd_block_markbad(subdev, subdev_off);
+			if (err)
+				goto out;
+			else
+				mtd->ecc_stats.badblocks++;
+		}
+
+		len -= size;
+		from += size;
+	}
+
+out:
+	return err;
+}
+
+int mtd_raid_init(struct mtd_raid *raid, struct mtd_info **subdevs,
+		  int dev_count, size_t substripe_size)
+{
+	struct mtd_info *subdev = NULL;
+	struct mtd_info *mtd = NULL;
+	int ret = 0;
+	int i = 0;
+
+	INIT_LIST_HEAD(&raid->node);
+	raid->substripe_size = substripe_size;
+	raid->dev_count = dev_count;
+	raid->subdevs = subdevs;
+
+	for (i = 0; i < dev_count; i++) {
+		raid->devs[i].mtd = raid->subdevs[i];
+		raid->devs[i].id = i;
+	}
+
+	mtd = &raid->mtd;
+	subdev = raid->devs[0].mtd;
+
+	if (raid->substripe_size == 0)
+		raid->substripe_size = subdev->writesize;
+
+	mtd->owner = THIS_MODULE;
+	mtd->type = subdev->type;
+	mtd->flags = subdev->flags;
+	mtd->writesize = subdev->writesize;
+	mtd->writebufsize = subdev->writebufsize;
+	mtd->subpage_sft = subdev->subpage_sft;
+	mtd->oobsize = subdev->oobsize;
+	mtd->oobavail = subdev->oobavail;
+	mtd->ecclayout = subdev->ecclayout;
+
+	mtd->_erase = mtd_raid_erase;
+	mtd->_read = mtd_raid_read;
+	mtd->_write = mtd_raid_write;
+	mtd->_sync = mtd_raid_sync;
+	mtd->_lock = mtd_raid_lock;
+	mtd->_unlock = mtd_raid_unlock;
+	mtd->_suspend = mtd_raid_suspend;
+	mtd->_resume = mtd_raid_resume;
+
+	if (subdev->_read_oob)
+		mtd->_read_oob = mtd_raid_read_oob;
+	if (subdev->_write_oob)
+		mtd->_write_oob = mtd_raid_write_oob;
+	if (subdev->_block_isbad)
+		mtd->_block_isbad = mtd_raid_block_isbad;
+	if (subdev->_block_markbad)
+		mtd->_block_markbad = mtd_raid_block_markbad;
+
+	for (i = 1; i < dev_count; i++) {
+		if (mtd->flags != raid->devs[i].mtd->flags) {
+			/*
+			 * Expect all flags except MTD_WRITEABLE to be
+			 * equal on all subdevices.
+			 */
+			if ((mtd->flags ^ raid->devs[i].mtd->flags) &
+			    ~MTD_WRITEABLE) {
+				pr_err("Incompatible device flags on \"%s\"\n",
+				       raid->devs[i].mtd->name);
+				ret = -EINVAL;
+				goto out;
+			} else {
+				/* if writeable attribute differs,
+				   make super device writeable */
+				mtd->flags |=
+				    raid->devs[i].mtd->flags & MTD_WRITEABLE;
+			}
+		}
+
+		if (mtd->writesize != raid->devs[i].mtd->writesize ||
+		    mtd->subpage_sft != raid->devs[i].mtd->subpage_sft ||
+		    mtd->oobsize != raid->devs[i].mtd->oobsize ||
+		    !mtd->_read_oob  != !raid->devs[i].mtd->_read_oob ||
+		    !mtd->_write_oob != !raid->devs[i].mtd->_write_oob) {
+			pr_err("Incompatible OOB or ECC data on \"%s\"\n",
+			       raid->devs[i].mtd->name);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (mtd->writebufsize != raid->devs[i].mtd->writebufsize) {
+			pr_err("Incompatible writebufsize on \"%s\"",
+			       raid->devs[i].mtd->name);
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	if (raid->ops->init) {
+		ret = raid->ops->init(raid, dev_count, substripe_size);
+		if (ret)
+			goto out;
+	}
+
+	for (i = 0; i < dev_count; i++) {
+		/*
+		 * Init bg thread for each raid_dev to handle io requests.
+		 */
+		INIT_LIST_HEAD(&raid->devs[i].list);
+		raid->devs[i].thread = kthread_create(mtd_raid_dev_thread, &raid->devs[i],
+						      "%s_thread_%d", raid->name, i);
+	}
+
+	return 0;
+out:
+	return ret;
+}
+
+int mtd_raid_destroy(struct mtd_raid *raid);
+
+int mtd_raid_create(enum mtd_raid_level raid_level, struct mtd_info **subdevs,
+		    int dev_count, int substripe_size)
+{
+	int ret = 0;
+	struct mtd_raid *raid = NULL;
+
+	switch (raid_level) {
+	case MTD_RAID_LEVEL_SINGLE:
+		raid = mtd_raid_single_create(dev_count, substripe_size);
+		if (!raid) {
+			pr_err("MTD RAID: Failed to create raid single device.");
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	case MTD_RAID_LEVEL_RAID0:
+		raid = mtd_raid0_create(dev_count, substripe_size);
+		if (!raid) {
+			pr_err("MTD RAID: Failed to create raid0 device.");
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	case MTD_RAID_LEVEL_RAID1:
+		raid = mtd_raid1_create(dev_count, substripe_size);
+		if (!raid) {
+			pr_err("MTD RAID: Failed to create raid1 device.");
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	default:
+		pr_err("MTD RAID: Unsupported raid level: %d.", raid_level);
+		ret = -ENOTSUPP;
+		goto out;
+	}
+
+	ret = mtd_raid_init(raid, subdevs, dev_count, substripe_size);
+	if (ret)
+		goto destroy;
+
+	return mtd_device_register(&raid->mtd, NULL, 0);
+
+destroy:
+	mtd_raid_destroy(raid);
+out:
+	return ret;
+}
+
+int mtd_raid_destroy(struct mtd_raid *raid)
+{
+	int i = 0;
+	int ret = 0;
+
+	ret = mtd_device_unregister(&raid->mtd);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < raid->dev_count; i++) {
+		if (raid->devs[i].thread)
+			kthread_stop(raid->devs[i].thread);
+	}
+
+	if (raid->ops->destroy)
+		raid->ops->destroy(raid);
+out:
+	return ret;
+}
+
+static struct miscdevice mtd_raid_ctrl_cdev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "mtd_raid_ctrl",
+	.fops = &mtd_raid_ctrl_cdev_operations,
+};
+
+static int __init init_mtd_raid(void)
+{
+	int err = 0;
+
+	err = mtd_raid_list_init();
+	if (err)
+		goto out;
+
+	err = misc_register(&mtd_raid_ctrl_cdev);
+	if (err) {
+		pr_err("MTD RAID error: cannot register device");
+		goto out;
+	}
+
+	return 0;
+out:
+	pr_err("MTD RAID error: cannot initialize MTD RAID, error %d", err);
+	return err;
+}
+
+static void __exit cleanup_mtd_raid(void)
+{
+	misc_deregister(&mtd_raid_ctrl_cdev);
+	mtd_raid_list_destroy();
+}
+
+module_init(init_mtd_raid);
+module_exit(cleanup_mtd_raid);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dongsheng Yang <yangds.fnst@cn.fujitsu.com>");
+MODULE_DESCRIPTION("Support for MTD RAID");
diff --git a/drivers/mtd/mtd_raid/mtd_raid.h b/drivers/mtd/mtd_raid/mtd_raid.h
new file mode 100644
index 0000000..5a390b3
--- /dev/null
+++ b/drivers/mtd/mtd_raid/mtd_raid.h
@@ -0,0 +1,273 @@ 
+/*
+ * This file is part of MTD RAID.
+ *
+ * Copyright (C) 2015 Dongsheng Yang. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
+ */
+
+#ifndef __MTD_RAID_H
+#define __MTD_RAID_H
+
+#include <linux/mtd/mtd.h>
+#include <mtd/mtd-abi.h>
+#include <mtd/mtd-raid-user.h>
+
+struct mtd_raid;
+
+/*
+ * Operations for an mtd_raid structure; they
+ * differ between RAID levels.
+ **/
+struct mtd_raid_operations {
+	int (*init)(struct mtd_raid *, int, size_t);
+	void (*destroy)(struct mtd_raid *);
+
+	/* logical_to_physical() is the core function of mtd raid:
+	 * when a logical address comes in from the user, it is
+	 * translated into the physical devid, address and length.
+	 */
+	int (*logical_to_physical)(struct mtd_raid *, loff_t, size_t, int, int *, loff_t *, size_t *);
+	int (*physical_to_logical)(struct mtd_raid *, int, loff_t, size_t, loff_t *, size_t *);
+};
+
+/*
+ * struct mtd_raid_dev: one mtd_raid_dev represents an mtd device
+ * in the raid array.
+ **/
+struct mtd_raid_dev {
+	int id;
+	struct mtd_raid *raid;
+	struct mtd_info *mtd;
+
+	spinlock_t lock;
+	struct list_head list;
+	struct task_struct *thread;
+};
+
+/*
+ * Context of an I/O. Many I/O requests are sent to the
+ * mtd_raid_devs, and each mtd_raid_dev handles them in its
+ * own thread. In order to track the status of each request,
+ * they are registered in a mtd_raid_ctx.
+ */
+struct mtd_raid_ctx {
+	spinlock_t lock;
+	struct list_head all_list;
+
+	struct list_head submit_list;
+	struct list_head complete_list;
+	struct list_head failed_list;
+	struct list_head corrected_list;
+	struct list_head error_list;
+
+	unsigned int failed;
+	unsigned int corrected;
+	unsigned int errored;
+
+	struct task_struct *wait;
+};
+
+
+/*
+ * Type of each I/O request.
+ */
+enum mtd_raid_request_type {
+	MTD_RAID_REQ_READ	= 0,
+	MTD_RAID_REQ_READ_OOB
+};
+
+struct mtd_raid_request;
+typedef void request_func_t(struct mtd_raid_request *);
+typedef void end_func_t(struct mtd_raid_request *);
+typedef int retry_func_t(struct mtd_raid_request *request, int i_copy);
+
+struct mtd_raid_request {
+	enum mtd_raid_request_type type;
+	struct mtd_raid_ctx *ctx;
+	struct mtd_raid_dev *raid_dev;
+
+	struct list_head node;
+	struct list_head node_all;
+	struct list_head node_request;
+
+	/* Main action of this request. */
+	request_func_t	*func;
+	/*
+	 * If this request failed, how to retry. NULL means
+	 * don't retry for this request.
+	 */
+	retry_func_t	*retry_func;
+	/*
+	 * When we want to destroy this request, what we need
+	 * to do.
+	 */
+	end_func_t	*end_func;
+};
+
+/*
+ * Request for mtd_read.
+ */
+struct mtd_raid_read_request {
+	struct mtd_raid_request request;
+
+	loff_t from;
+	size_t len;
+	size_t retlen;
+	u_char *buf;
+	int retval;
+};
+
+/*
+ * Request for mtd_read_oob
+ */
+struct mtd_raid_read_oob_request {
+	struct mtd_raid_request request;
+
+	loff_t from;
+	size_t len;
+	struct mtd_oob_ops ops;
+	int retval;
+};
+
+/*
+ * structure to represent a RAID device
+ **/
+struct mtd_raid {
+	char name[32];
+	int ncopies;
+	int dev_count;
+	struct mtd_info **subdevs;
+	int npebs_per_leb;
+	int substripe_size;
+	/*
+	 * This is the "superblock" for this RAID device.
+	 * We fill it in and register it with the MTD core.
+	 **/
+	struct mtd_info mtd;
+	struct list_head node;
+	const struct mtd_raid_operations *ops;
+	enum mtd_raid_level raid_level;
+	struct mtd_raid_dev devs[0];
+};
+
+struct mtd_raid_single {
+	/*
+	 * Please keep raid as the last member, because the devs[]
+	 * array is allocated right after this structure.
+	 */
+	struct mtd_raid raid;
+};
+
+struct mtd_raid0 {
+	/* XXX: Add read-ahead support here. */
+
+	/*
+	 * Please keep raid as the last member, because the devs[]
+	 * array is allocated right after this structure.
+	 */
+	struct mtd_raid raid;
+};
+
+struct mtd_raid1 {
+	/*
+	 * Please keep raid as the last member, because the devs[]
+	 * array is allocated right after this structure.
+	 */
+	struct mtd_raid raid;
+};
+
+/* Macros to get specified request pointers from generic request */
+#define READ_REQUEST(req)				\
+	container_of(req, struct mtd_raid_read_request, request)
+
+#define READ_OOB_REQUEST(req)				\
+	container_of(req, struct mtd_raid_read_oob_request, request)
+
+/* Macros to get specified mtd_raid pointers from mtd_info pointer */
+#define MTD_RAID(mtd)					\
+	container_of(mtd, struct mtd_raid, mtd)
+
+#define MTD_RAID_SINGLE(mtd_raid)				\
+	container_of(mtd_raid, struct mtd_raid_single, raid)
+
+#define MTD_RAID_RAID0(mtd_raid)				\
+	container_of(mtd_raid, struct mtd_raid0, raid)
+
+#define MTD_RAID_RAID1(mtd_raid)				\
+	container_of(mtd_raid, struct mtd_raid1, raid)
+
+/* ioctl.c */
+extern const struct file_operations mtd_raid_ctrl_cdev_operations;
+
+/* core.c */
+int mtd_raid_list_init(void);
+void mtd_raid_list_destroy(void);
+int mtd_raid_list_register(enum mtd_raid_level raid_level, struct mtd_raid *mtd_raid);
+struct mtd_raid *mtd_raid_list_get(int mtd_num);
+void mtd_raid_list_unregister(struct mtd_raid *mtd_raid);
+
+int mtd_raid_create(enum mtd_raid_level raid_level, struct mtd_info **subdevs,
+		    int dev_count, int substripe_size);
+int mtd_raid_destroy(struct mtd_raid *mtd_raid);
+
+int mtd_raid_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
+int mtd_raid_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
+int mtd_raid_erase(struct mtd_info *mtd, struct erase_info *instr);
+int mtd_raid_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops);
+int mtd_raid_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+int mtd_raid_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+void mtd_raid_resume(struct mtd_info *mtd);
+int mtd_raid_suspend(struct mtd_info *mtd);
+void mtd_raid_sync(struct mtd_info *mtd);
+int mtd_raid_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops);
+int mtd_raid_block_isbad(struct mtd_info *mtd, loff_t ofs);
+int mtd_raid_block_markbad(struct mtd_info *mtd, loff_t ofs);
+
+/* raid_io.c */
+int mtd_raid_dev_thread(void *u);
+int mtd_raid_ctx_init(struct mtd_raid_ctx* ctx);
+int mtd_raid_ctx_wait(struct mtd_raid_ctx *ctx);
+void mtd_raid_ctx_destroy(struct mtd_raid_ctx *ctx);
+int mtd_raid_ctx_retry(struct mtd_raid_ctx *ctx, int i_copy);
+
+int mtd_raid_dev_read(struct mtd_raid_ctx *ctx, struct mtd_raid_dev *raid_dev,
+		      loff_t from, size_t len, size_t *retlen, u_char *buf);
+int mtd_raid_dev_read_oob(struct mtd_raid_ctx *ctx, struct mtd_raid_dev *raid_dev,
+			  loff_t from, size_t len, struct mtd_oob_ops *ops);
+int mtd_raid_dev_erase(struct mtd_raid_dev *raid_dev, struct erase_info *erase);
+
+/* raid_single.c */
+extern const struct mtd_raid_operations mtd_raid_single_ops;
+struct mtd_raid *mtd_raid_single_create(int dev_count, size_t substripe_size);
+
+/* raid0.c */
+extern const struct mtd_raid_operations mtd_raid0_ops;
+struct mtd_raid *mtd_raid0_create(int dev_count, size_t substripe_size);
+
+/* raid1.c */
+extern const struct mtd_raid_operations mtd_raid1_ops;
+struct mtd_raid *mtd_raid1_create(int dev_count, size_t substripe_size);
+
+/* inline functions */
+static inline int need_async_reading(struct mtd_raid *raid,
+				     loff_t from, size_t len)
+{
+	return (len >= (raid->substripe_size * raid->dev_count));
+}
+
+#endif			/* __MTD_RAID_H */
diff --git a/drivers/mtd/mtd_raid/raid_io.c b/drivers/mtd/mtd_raid/raid_io.c
new file mode 100644
index 0000000..71a5c6b
--- /dev/null
+++ b/drivers/mtd/mtd_raid/raid_io.c
@@ -0,0 +1,449 @@ 
+/*
+ * This file is part of MTD RAID.
+ *
+ * Copyright (C) 2015 Dongsheng Yang. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
+ */
+
+/*
+ * This file handles all the I/O-related work.
+ */
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "mtd_raid.h"
+
+/**
+ * Context related operations:
+ * 	mtd_raid_ctx_init()	--> Init a ctx
+ * 				--> Attach requests to ctx
+ * 	mtd_raid_ctx_wait()	--> Wait for all requests to finish
+ * 	mtd_raid_ctx_retry()	--> Retry failed requests
+ * 	mtd_raid_ctx_destroy()	--> Destroy the ctx
+ */
+int mtd_raid_ctx_init(struct mtd_raid_ctx* ctx)
+{
+	spin_lock_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->all_list);
+	INIT_LIST_HEAD(&ctx->submit_list);
+	INIT_LIST_HEAD(&ctx->complete_list);
+	INIT_LIST_HEAD(&ctx->failed_list);
+	INIT_LIST_HEAD(&ctx->corrected_list);
+	INIT_LIST_HEAD(&ctx->error_list);
+
+	ctx->failed = ctx->corrected = ctx->errored = 0;
+	ctx->wait = current;
+
+	return 0;
+}
+
+int mtd_raid_ctx_wait(struct mtd_raid_ctx *ctx)
+{
+	int ret = 0;
+
+	while (1) {
+		spin_lock(&ctx->lock);
+		if (list_empty(&ctx->submit_list)) {
+			ret = ctx->failed + ctx->corrected + ctx->errored;
+			spin_unlock(&ctx->lock);
+			return ret;
+		}
+
+		spin_unlock(&ctx->lock);
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+		set_current_state(TASK_RUNNING);
+
+		cond_resched();
+	}
+}
+
+void mtd_raid_ctx_destroy(struct mtd_raid_ctx *ctx)
+{
+	struct mtd_raid_request *request, *next;
+
+	list_for_each_entry_safe(request, next, &ctx->all_list, node_all) {
+		if (request->end_func)
+			request->end_func(request);
+	}
+}
+
+int mtd_raid_ctx_retry(struct mtd_raid_ctx *ctx, int i_copy)
+{
+	struct mtd_raid_request *request;
+	int ret = 0;
+
+	list_for_each_entry(request, &ctx->failed_list, node) {
+		ret = request->retry_func(request, i_copy);
+		if (ret)
+			goto out;
+		spin_lock(&ctx->lock);
+		list_move_tail(&request->node, &ctx->submit_list);
+		spin_unlock(&ctx->lock);
+	}
+out:
+	return ret;
+}
+
+/*
+ * Hooks for each type of request.
+ */
+static void read_req_func(struct mtd_raid_request *request)
+{
+	struct mtd_raid_read_request *read_req;
+	struct mtd_raid_ctx *ctx;
+	struct mtd_raid_dev *raid_dev;
+	int ret = 0;
+
+	read_req = READ_REQUEST(request);
+	raid_dev = request->raid_dev;
+	ret = mtd_read(raid_dev->mtd, read_req->from, read_req->len,
+		       &read_req->retlen, read_req->buf);
+
+	read_req->retval = ret;
+	ctx = request->ctx;
+	spin_lock(&ctx->lock);
+	list_del_init(&request->node);
+	if (unlikely(ret)) {
+		if (mtd_is_eccerr(ret)) {
+			ctx->failed++;
+			list_add_tail(&request->node, &ctx->failed_list);
+		} else if (mtd_is_bitflip(ret)) {
+			ctx->corrected++;
+			list_add_tail(&request->node, &ctx->corrected_list);
+		} else {
+			ctx->errored++;
+			list_add_tail(&request->node, &ctx->error_list);
+		}
+	} else {
+		list_add_tail(&request->node, &ctx->complete_list);
+	}
+	spin_unlock(&ctx->lock);
+	wake_up_process(ctx->wait);
+
+	return;
+}
+
+static void read_oob_req_func(struct mtd_raid_request *request)
+{
+	struct mtd_raid_read_oob_request *read_oob_req;
+	struct mtd_raid_ctx *ctx;
+	struct mtd_raid_dev *raid_dev;
+	int ret = 0;
+
+	read_oob_req = READ_OOB_REQUEST(request);
+	raid_dev = request->raid_dev;
+	ret = mtd_read_oob(raid_dev->mtd, read_oob_req->from, &read_oob_req->ops);
+
+	read_oob_req->retval = ret;
+	ctx = request->ctx;
+	spin_lock(&ctx->lock);
+	list_del_init(&request->node);
+	if (unlikely(ret)) {
+		if (mtd_is_eccerr(ret)) {
+			ctx->failed++;
+			list_add_tail(&request->node, &ctx->failed_list);
+		} else if (mtd_is_bitflip(ret)) {
+			ctx->corrected++;
+			list_add_tail(&request->node, &ctx->corrected_list);
+		} else {
+			ctx->errored++;
+			list_add_tail(&request->node, &ctx->error_list);
+		}
+	} else {
+		list_add_tail(&request->node, &ctx->complete_list);
+	}
+	spin_unlock(&ctx->lock);
+	wake_up_process(ctx->wait);
+
+	return;
+}
+
+static int read_req_retry_func(struct mtd_raid_request *request, int i_copy)
+{
+	struct mtd_raid *mtd_raid;
+	struct mtd_raid_read_request *read_req;
+	loff_t address, subdev_off;
+	size_t length, size;
+	int devid, ret = 0;
+
+	mtd_raid = request->raid_dev->raid;
+	read_req = READ_REQUEST(request);
+	if (!mtd_raid->ops->physical_to_logical || !mtd_raid->ops->logical_to_physical)
+		return -EINVAL;
+
+	subdev_off = read_req->from;
+	size = read_req->len;
+	devid = request->raid_dev->id;
+	ret = mtd_raid->ops->physical_to_logical(mtd_raid, devid, subdev_off, size,
+						 &address, &length);
+	if (ret)
+		goto out;
+
+	ret = mtd_raid->ops->logical_to_physical(mtd_raid, address, length, i_copy,
+						 &devid, &subdev_off, &size);
+	if (ret)
+		goto out;
+
+	/* Fill request with the address of new copy */
+	request->raid_dev = &mtd_raid->devs[devid];
+	read_req->from = subdev_off;
+	read_req->len = size;
+out:
+	return ret;
+}
+
+static int read_oob_req_retry_func(struct mtd_raid_request *request, int i_copy)
+{
+	struct mtd_raid *mtd_raid;
+	struct mtd_raid_read_oob_request *read_oob_req;
+	loff_t address, subdev_off;
+	size_t length, size;
+	int devid, ret = 0;
+
+	mtd_raid = request->raid_dev->raid;
+	read_oob_req = READ_OOB_REQUEST(request);
+	if (!mtd_raid->ops->physical_to_logical || !mtd_raid->ops->logical_to_physical)
+		return -EINVAL;
+
+	subdev_off = read_oob_req->from;
+	size = read_oob_req->len;
+	devid = request->raid_dev->id;
+	ret = mtd_raid->ops->physical_to_logical(mtd_raid, devid, subdev_off, size,
+						 &address, &length);
+	if (ret)
+		goto out;
+
+	ret = mtd_raid->ops->logical_to_physical(mtd_raid, address, length, i_copy,
+						 &devid, &subdev_off, &size);
+	if (ret)
+		goto out;
+
+	/* Fill request with the address of new copy */
+	request->raid_dev = &mtd_raid->devs[devid];
+	read_oob_req->from = subdev_off;
+	read_oob_req->len = size;
+out:
+	return ret;
+}
+
+/* Generic end_func for request */
+static void request_end_func(struct mtd_raid_request *request)
+{
+	struct mtd_raid_ctx *ctx = NULL;
+	struct mtd_raid_dev *raid_dev = NULL;
+
+	ctx = request->ctx;
+	spin_lock(&ctx->lock);
+	list_del(&request->node);
+	list_del(&request->node_all);
+	spin_unlock(&ctx->lock);
+
+	raid_dev = request->raid_dev;
+	spin_lock(&raid_dev->lock);
+	list_del(&request->node_request);
+	spin_unlock(&raid_dev->lock);
+}
+
+static void read_req_end_func(struct mtd_raid_request *request)
+{
+	struct mtd_raid_read_request *read_req;
+
+	read_req = READ_REQUEST(request);
+	request_end_func(request);
+	kfree(read_req);
+}
+
+static void read_oob_req_end_func(struct mtd_raid_request *request)
+{
+	struct mtd_raid_read_oob_request *read_oob_req;
+
+	read_oob_req = READ_OOB_REQUEST(request);
+	request_end_func(request);
+	kfree(read_oob_req);
+}
+
+/**
+ * Thread for each raid_dev.
+ *
+ * It takes requests from raid_dev->list and does the
+ * requested work until the list is empty, then goes
+ * to sleep.
+ */
+int mtd_raid_dev_thread(void *u)
+{
+	struct mtd_raid_dev *raid_dev = u;
+	struct mtd_raid_request *request;
+
+	set_freezable();
+	for (;;) {
+		if (kthread_should_stop())
+			break;
+
+		if (try_to_freeze())
+			continue;
+
+		spin_lock(&raid_dev->lock);
+		if (list_empty(&raid_dev->list)) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			spin_unlock(&raid_dev->lock);
+			schedule();
+			continue;
+		}
+		/*
+		 * Get the first request from request list.
+		 **/
+		request = list_first_entry(&raid_dev->list,
+						struct mtd_raid_request, node_request);
+		list_del_init(&request->node_request);
+		spin_unlock(&raid_dev->lock);
+
+		if (request->func)
+			request->func(request);
+		cond_resched();
+	}
+
+	return 0;
+}
+
+/* Interfaces of raid_dev */
+
+/*
+ * Read interface for raid_dev.
+ */
+int mtd_raid_dev_read(struct mtd_raid_ctx *ctx, struct mtd_raid_dev *raid_dev,
+		      loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+	struct mtd_raid_read_request *read_req;
+	struct mtd_raid_request *request;
+	int ret = 0;
+
+	/* Alloc a request */
+	read_req = kzalloc(sizeof(*read_req), GFP_KERNEL);
+	if (!read_req) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	request = &read_req->request;
+	/* Init the request */
+	INIT_LIST_HEAD(&request->node);
+	INIT_LIST_HEAD(&request->node_all);
+	INIT_LIST_HEAD(&request->node_request);
+
+	request->ctx = ctx;
+	request->raid_dev = raid_dev;
+	request->type = MTD_RAID_REQ_READ;
+	request->func = read_req_func;
+	request->retry_func = read_req_retry_func;
+	request->end_func = read_req_end_func;
+
+	/* Init read_request */
+	read_req->from = from;
+	read_req->len = len;
+	read_req->buf = buf;
+
+	/* Add request to context */
+	spin_lock(&ctx->lock);
+	list_add_tail(&request->node, &ctx->submit_list);
+	list_add_tail(&request->node_all, &ctx->all_list);
+	spin_unlock(&ctx->lock);
+
+	/* Dispatch request to related raid_dev */
+	spin_lock(&raid_dev->lock);
+	list_add_tail(&request->node_request, &raid_dev->list);
+	spin_unlock(&raid_dev->lock);
+
+	/* Wakeup background thread to handle requests */
+	wake_up_process(raid_dev->thread);
+
+	*retlen = len;
+out:
+	return ret;
+}
+
+/*
+ * read_oob interface for raid_dev.
+ */
+int mtd_raid_dev_read_oob(struct mtd_raid_ctx *ctx, struct mtd_raid_dev *raid_dev,
+			  loff_t from, size_t len, struct mtd_oob_ops *ops)
+{
+	struct mtd_raid_read_oob_request *read_oob_req;
+	struct mtd_raid_request *request;
+	int ret = 0;
+
+	/* Alloc a request */
+	read_oob_req = kzalloc(sizeof(*read_oob_req), GFP_KERNEL);
+	if (!read_oob_req) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	request = &read_oob_req->request;
+	/* Init the request */
+	INIT_LIST_HEAD(&request->node);
+	INIT_LIST_HEAD(&request->node_all);
+	INIT_LIST_HEAD(&request->node_request);
+
+	request->ctx = ctx;
+	request->raid_dev = raid_dev;
+	request->type = MTD_RAID_REQ_READ_OOB;
+	request->func = read_oob_req_func;
+	request->retry_func = read_oob_req_retry_func;
+	request->end_func = read_oob_req_end_func;
+
+	/* Init read_request */
+	read_oob_req->from = from;
+	read_oob_req->len = len;
+	memcpy(&read_oob_req->ops, ops, sizeof(*ops));
+
+	/* Add request to context */
+	spin_lock(&ctx->lock);
+	list_add_tail(&request->node, &ctx->submit_list);
+	list_add_tail(&request->node_all, &ctx->all_list);
+	spin_unlock(&ctx->lock);
+
+	/* Dispatch request to related raid_dev */
+	spin_lock(&raid_dev->lock);
+	list_add_tail(&request->node_request, &raid_dev->list);
+	spin_unlock(&raid_dev->lock);
+
+	/* Wakeup background thread to handle requests */
+	wake_up_process(raid_dev->thread);
+out:
+	return ret;
+}
+
+/*
+ * Erase interface for raid_dev.
+ */
+int mtd_raid_dev_erase(struct mtd_raid_dev *raid_dev, struct erase_info *erase)
+{
+	int err = 0;
+	struct mtd_info *mtd = raid_dev->mtd;
+
+	erase->mtd = mtd;
+	erase->len = mtd->erasesize;
+	err = mtd_erase(mtd, erase);
+	if (err)
+		goto out;
+
+	if (erase->state != MTD_ERASE_DONE) {
+		err = -EIO;
+		goto out;
+	}
+out:
+	return err;
+}
diff --git a/include/uapi/mtd/mtd-raid-user.h b/include/uapi/mtd/mtd-raid-user.h
new file mode 100644
index 0000000..8c735ba
--- /dev/null
+++ b/include/uapi/mtd/mtd-raid-user.h
@@ -0,0 +1,33 @@ 
+/*
+ * Copyright 2015, see mtd/mtd-raid for licensing and copyright details
+ */
+#ifndef __MTD_RAID_USER_H__
+#define __MTD_RAID_USER_H__
+
+#include <linux/types.h>
+#include <linux/magic.h>
+
+/* ioctl's command */
+#define MTD_RAID_IOC_CREATE		_IOW(0xFE, 1, struct mtd_raid_create_req)
+#define MTD_RAID_IOC_DESTROY		_IOW(0xFE, 2, struct mtd_raid_destroy_req)
+
+enum mtd_raid_level {
+	MTD_RAID_LEVEL_SINGLE = 0,
+	MTD_RAID_LEVEL_RAID0,
+	MTD_RAID_LEVEL_RAID1,
+	MTD_RAID_LEVEL_MAX
+};
+
+struct mtd_raid_create_req {
+	__u8 raid_level;
+	__u8 reserved[3];
+	__u32 dev_count;
+	__u64 substripe_size;
+	__u32 mtd_nums[0];
+};
+
+struct mtd_raid_destroy_req {
+	__u32 mtd_num;
+};
+
+#endif				/* __MTD_RAID_USER_H__ */
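
For reference, a hedged userspace sketch of the MTD_RAID_IOC_CREATE
path defined above. The actual ioctl handler lives in ioctl.c
(elsewhere in this series), so the node name /dev/mtd_raid_ctrl (taken
from the misc device registered in core.c) and the assumption that
mtd_nums[] carries MTD device indices are illustrative only:

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <mtd/mtd-raid-user.h>

	int main(void)
	{
		struct mtd_raid_create_req *req;
		size_t len = sizeof(*req) + 2 * sizeof(__u32);
		int fd, ret;

		req = calloc(1, len);
		if (!req)
			return 1;

		req->raid_level = MTD_RAID_LEVEL_RAID1;
		req->dev_count = 2;
		req->substripe_size = 0;	/* 0: let the kernel choose a default */
		req->mtd_nums[0] = 0;		/* assumed: index of mtd0 */
		req->mtd_nums[1] = 1;		/* assumed: index of mtd1 */

		fd = open("/dev/mtd_raid_ctrl", O_RDWR);
		if (fd < 0) {
			perror("open /dev/mtd_raid_ctrl");
			free(req);
			return 1;
		}

		ret = ioctl(fd, MTD_RAID_IOC_CREATE, req);
		if (ret)
			perror("MTD_RAID_IOC_CREATE");

		close(fd);
		free(req);
		return ret ? 1 : 0;
	}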