@@ -334,3 +334,4 @@ Code Seq#(hex) Include File Comments
0xF6 all LTTng Linux Trace Toolkit Next Generation
<mailto:mathieu.desnoyers@efficios.com>
0xFD all linux/dm-ioctl.h
+0xFE all mtd/mtd-raid-user.h
@@ -338,4 +338,6 @@ source "drivers/mtd/spi-nor/Kconfig"
source "drivers/mtd/ubi/Kconfig"
+source "drivers/mtd/mtd_raid/Kconfig"
+
endif # MTD
@@ -34,3 +34,4 @@ obj-y += chips/ lpddr/ maps/ devices/ nand/ onenand/ tests/
obj-$(CONFIG_MTD_SPI_NOR) += spi-nor/
obj-$(CONFIG_MTD_UBI) += ubi/
+obj-$(CONFIG_MTD_RAID) += mtd_raid/
new file mode 100644
@@ -0,0 +1,12 @@
+menuconfig MTD_RAID
+ tristate "MTD RAID Support"
+ depends on MTD
+ help
+	  This is a module for MTD RAID. There are two ways to use
+	  it: a multi-chip flash driver can call mtd_raid_create()
+	  to create a RAID device at the requested level, or a
+	  userspace program can use the ioctl interface of mtd_raid
+	  to create a RAID device from the flashes present in the
+	  system.
+
+	  More about RAID:
+	  <https://en.wikipedia.org/wiki/Standard_RAID_levels>.
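+
+	  To compile this support as a module, choose M here: the
+	  module will be called mtd_raid.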
new file mode 100644
@@ -0,0 +1,3 @@
+obj-$(CONFIG_MTD_RAID) += mtd_raid.o
+
+mtd_raid-y += ioctl.o raid_single.o core.o raid0.o raid1.o raid_io.o
new file mode 100644
@@ -0,0 +1,1103 @@
+/*
+ * Part of MTD RAID
+ *
+ * Copyright (C) 2015 Dongsheng Yang. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
+ */
+
+/*
+ * TODO
+ * - merge requests
+ * - support writev
+ * - raid10
+ * - raid5/6
+ *
+ * This is the core part of the MTD RAID layer. It implements the
+ * generic MTD interfaces that every RAID device shares. Only the
+ * entry points for the MTD operations live here; the actual I/O
+ * against the individual flashes is handled by raid_io.c.
+ *
+ * There is a global list of all struct mtd_raid instances. It is a
+ * 2-D list: each RAID level has its own list managing all mtd_raids
+ * at that level. When an MTD RAID device is created, it is registered
+ * on the list for its level; when it is destroyed, it is unregistered
+ * from that list.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/major.h>
+#include <linux/miscdevice.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <mtd/mtd-raid-user.h>
+
+#include "mtd_raid.h"
+
+/* We have a raid_list for each raid level. */
+struct raid_list {
+ int count;
+ struct list_head head;
+};
+
+/* To protect mtd_raid_list. */
+static spinlock_t mtd_raid_list_lock;
+static struct raid_list mtd_raid_list[MTD_RAID_LEVEL_MAX];
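+
+/*
+ * Layout of the global list (illustrative):
+ *
+ *	mtd_raid_list[MTD_RAID_LEVEL_SINGLE].head -> raid <-> raid <-> ...
+ *	mtd_raid_list[MTD_RAID_LEVEL_RAID0].head  -> raid <-> ...
+ *	mtd_raid_list[MTD_RAID_LEVEL_RAID1].head  -> raid <-> raid <-> ...
+ */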
+
+int mtd_raid_list_init(void)
+{
+ int i = 0;
+
+ spin_lock_init(&mtd_raid_list_lock);
+ for (i = 0; i < MTD_RAID_LEVEL_MAX; i++)
+ INIT_LIST_HEAD(&mtd_raid_list[i].head);
+
+ return 0;
+}
+
+void mtd_raid_list_destroy(void)
+{
+ struct mtd_raid *mtd_raid, *next;
+ int i = 0;
+
+ for (i = 0; i < MTD_RAID_LEVEL_MAX; i++) {
+ list_for_each_entry_safe(mtd_raid, next, &mtd_raid_list[i].head, node) {
+ mtd_raid_destroy(mtd_raid);
+ }
+ }
+}
+
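+/*
+ * Register a raid device on the list for its level. The returned
+ * raid_id comes from a per-level counter that only ever grows, so
+ * ids are not reused after devices are unregistered.
+ */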
+int mtd_raid_list_register(enum mtd_raid_level raid_level, struct mtd_raid *mtd_raid)
+{
+ int raid_id = 0;
+
+ spin_lock(&mtd_raid_list_lock);
+ list_add_tail(&mtd_raid->node, &mtd_raid_list[raid_level].head);
+ raid_id = ++mtd_raid_list[raid_level].count;
+ spin_unlock(&mtd_raid_list_lock);
+
+ return raid_id;
+}
+
+struct mtd_raid *mtd_raid_list_get(int mtd_num)
+{
+ struct mtd_raid *raid;
+ int i = 0;
+
+ spin_lock(&mtd_raid_list_lock);
+ for (i = 0; i < MTD_RAID_LEVEL_MAX; i++) {
+ list_for_each_entry(raid, &mtd_raid_list[i].head, node) {
+ if (raid->mtd.index == mtd_num) {
+ spin_unlock(&mtd_raid_list_lock);
+ return raid;
+ }
+ }
+ }
+ spin_unlock(&mtd_raid_list_lock);
+
+ return NULL;
+}
+
+void mtd_raid_list_unregister(struct mtd_raid *mtd_raid)
+{
+ spin_lock(&mtd_raid_list_lock);
+ list_del(&mtd_raid->node);
+ spin_unlock(&mtd_raid_list_lock);
+}
+
+/* MTD interfaces */
+/* Check that the range [addr, addr + len) lies inside the device. */
+int check_offs(struct mtd_info *mtd, loff_t addr, size_t len)
+{
+ /* Do not allow out of address space of device */
+ if (addr < 0) {
+ pr_err("%s: From a negative address.\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (addr > mtd->size) {
+ pr_err("%s: From an address out of size.\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (len > mtd->size - addr) {
+ pr_err("%s: Read past size of mtd device.\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Stricter than check_offs(): it also checks that ofs and len are
+ * aligned to aligned_size.
+ */
+int check_offs_aligned(struct mtd_info *mtd, loff_t ofs, uint64_t len,
+ size_t aligned_size)
+{
+ int ret = 0;
+ loff_t tmp_ofs = ofs;
+ loff_t tmp_len = len;
+
+ /* Start address must align on given length */
+ if (do_div(tmp_ofs, aligned_size)) {
+ pr_err("%s: Unaligned address\n", __func__);
+ return -EINVAL;
+ }
+
+ /* Length must align on given length */
+ if (do_div(tmp_len, aligned_size)) {
+ pr_err("%s: Length not aligned\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ /* Do not allow out of address space of device */
+ ret = check_offs(mtd, ofs, len);
+
+ return ret;
+}
+
+static int __raid_read_async(struct mtd_info *mtd, loff_t from, size_t len,
+ size_t *retlen, u_char *buf)
+{
+ loff_t subdev_off;
+ int ret = 0, err = 0;
+ int devid, i_copy = 0;
+ size_t retsize, size;
+ struct mtd_raid_ctx *ctx;
+ struct mtd_raid_dev *subdev;
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ struct mtd_raid_read_request *read_req;
+ struct mtd_raid_request *request;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ret = mtd_raid_ctx_init(ctx);
+ if (ret)
+ goto free;
+
+ while (len) {
+ err = raid->ops->logical_to_physical(raid, from, len, i_copy,
+ &devid, &subdev_off, &size);
+ if (err) {
+ ret = err;
+ goto out;
+ }
+
+ subdev = &raid->devs[devid];
+ err = mtd_raid_dev_read(ctx, subdev, subdev_off, size, &retsize, buf);
+ if (unlikely(err)) {
+ ret = err;
+ goto out;
+ }
+
+ buf += retsize;
+ from += retsize;
+ len -= retsize;
+ }
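+
+	/*
+	 * Wait for all submitted requests to finish. If some failed
+	 * with ECC errors, retry just those against the next copy
+	 * until the copies are exhausted.
+	 */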
+wait:
+ ret = mtd_raid_ctx_wait(ctx);
+ if (ret) {
+ if (!list_empty(&ctx->error_list)) {
+ request = list_first_entry(&ctx->error_list,
+ struct mtd_raid_request, node);
+ read_req = READ_REQUEST(request);
+ ret = read_req->retval;
+ goto out;
+ } else if (!list_empty(&ctx->failed_list)) {
+ ret = -EBADMSG;
+		} else {
+			/* Only corrected bitflips remain. */
+			ret = -EUCLEAN;
+		}
+ } else {
+ goto out;
+ }
+
+ if (++i_copy >= raid->ncopies)
+ goto out;
+
+ ret = mtd_raid_ctx_retry(ctx, i_copy);
+ if (ret)
+ goto out;
+ goto wait;
+out:
+ mtd->ecc_stats.failed += ctx->failed;
+ mtd->ecc_stats.corrected += ctx->corrected;
+ /* Fill retlen */
+ *retlen = 0;
+ list_for_each_entry(request, &ctx->all_list, node_all) {
+ read_req = READ_REQUEST(request);
+ if (read_req->retval && !mtd_is_bitflip_or_eccerr(read_req->retval))
+ break;
+ *retlen += read_req->retlen;
+ }
+ mtd_raid_ctx_destroy(ctx);
+free:
+ kfree(ctx);
+
+ return ret;
+}
+
+static int __raid_read_sync(struct mtd_info *mtd, loff_t from, size_t len,
+			    size_t *retlen, u_char *buf)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ int ret = 0, err = 0;
+ int devid;
+ loff_t subdev_off;
+ size_t retsize, size;
+ struct mtd_info *subdev = NULL;
+
+ while (len) {
+ err = raid->ops->logical_to_physical(raid, from, len, 0,
+ &devid, &subdev_off, &size);
+ if (err)
+ return err;
+
+ subdev = raid->devs[devid].mtd;
+ err = mtd_read(subdev, subdev_off, size, &retsize, buf);
+ /* Save information about bitflips! */
+ if (unlikely(err)) {
+ if (mtd_is_eccerr(err)) {
+ mtd->ecc_stats.failed++;
+ ret = err;
+ } else if (mtd_is_bitflip(err)) {
+ mtd->ecc_stats.corrected++;
+ /* Do not overwrite -EBADMSG !! */
+ if (!ret)
+ ret = err;
+ } else {
+ return err;
+ }
+ }
+
+ *retlen += retsize;
+ len -= retsize;
+ buf += retsize;
+ from += retsize;
+ }
+
+ return ret;
+}
+
+int mtd_raid_read(struct mtd_info *mtd, loff_t from, size_t len,
+ size_t *retlen, u_char *buf)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+
+ if (check_offs(mtd, from, len))
+ return -EINVAL;
+
+ if (need_async_reading(raid, from, len))
+ return __raid_read_async(mtd, from, len, retlen, buf);
+
+ return __raid_read_sync(mtd, from, len, retlen, buf);
+}
+
+/* Interface for mtd->_write() */
+int mtd_raid_write(struct mtd_info *mtd, loff_t to, size_t len,
+		   size_t *retlen, const u_char *buf)
+{
+ int err = 0;
+ int i = 0;
+ int devid;
+ loff_t subdev_off;
+ size_t retsize, size;
+ struct mtd_info *subdev = NULL;
+ struct mtd_raid *raid = MTD_RAID(mtd);
+
+ if (!(mtd->flags & MTD_WRITEABLE))
+ return -EROFS;
+
+ if (check_offs(mtd, to, len))
+ return -EINVAL;
+
+ while (len) {
+ for (i = 0; i < raid->ncopies; i++) {
+ err = raid->ops->logical_to_physical(raid, to, len, i,
+ &devid, &subdev_off, &size);
+ if (err)
+ goto out;
+
+ subdev = raid->devs[devid].mtd;
+ err = mtd_write(subdev, subdev_off, size, &retsize, buf);
+ if (unlikely(err))
+ goto out;
+ }
+
+ *retlen += retsize;
+ len -= retsize;
+ buf += retsize;
+ to += retsize;
+ }
+
+out:
+ return err;
+}
+
+int __raid_read_oob_async(struct mtd_info *mtd, loff_t from, size_t readlen,
+ size_t oobsize, struct mtd_oob_ops *ops)
+{
+ size_t size;
+ loff_t subdev_off;
+ struct mtd_oob_ops devops = *ops;
+ struct mtd_raid_ctx *ctx;
+ struct mtd_raid_dev *subdev;
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ struct mtd_raid_read_oob_request *read_oob_req;
+ struct mtd_raid_request *request;
+ int i_copy = 0, devid = 0, i = 0, ret = 0;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ret = mtd_raid_ctx_init(ctx);
+ if (ret)
+ goto free;
+
+ while (readlen > 0) {
+ ret = raid->ops->logical_to_physical(raid, from, readlen, i,
+ &devid, &subdev_off, &size);
+ if (unlikely(ret))
+ goto out;
+
+ if (devops.datbuf) {
+ devops.len = size;
+ } else {
+ if (devops.ooblen > (size / mtd->writesize * oobsize))
+ devops.ooblen = size / mtd->writesize * oobsize;
+ }
+
+ /* Read data from subdev */
+ subdev = &raid->devs[devid];
+ ret = mtd_raid_dev_read_oob(ctx, subdev, subdev_off, size, &devops);
+ if (unlikely(ret))
+ goto out;
+
+ readlen -= size;
+ from += size;
+
+ if (devops.datbuf)
+ devops.datbuf += devops.len;
+
+ if (devops.oobbuf)
+ devops.oobbuf += devops.ooblen;
+ }
+
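+	/* Same wait/retry-on-next-copy flow as __raid_read_async(). */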
+wait:
+ ret = mtd_raid_ctx_wait(ctx);
+ if (ret) {
+ /* Not all request succeeded */
+ if (!list_empty(&ctx->error_list)) {
+ request = list_first_entry(&ctx->error_list,
+ struct mtd_raid_request, node);
+ read_oob_req = READ_OOB_REQUEST(request);
+ ret = read_oob_req->retval;
+ goto out;
+ } else if (!list_empty(&ctx->failed_list)) {
+ ret = -EBADMSG;
+		} else {
+			/* Only corrected bitflips remain. */
+			ret = -EUCLEAN;
+		}
+ } else {
+ goto out;
+ }
+
+ if (++i_copy >= raid->ncopies)
+ goto out;
+
+ ret = mtd_raid_ctx_retry(ctx, i_copy);
+ if (ret)
+ goto out;
+ goto wait;
+out:
+ mtd->ecc_stats.failed += ctx->failed;
+ mtd->ecc_stats.corrected += ctx->corrected;
+ /* Fill retlen */
+ ops->retlen = ops->oobretlen = 0;
+ list_for_each_entry(request, &ctx->all_list, node_all) {
+ read_oob_req = READ_OOB_REQUEST(request);
+ if (read_oob_req->retval && !mtd_is_bitflip_or_eccerr(read_oob_req->retval))
+ break;
+ ops->retlen += read_oob_req->ops.retlen;
+ ops->oobretlen += read_oob_req->ops.oobretlen;
+ }
+ mtd_raid_ctx_destroy(ctx);
+free:
+ kfree(ctx);
+
+ return ret;
+}
+
+int __raid_read_oob_sync(struct mtd_info *mtd, loff_t from, size_t readlen,
+ size_t oobsize, struct mtd_oob_ops *ops)
+{
+ size_t size;
+ loff_t subdev_off;
+ struct mtd_oob_ops devops = *ops;
+ struct mtd_info *subdev;
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ int devid = 0, i = 0, ret = 0;
+
+ while (readlen > 0) {
+ ret = raid->ops->logical_to_physical(raid, from, readlen, i,
+ &devid, &subdev_off, &size);
+ if (unlikely(ret))
+ goto out;
+
+ if (devops.datbuf) {
+ devops.len = size;
+ } else {
+ if (devops.ooblen > (size / mtd->writesize * oobsize))
+ devops.ooblen = size / mtd->writesize * oobsize;
+ }
+
+ /* Read data from subdev */
+ subdev = raid->devs[devid].mtd;
+ ret = mtd_read_oob(subdev, subdev_off, &devops);
+ if (ret)
+ break;
+
+ readlen -= size;
+ from += size;
+
+ if (devops.datbuf) {
+ devops.datbuf += devops.len;
+ ops->retlen += devops.len;
+ }
+
+ if (devops.oobbuf) {
+ devops.oobbuf += devops.ooblen;
+ ops->oobretlen += devops.ooblen;
+ }
+ }
+out:
+ return ret;
+}
+
+int mtd_raid_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ size_t oobsize, readlen = ops->len;
+
+ /* Check parameters for reading oob */
+ if (ops->datbuf && check_offs(mtd, from, ops->len))
+ return -EINVAL;
+
+ /* Get oobsize depending on mode */
+ if (ops->mode == MTD_OPS_AUTO_OOB)
+ oobsize = mtd->oobavail;
+ else
+ oobsize = mtd->oobsize;
+
+ /* Check ooboffs */
+ if (ops->ooboffs >= oobsize)
+ return -EINVAL;
+
+ /* Check len and from */
+ oobsize -= ops->ooboffs;
+ if (ops->datbuf) {
+ readlen = ops->len;
+ } else {
+ readlen = DIV_ROUND_UP(ops->ooblen, oobsize);
+ readlen *= mtd->writesize;
+ }
+
+ if (readlen > mtd->size - from)
+ return -EINVAL;
+
+ if (need_async_reading(raid, from, readlen))
+ return __raid_read_oob_async(mtd, from, readlen, oobsize, ops);
+
+ return __raid_read_oob_sync(mtd, from, readlen, oobsize, ops);
+}
+
+int mtd_raid_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ struct mtd_oob_ops devops = *ops;
+ uint64_t oobsize;
+ int devid = 0, i = 0, ret = 0;
+ loff_t subdev_off;
+ size_t size, writelen = ops->len;
+ struct mtd_info *subdev = NULL;
+
+ if (!(mtd->flags & MTD_WRITEABLE))
+ return -EROFS;
+
+ if (ops->datbuf && check_offs(mtd, to, ops->len))
+ return -EINVAL;
+
+ ops->retlen = ops->oobretlen = 0;
+
+ if (ops->mode == MTD_OPS_AUTO_OOB)
+ oobsize = mtd->oobavail;
+ else
+ oobsize = mtd->oobsize;
+
+ if (to < 0 || to > mtd->size)
+ return -EINVAL;
+
+ if (devops.ooboffs >= oobsize)
+ return -EINVAL;
+
+ oobsize -= devops.ooboffs;
+
+ if (devops.datbuf) {
+ writelen = devops.len;
+ } else {
+ writelen = DIV_ROUND_UP(devops.ooblen, oobsize);
+ writelen *= mtd->writesize;
+ }
+
+ if (writelen > mtd->size - to)
+ return -EINVAL;
+
+ while (writelen > 0) {
+ for (i = 0; i < raid->ncopies; i++) {
+ ret = raid->ops->logical_to_physical(raid, to, writelen, i,
+ &devid, &subdev_off, &size);
+ if (unlikely(ret))
+ goto out;
+
+ if (devops.datbuf) {
+ devops.len = size;
+ } else {
+ if (devops.ooblen > (size / mtd->writesize * oobsize))
+ devops.ooblen = size / mtd->writesize * oobsize;
+ }
+
+ subdev = raid->devs[devid].mtd;
+ ret = mtd_write_oob(subdev, subdev_off, &devops);
+ if (ret)
+ goto out;
+
+ writelen -= size;
+ to += size;
+
+ if (devops.datbuf) {
+ devops.datbuf += devops.len;
+ ops->retlen += devops.len;
+ }
+
+ if (devops.oobbuf) {
+ devops.oobbuf += devops.ooblen;
+ ops->oobretlen += devops.ooblen;
+ }
+ }
+ }
+out:
+ return ret;
+}
+
+/* TODO: make this asynchronous and parallel. */
+/* "lblock" means logical block. */
+static int raid_erase_lblock(struct mtd_raid *raid, struct erase_info *instr)
+{
+ int devid;
+ size_t size;
+ loff_t subdev_off;
+ int i, icopy = 0;
+ int err = 0, ret = 0;
+ uint64_t addr, len;
+ struct mtd_raid_dev *subdev = NULL;
+
+ addr = instr->addr;
+ len = 0;
+ for (i = 0; i < raid->npebs_per_leb; i++) {
+ for (icopy = 0; icopy < raid->ncopies; icopy++) {
+ err = raid->ops->logical_to_physical(raid, addr, len, icopy, &devid, &subdev_off, &size);
+ if (err)
+ goto out;
+
+ subdev = &raid->devs[devid];
+ if (!(subdev->mtd->flags & MTD_WRITEABLE)) {
+ ret = -EROFS;
+ goto out;
+ }
+
+ instr->addr = subdev_off;
+
+ ret = mtd_raid_dev_erase(subdev, instr);
+ if (ret)
+ goto out;
+
+ if (instr->state != MTD_ERASE_DONE) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ addr += raid->substripe_size;
+ }
+
+out:
+ return ret;
+}
+
+int mtd_raid_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ struct erase_info *erase;
+ loff_t logical;
+ loff_t length;
+ int err = 0;
+
+ if (!(mtd->flags & MTD_WRITEABLE))
+ return -EROFS;
+
+ if (check_offs_aligned(mtd, instr->addr, instr->len, mtd->erasesize))
+ return -EINVAL;
+
+ instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+
+ erase = kmalloc(sizeof(struct erase_info), GFP_KERNEL);
+ if (!erase)
+ return -ENOMEM;
+
+ *erase = *instr;
+ logical = instr->addr;
+ length = instr->len;
+
+ err = 0;
+ while (length > 0) {
+ erase->addr = logical;
+ err = raid_erase_lblock(raid, erase);
+ if (err)
+ break;
+
+ logical += raid->mtd.erasesize;
+ length -= raid->mtd.erasesize;
+ }
+
+ instr->state = erase->state;
+ kfree(erase);
+
+ if (err)
+ return err;
+
+ if (instr->callback)
+ instr->callback(instr);
+
+	return 0;
+}
+
+void mtd_raid_sync(struct mtd_info *mtd)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ int i;
+
+ for (i = 0; i < raid->dev_count; i++) {
+ struct mtd_info *subdev = raid->devs[i].mtd;
+ mtd_sync(subdev);
+ }
+}
+
+int mtd_raid_suspend(struct mtd_info *mtd)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ int i, err = 0;
+
+	for (i = 0; i < raid->dev_count; i++) {
+		struct mtd_info *subdev = raid->devs[i].mtd;
+
+		err = mtd_suspend(subdev);
+		if (err < 0)
+			return err;
+	}
+ return err;
+}
+
+void mtd_raid_resume(struct mtd_info *mtd)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ int i;
+
+ for (i = 0; i < raid->dev_count; i++) {
+ struct mtd_info *subdev = raid->devs[i].mtd;
+ mtd_resume(subdev);
+ }
+}
+
+int mtd_raid_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ int ret = 0, err = 0, i = 0;
+ int devid;
+ loff_t subdev_off;
+ size_t size;
+ struct mtd_info *subdev = NULL;
+
+ while (len) {
+ for (i = 0; i < raid->ncopies; i++) {
+ err = raid->ops->logical_to_physical(raid, ofs, len, i, &devid, &subdev_off, &size);
+ if (err) {
+ ret = err;
+ goto out;
+ }
+
+ subdev = raid->devs[devid].mtd;
+ err = mtd_lock(subdev, subdev_off, size);
+ if (unlikely(err)) {
+ ret = err;
+ goto out;
+ }
+ }
+
+ len -= size;
+ ofs += size;
+ }
+out:
+ return ret;
+}
+
+int mtd_raid_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ int ret = 0, err = 0, i = 0;
+ int devid;
+ loff_t subdev_off;
+ size_t size;
+ struct mtd_info *subdev = NULL;
+
+ while (len) {
+ for (i = 0; i < raid->ncopies; i++) {
+ err = raid->ops->logical_to_physical(raid, ofs, len, i, &devid, &subdev_off, &size);
+ if (err) {
+ ret = err;
+ goto out;
+ }
+
+ subdev = raid->devs[devid].mtd;
+ err = mtd_unlock(subdev, subdev_off, size);
+ if (unlikely(err)) {
+ ret = err;
+ goto out;
+ }
+ }
+
+ len -= size;
+ ofs += size;
+ }
+
+out:
+ return ret;
+}
+
+int mtd_raid_block_isbad(struct mtd_info *mtd, loff_t ofs)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ int i = 0;
+ loff_t from = ofs, subdev_off;
+ size_t len;
+ int devid;
+ size_t size;
+ struct mtd_info *subdev = NULL;
+
+ if (!mtd_can_have_bb(raid->devs[0].mtd))
+ return 0;
+
+ ofs -= do_div(from, mtd->erasesize);
+ from = ofs;
+ len = mtd->erasesize;
+	while (len) {
+		for (i = 0; i < raid->ncopies; i++) {
+			/* Conservatively treat a mapping failure as bad. */
+			if (raid->ops->logical_to_physical(raid, from, len, i,
+						&devid, &subdev_off, &size))
+				return 1;
+
+			subdev = raid->devs[devid].mtd;
+			if (mtd_block_isbad(subdev, subdev_off))
+				return 1;
+		}
+
+ len -= size;
+ from += size;
+ }
+
+ return 0;
+}
+
+int mtd_raid_block_markbad(struct mtd_info *mtd, loff_t ofs)
+{
+ struct mtd_raid *raid = MTD_RAID(mtd);
+ int i, err = 0;
+ loff_t from = ofs;
+ size_t len;
+ int devid;
+ loff_t subdev_off;
+ size_t size;
+ struct mtd_info *subdev = NULL;
+
+ ofs -= do_div(from, mtd->erasesize);
+ from = ofs;
+ len = mtd->erasesize;
+ while (len) {
+ for (i = 0; i < raid->ncopies; i++) {
+ err = raid->ops->logical_to_physical(raid, from, len, i, &devid, &subdev_off, &size);
+ if (err)
+ goto out;
+
+ subdev = raid->devs[devid].mtd;
+ err = mtd_block_markbad(subdev, subdev_off);
+			if (err)
+				goto out;
+
+			mtd->ecc_stats.badblocks++;
+ }
+
+ len -= size;
+ from += size;
+ }
+
+out:
+ return err;
+}
+
+int mtd_raid_init(struct mtd_raid *raid, struct mtd_info **subdevs,
+ int dev_count, size_t substripe_size)
+{
+ struct mtd_info *subdev = NULL;
+ struct mtd_info *mtd = NULL;
+ int ret = 0;
+ int i = 0;
+
+ INIT_LIST_HEAD(&raid->node);
+ raid->substripe_size = substripe_size;
+ raid->dev_count = dev_count;
+ raid->subdevs = subdevs;
+
+	for (i = 0; i < dev_count; i++) {
+		raid->devs[i].mtd = raid->subdevs[i];
+		raid->devs[i].id = i;
+		raid->devs[i].raid = raid;
+		spin_lock_init(&raid->devs[i].lock);
+	}
+
+ mtd = &raid->mtd;
+ subdev = raid->devs[0].mtd;
+
+ if (raid->substripe_size == 0)
+ raid->substripe_size = subdev->writesize;
+
+ mtd->owner = THIS_MODULE;
+ mtd->type = subdev->type;
+ mtd->flags = subdev->flags;
+ mtd->writesize = subdev->writesize;
+ mtd->writebufsize = subdev->writebufsize;
+ mtd->subpage_sft = subdev->subpage_sft;
+ mtd->oobsize = subdev->oobsize;
+ mtd->oobavail = subdev->oobavail;
+ mtd->ecclayout = subdev->ecclayout;
+
+ mtd->_erase = mtd_raid_erase;
+ mtd->_read = mtd_raid_read;
+ mtd->_write = mtd_raid_write;
+ mtd->_sync = mtd_raid_sync;
+ mtd->_lock = mtd_raid_lock;
+ mtd->_unlock = mtd_raid_unlock;
+ mtd->_suspend = mtd_raid_suspend;
+ mtd->_resume = mtd_raid_resume;
+
+ if (subdev->_read_oob)
+ mtd->_read_oob = mtd_raid_read_oob;
+ if (subdev->_write_oob)
+ mtd->_write_oob = mtd_raid_write_oob;
+ if (subdev->_block_isbad)
+ mtd->_block_isbad = mtd_raid_block_isbad;
+ if (subdev->_block_markbad)
+ mtd->_block_markbad = mtd_raid_block_markbad;
+
+ for (i = 1; i < dev_count; i++) {
+ if (mtd->flags != raid->devs[i].mtd->flags) {
+ /*
+ * Expect all flags except MTD_WRITEABLE to be
+ * equal on all subdevices.
+ */
+			if ((mtd->flags ^ raid->devs[i].mtd->flags) &
+			    ~MTD_WRITEABLE) {
+				pr_err("Incompatible device flags on \"%s\"\n",
+				       raid->devs[i].mtd->name);
+ ret = -EINVAL;
+ goto out;
+			} else {
+				/*
+				 * If the writeable attribute differs,
+				 * make the super device writeable.
+				 */
+				mtd->flags |=
+					raid->devs[i].mtd->flags & MTD_WRITEABLE;
+			}
+ }
+
+ if (mtd->writesize != raid->devs[i].mtd->writesize ||
+ mtd->subpage_sft != raid->devs[i].mtd->subpage_sft ||
+ mtd->oobsize != raid->devs[i].mtd->oobsize ||
+ !mtd->_read_oob != !raid->devs[i].mtd->_read_oob ||
+ !mtd->_write_oob != !raid->devs[i].mtd->_write_oob) {
+			pr_err("Incompatible OOB or ECC data on \"%s\"\n",
+			       raid->devs[i].mtd->name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (mtd->writebufsize != raid->devs[i].mtd->writebufsize) {
+			pr_err("Incompatible writebufsize on \"%s\"\n",
+ raid->devs[i].mtd->name);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (raid->ops->init) {
+ ret = raid->ops->init(raid, dev_count, substripe_size);
+ if (ret)
+ goto out;
+ }
+
+	for (i = 0; i < dev_count; i++) {
+		/*
+		 * Create a background thread for each raid_dev to
+		 * handle its I/O requests.
+		 */
+		INIT_LIST_HEAD(&raid->devs[i].list);
+		raid->devs[i].thread = kthread_create(mtd_raid_dev_thread,
+						      &raid->devs[i],
+						      "%s_thread_%d",
+						      raid->name, i);
+		if (IS_ERR(raid->devs[i].thread)) {
+			ret = PTR_ERR(raid->devs[i].thread);
+			raid->devs[i].thread = NULL;
+			goto out;
+		}
+	}
+
+ return 0;
+out:
+ return ret;
+}
+
+int mtd_raid_create(enum mtd_raid_level raid_level, struct mtd_info **subdevs,
+ int dev_count, int substripe_size)
+{
+ int ret = 0;
+ struct mtd_raid *raid = NULL;
+
+	switch (raid_level) {
+	case MTD_RAID_LEVEL_SINGLE:
+		raid = mtd_raid_single_create(dev_count, substripe_size);
+		if (!raid) {
+			pr_err("MTD RAID: Failed to create raid single device.\n");
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	case MTD_RAID_LEVEL_RAID0:
+		raid = mtd_raid0_create(dev_count, substripe_size);
+		if (!raid) {
+			pr_err("MTD RAID: Failed to create raid0 device.\n");
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	case MTD_RAID_LEVEL_RAID1:
+		raid = mtd_raid1_create(dev_count, substripe_size);
+		if (!raid) {
+			pr_err("MTD RAID: Failed to create raid1 device.\n");
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	default:
+		pr_err("MTD RAID: Unsupported raid level: %d.\n", raid_level);
+		ret = -ENOTSUPP;
+		goto out;
+	}
+
+ ret = mtd_raid_init(raid, subdevs, dev_count, substripe_size);
+ if (ret)
+ goto destroy;
+
+ return mtd_device_register(&raid->mtd, NULL, 0);
+
+destroy:
+ mtd_raid_destroy(raid);
+out:
+ return ret;
+}
+
+int mtd_raid_destroy(struct mtd_raid *raid)
+{
+ int i = 0;
+ int ret = 0;
+
+ ret = mtd_device_unregister(&raid->mtd);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < raid->dev_count; i++) {
+ if (raid->devs[i].thread)
+ kthread_stop(raid->devs[i].thread);
+ }
+
+ if (raid->ops->destroy)
+ raid->ops->destroy(raid);
+out:
+ return ret;
+}
+
+static struct miscdevice mtd_raid_ctrl_cdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "mtd_raid_ctrl",
+ .fops = &mtd_raid_ctrl_cdev_operations,
+};
+
+static int __init init_mtd_raid(void)
+{
+ int err = 0;
+
+ err = mtd_raid_list_init();
+ if (err)
+ goto out;
+
+ err = misc_register(&mtd_raid_ctrl_cdev);
+ if (err) {
+		pr_err("MTD RAID error: cannot register device\n");
+ goto out;
+ }
+
+ return 0;
+out:
+	pr_err("MTD RAID error: cannot initialize MTD RAID, error %d\n", err);
+ return err;
+}
+
+static void __exit cleanup_mtd_raid(void)
+{
+ misc_deregister(&mtd_raid_ctrl_cdev);
+ mtd_raid_list_destroy();
+}
+
+module_init(init_mtd_raid);
+module_exit(cleanup_mtd_raid);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dongsheng Yang <yangds.fnst@cn.fujitsu.com>");
+MODULE_DESCRIPTION("Support for MTD RAID");
new file mode 100644
@@ -0,0 +1,273 @@
+/*
+ * This file is part of MTD RAID.
+ *
+ * Copyright (C) 2015 Dongsheng Yang. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
+ */
+
+#ifndef __MTD_RAID_H
+#define __MTD_RAID_H
+
+#include <linux/mtd/mtd.h>
+#include <mtd/mtd-abi.h>
+#include <mtd/mtd-raid-user.h>
+
+struct mtd_raid;
+
+/*
+ * Operations of an MTD RAID device; each RAID level provides
+ * its own implementation.
+ */
+struct mtd_raid_operations {
+ int (*init)(struct mtd_raid *, int, size_t);
+ void (*destroy)(struct mtd_raid *);
+
+	/*
+	 * logical_to_physical() is the core function of MTD RAID:
+	 * given a logical address from the user, it returns the
+	 * physical devid, address and length.
+	 */
+ int (*logical_to_physical)(struct mtd_raid *, loff_t, size_t, int, int *, loff_t *, size_t *);
+ int (*physical_to_logical)(struct mtd_raid *, int, loff_t, size_t, loff_t *, size_t *);
+};
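+
+/*
+ * Illustrative example, assuming the usual RAID0 striping: with two
+ * subdevices and a 4 KiB substripe_size, logical offset 12 KiB is
+ * substripe 3, so ->logical_to_physical() would yield devid 3 % 2 = 1
+ * and physical offset (3 / 2) * 4 KiB = 4 KiB.
+ */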
+
+/*
+ * struct mtd_raid_dev - one mtd_raid_dev represents an MTD device
+ * in the RAID array.
+ */
+struct mtd_raid_dev {
+ int id;
+ struct mtd_raid *raid;
+ struct mtd_info *mtd;
+
+ spinlock_t lock;
+ struct list_head list;
+ struct task_struct *thread;
+};
+
+/*
+ * Context of an I/O. Many I/O requests are sent to the
+ * mtd_raid_devs, and each mtd_raid_dev handles them in its own
+ * thread. To track the status of each request, requests are
+ * registered in an mtd_raid_ctx.
+ */
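+/*
+ * A request moves between these lists as it progresses (see
+ * read_req_func() in raid_io.c):
+ *
+ *	submit_list -> complete_list	(success)
+ *	            -> corrected_list	(bitflips corrected)
+ *	            -> failed_list	(ECC failure, may be retried)
+ *	            -> error_list	(any other error)
+ *
+ * all_list links every request, whatever its state.
+ */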
+struct mtd_raid_ctx {
+ spinlock_t lock;
+ struct list_head all_list;
+
+ struct list_head submit_list;
+ struct list_head complete_list;
+ struct list_head failed_list;
+ struct list_head corrected_list;
+ struct list_head error_list;
+
+ unsigned int failed;
+ unsigned int corrected;
+ unsigned int errored;
+
+ struct task_struct *wait;
+};
+
+
+/*
+ * Type of each I/O request.
+ */
+enum mtd_raid_request_type {
+ MTD_RAID_REQ_READ = 0,
+ MTD_RAID_REQ_READ_OOB
+};
+
+struct mtd_raid_request;
+typedef void request_func_t(struct mtd_raid_request *);
+typedef void end_func_t(struct mtd_raid_request *);
+typedef int retry_func_t(struct mtd_raid_request *request, int i_copy);
+
+struct mtd_raid_request {
+ enum mtd_raid_request_type type;
+ struct mtd_raid_ctx *ctx;
+ struct mtd_raid_dev *raid_dev;
+
+ struct list_head node;
+ struct list_head node_all;
+ struct list_head node_request;
+
+ /* Main action of this request. */
+ request_func_t *func;
+ /*
+ * If this request failed, how to retry. NULL means
+ * don't retry for this request.
+ */
+ retry_func_t *retry_func;
+ /*
+ * When we want to destroy this request, what we need
+ * to do.
+ */
+ end_func_t *end_func;
+};
+
+/*
+ * Request for mtd_read.
+ */
+struct mtd_raid_read_request {
+ struct mtd_raid_request request;
+
+ loff_t from;
+ size_t len;
+ size_t retlen;
+ u_char *buf;
+ int retval;
+};
+
+/*
+ * Request for mtd_read_oob
+ */
+struct mtd_raid_read_oob_request {
+ struct mtd_raid_request request;
+
+ loff_t from;
+ size_t len;
+ struct mtd_oob_ops ops;
+ int retval;
+};
+
+/*
+ * Structure representing a RAID device.
+ */
+struct mtd_raid {
+ char name[32];
+ int ncopies;
+ int dev_count;
+ struct mtd_info **subdevs;
+ int npebs_per_leb;
+ int substripe_size;
+	/*
+	 * This is the "superblock" for this RAID device.
+	 * We fill it in and register it with the MTD core.
+	 */
+ struct mtd_info mtd;
+ struct list_head node;
+ const struct mtd_raid_operations *ops;
+ enum mtd_raid_level raid_level;
+ struct mtd_raid_dev devs[0];
+};
+
+struct mtd_raid_single {
+	/*
+	 * Keep 'raid' as the last member: the devs[] array is
+	 * allocated immediately after this structure.
+	 */
+ struct mtd_raid raid;
+};
+
+struct mtd_raid0 {
+	/* XXX: add read-ahead ("reada") support here. */
+
+	/*
+	 * Keep 'raid' as the last member: the devs[] array is
+	 * allocated immediately after this structure.
+	 */
+ struct mtd_raid raid;
+};
+
+struct mtd_raid1 {
+	/*
+	 * Keep 'raid' as the last member: the devs[] array is
+	 * allocated immediately after this structure.
+	 */
+ struct mtd_raid raid;
+};
+
+/* Macros to get specified request pointers from generic request */
+#define READ_REQUEST(req) \
+ container_of(req, struct mtd_raid_read_request, request)
+
+#define READ_OOB_REQUEST(req) \
+ container_of(req, struct mtd_raid_read_oob_request, request)
+
+/* Macros to get specified mtd_raid pointers from mtd_info pointer */
+#define MTD_RAID(mtd) \
+ container_of(mtd, struct mtd_raid, mtd)
+
+#define MTD_RAID_SINGLE(mtd_raid) \
+ container_of(mtd_raid, struct mtd_raid_single, raid)
+
+#define MTD_RAID_RAID0(mtd_raid) \
+ container_of(mtd_raid, struct mtd_raid0, raid)
+
+#define MTD_RAID_RAID1(mtd_raid) \
+ container_of(mtd_raid, struct mtd_raid1, raid)
+
+/* ioctl.c */
+extern const struct file_operations mtd_raid_ctrl_cdev_operations;
+
+/* core.c */
+int mtd_raid_list_init(void);
+void mtd_raid_list_destroy(void);
+int mtd_raid_list_register(enum mtd_raid_level raid_level, struct mtd_raid *mtd_raid);
+struct mtd_raid *mtd_raid_list_get(int mtd_num);
+void mtd_raid_list_unregister(struct mtd_raid *mtd_raid);
+
+int mtd_raid_create(enum mtd_raid_level raid_level, struct mtd_info **subdevs,
+ int dev_count, int substripe_size);
+int mtd_raid_destroy(struct mtd_raid *mtd_raid);
+
+int mtd_raid_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
+int mtd_raid_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
+int mtd_raid_erase(struct mtd_info *mtd, struct erase_info *instr);
+int mtd_raid_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops);
+int mtd_raid_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+int mtd_raid_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+void mtd_raid_resume(struct mtd_info *mtd);
+int mtd_raid_suspend(struct mtd_info *mtd);
+void mtd_raid_sync(struct mtd_info *mtd);
+int mtd_raid_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops);
+int mtd_raid_block_isbad(struct mtd_info *mtd, loff_t ofs);
+int mtd_raid_block_markbad(struct mtd_info *mtd, loff_t ofs);
+
+/* raid_io.c */
+int mtd_raid_dev_thread(void *u);
+int mtd_raid_ctx_init(struct mtd_raid_ctx *ctx);
+int mtd_raid_ctx_wait(struct mtd_raid_ctx *ctx);
+void mtd_raid_ctx_destroy(struct mtd_raid_ctx *ctx);
+int mtd_raid_ctx_retry(struct mtd_raid_ctx *ctx, int i_copy);
+
+int mtd_raid_dev_read(struct mtd_raid_ctx *ctx, struct mtd_raid_dev *raid_dev,
+ loff_t from, size_t len, size_t *retlen, u_char *buf);
+int mtd_raid_dev_read_oob(struct mtd_raid_ctx *ctx, struct mtd_raid_dev *raid_dev,
+ loff_t from, size_t len, struct mtd_oob_ops *ops);
+int mtd_raid_dev_erase(struct mtd_raid_dev *raid_dev, struct erase_info *erase);
+
+/* raid_single.c */
+extern const struct mtd_raid_operations mtd_raid_single_ops;
+struct mtd_raid *mtd_raid_single_create(int dev_count, size_t substripe_size);
+
+/* raid0.c */
+extern const struct mtd_raid_operations mtd_raid0_ops;
+struct mtd_raid *mtd_raid0_create(int dev_count, size_t substripe_size);
+
+/* raid1.c */
+extern const struct mtd_raid_operations mtd_raid1_ops;
+struct mtd_raid *mtd_raid1_create(int dev_count, size_t substripe_size);
+
+/* inline functions */
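+/*
+ * Requests covering at least one full stripe (substripe_size bytes
+ * on every subdevice) are dispatched to the per-device threads;
+ * smaller requests are served synchronously to avoid the request
+ * overhead.
+ */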
+static inline int need_async_reading(struct mtd_raid *raid,
+ loff_t from, size_t len)
+{
+ return (len >= (raid->substripe_size * raid->dev_count));
+}
+
+#endif /* __MTD_RAID_H */
new file mode 100644
@@ -0,0 +1,449 @@
+/*
+ * This file is part of MTD RAID.
+ *
+ * Copyright (C) 2015 Dongsheng Yang. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
+ */
+
+/*
+ * This file handles all the I/O-related work.
+ */
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "mtd_raid.h"
+
+/*
+ * Context-related operations:
+ *	mtd_raid_ctx_init()    --> initialize a ctx
+ *	                       --> attach requests to the ctx
+ *	mtd_raid_ctx_wait()    --> wait for all requests to finish
+ *	mtd_raid_ctx_retry()   --> retry the failed requests
+ *	mtd_raid_ctx_destroy() --> destroy the ctx
+ */
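+
+/*
+ * Typical caller flow (cf. __raid_read_async() in core.c): init a
+ * ctx, queue one mtd_raid_dev_read() per chunk, mtd_raid_ctx_wait(),
+ * mtd_raid_ctx_retry() against the next copy if requests failed,
+ * then mtd_raid_ctx_destroy() and free the ctx.
+ */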
+int mtd_raid_ctx_init(struct mtd_raid_ctx *ctx)
+{
+ spin_lock_init(&ctx->lock);
+ INIT_LIST_HEAD(&ctx->all_list);
+ INIT_LIST_HEAD(&ctx->submit_list);
+ INIT_LIST_HEAD(&ctx->complete_list);
+ INIT_LIST_HEAD(&ctx->failed_list);
+ INIT_LIST_HEAD(&ctx->corrected_list);
+ INIT_LIST_HEAD(&ctx->error_list);
+
+ ctx->failed = ctx->corrected = ctx->errored = 0;
+ ctx->wait = current;
+
+ return 0;
+}
+
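+/*
+ * Returns 0 when every request completed cleanly, otherwise the
+ * number of requests that failed, were corrected or errored.
+ */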
+int mtd_raid_ctx_wait(struct mtd_raid_ctx *ctx)
+{
+ int ret = 0;
+
+	while (1) {
+		spin_lock(&ctx->lock);
+		if (list_empty(&ctx->submit_list)) {
+			ret = ctx->failed + ctx->corrected + ctx->errored;
+			spin_unlock(&ctx->lock);
+			return ret;
+		}
+
+		/*
+		 * Set the task state before dropping the lock so that
+		 * a wake_up_process() from a completing request cannot
+		 * get lost between the check and the schedule().
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+		spin_unlock(&ctx->lock);
+		schedule();
+		set_current_state(TASK_RUNNING);
+	}
+}
+
+void mtd_raid_ctx_destroy(struct mtd_raid_ctx *ctx)
+{
+ struct mtd_raid_request *request, *next;
+
+ list_for_each_entry_safe(request, next, &ctx->all_list, node_all) {
+ if (request->end_func)
+ request->end_func(request);
+ }
+}
+
+int mtd_raid_ctx_retry(struct mtd_raid_ctx *ctx, int i_copy)
+{
+ struct mtd_raid_request *request;
+ int ret = 0;
+
+ list_for_each_entry(request, &ctx->failed_list, node) {
+ ret = request->retry_func(request, i_copy);
+ if (ret)
+ goto out;
+ spin_lock(&ctx->lock);
+ list_move_tail(&request->node, &ctx->submit_list);
+ spin_unlock(&ctx->lock);
+ }
+out:
+ return ret;
+}
+
+/*
+ * Hooks for each type of request.
+ */
+static void read_req_func(struct mtd_raid_request *request)
+{
+ struct mtd_raid_read_request *read_req;
+ struct mtd_raid_ctx *ctx;
+ struct mtd_raid_dev *raid_dev;
+ int ret = 0;
+
+ read_req = READ_REQUEST(request);
+ raid_dev = request->raid_dev;
+ ret = mtd_read(raid_dev->mtd, read_req->from, read_req->len,
+ &read_req->retlen, read_req->buf);
+
+ read_req->retval = ret;
+ ctx = request->ctx;
+ spin_lock(&ctx->lock);
+ list_del_init(&request->node);
+ if (unlikely(ret)) {
+ if (mtd_is_eccerr(ret)) {
+ ctx->failed++;
+ list_add_tail(&request->node, &ctx->failed_list);
+ } else if (mtd_is_bitflip(ret)) {
+ ctx->corrected++;
+ list_add_tail(&request->node, &ctx->corrected_list);
+ } else {
+ ctx->errored++;
+ list_add_tail(&request->node, &ctx->error_list);
+ }
+ } else {
+ list_add_tail(&request->node, &ctx->complete_list);
+ }
+ spin_unlock(&ctx->lock);
+ wake_up_process(ctx->wait);
+
+ return;
+}
+
+static void read_oob_req_func(struct mtd_raid_request *request)
+{
+ struct mtd_raid_read_oob_request *read_oob_req;
+ struct mtd_raid_ctx *ctx;
+ struct mtd_raid_dev *raid_dev;
+ int ret = 0;
+
+ read_oob_req = READ_OOB_REQUEST(request);
+ raid_dev = request->raid_dev;
+ ret = mtd_read_oob(raid_dev->mtd, read_oob_req->from, &read_oob_req->ops);
+
+ read_oob_req->retval = ret;
+ ctx = request->ctx;
+ spin_lock(&ctx->lock);
+ list_del_init(&request->node);
+ if (unlikely(ret)) {
+ if (mtd_is_eccerr(ret)) {
+ ctx->failed++;
+ list_add_tail(&request->node, &ctx->failed_list);
+ } else if (mtd_is_bitflip(ret)) {
+ ctx->corrected++;
+ list_add_tail(&request->node, &ctx->corrected_list);
+ } else {
+ ctx->errored++;
+ list_add_tail(&request->node, &ctx->error_list);
+ }
+ } else {
+ list_add_tail(&request->node, &ctx->complete_list);
+ }
+ spin_unlock(&ctx->lock);
+ wake_up_process(ctx->wait);
+
+ return;
+}
+
+static int read_req_retry_func(struct mtd_raid_request *request, int i_copy)
+{
+ struct mtd_raid *mtd_raid;
+ struct mtd_raid_read_request *read_req;
+ loff_t address, subdev_off;
+ size_t length, size;
+ int devid, ret = 0;
+
+ mtd_raid = request->raid_dev->raid;
+ read_req = READ_REQUEST(request);
+ if (!mtd_raid->ops->physical_to_logical || !mtd_raid->ops->logical_to_physical)
+ return -EINVAL;
+
+ subdev_off = read_req->from;
+ size = read_req->len;
+ devid = request->raid_dev->id;
+ ret = mtd_raid->ops->physical_to_logical(mtd_raid, devid, subdev_off, size,
+ &address, &length);
+ if (ret)
+ goto out;
+
+ ret = mtd_raid->ops->logical_to_physical(mtd_raid, address, length, i_copy,
+ &devid, &subdev_off, &size);
+ if (ret)
+ goto out;
+
+ /* Fill request with the address of new copy */
+ request->raid_dev = &mtd_raid->devs[devid];
+ read_req->from = subdev_off;
+ read_req->len = size;
+out:
+ return ret;
+}
+
+static int read_oob_req_retry_func(struct mtd_raid_request *request, int i_copy)
+{
+ struct mtd_raid *mtd_raid;
+ struct mtd_raid_read_oob_request *read_oob_req;
+ loff_t address, subdev_off;
+ size_t length, size;
+ int devid, ret = 0;
+
+ mtd_raid = request->raid_dev->raid;
+ read_oob_req = READ_OOB_REQUEST(request);
+ if (!mtd_raid->ops->physical_to_logical || !mtd_raid->ops->logical_to_physical)
+ return -EINVAL;
+
+ subdev_off = read_oob_req->from;
+ size = read_oob_req->len;
+ devid = request->raid_dev->id;
+ ret = mtd_raid->ops->physical_to_logical(mtd_raid, devid, subdev_off, size,
+ &address, &length);
+ if (ret)
+ goto out;
+
+ ret = mtd_raid->ops->logical_to_physical(mtd_raid, address, length, i_copy,
+ &devid, &subdev_off, &size);
+ if (ret)
+ goto out;
+
+ /* Fill request with the address of new copy */
+ request->raid_dev = &mtd_raid->devs[devid];
+ read_oob_req->from = subdev_off;
+ read_oob_req->len = size;
+out:
+ return ret;
+}
+
+/* Generic end_func for request */
+static void request_end_func(struct mtd_raid_request *request)
+{
+ struct mtd_raid_ctx *ctx = NULL;
+ struct mtd_raid_dev *raid_dev = NULL;
+
+ ctx = request->ctx;
+ spin_lock(&ctx->lock);
+ list_del(&request->node);
+ list_del(&request->node_all);
+ spin_unlock(&ctx->lock);
+
+ raid_dev = request->raid_dev;
+ spin_lock(&raid_dev->lock);
+ list_del(&request->node_request);
+ spin_unlock(&raid_dev->lock);
+}
+
+static void read_req_end_func(struct mtd_raid_request *request)
+{
+ struct mtd_raid_read_request *read_req;
+
+ read_req = READ_REQUEST(request);
+ request_end_func(request);
+ kfree(read_req);
+}
+
+static void read_oob_req_end_func(struct mtd_raid_request *request)
+{
+ struct mtd_raid_read_oob_request *read_oob_req;
+
+ read_oob_req = READ_OOB_REQUEST(request);
+ request_end_func(request);
+ kfree(read_oob_req);
+}
+
+/*
+ * Thread for each raid_dev.
+ *
+ * It takes requests from raid_dev->list and does the requested
+ * work until the list is empty, then goes to sleep.
+ */
+int mtd_raid_dev_thread(void *u)
+{
+ struct mtd_raid_dev *raid_dev = u;
+ struct mtd_raid_request *request;
+
+ set_freezable();
+ for (;;) {
+ if (kthread_should_stop())
+ break;
+
+ if (try_to_freeze())
+ continue;
+
+ spin_lock(&raid_dev->lock);
+ if (list_empty(&raid_dev->list)) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock(&raid_dev->lock);
+ schedule();
+ continue;
+ }
+		/*
+		 * Get the first request from the request list.
+		 */
+ request = list_first_entry(&raid_dev->list,
+ struct mtd_raid_request, node_request);
+ list_del_init(&request->node_request);
+ spin_unlock(&raid_dev->lock);
+
+ if (request->func)
+ request->func(request);
+ cond_resched();
+ }
+
+ return 0;
+}
+
+/* Interfaces of raid_dev */
+
+/*
+ * Read interface for raid_dev.
+ */
+int mtd_raid_dev_read(struct mtd_raid_ctx *ctx, struct mtd_raid_dev *raid_dev,
+ loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+ struct mtd_raid_read_request *read_req;
+ struct mtd_raid_request *request;
+ int ret = 0;
+
+ /* Alloc a request */
+	read_req = kzalloc(sizeof(*read_req), GFP_KERNEL);
+	if (!read_req) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+ request = &read_req->request;
+ /* Init the request */
+ INIT_LIST_HEAD(&request->node);
+ INIT_LIST_HEAD(&request->node_all);
+ INIT_LIST_HEAD(&request->node_request);
+
+ request->ctx = ctx;
+ request->raid_dev = raid_dev;
+ request->type = MTD_RAID_REQ_READ;
+ request->func = read_req_func;
+ request->retry_func = read_req_retry_func;
+ request->end_func = read_req_end_func;
+
+ /* Init read_request */
+ read_req->from = from;
+ read_req->len = len;
+ read_req->buf = buf;
+
+ /* Add request to context */
+ spin_lock(&ctx->lock);
+ list_add_tail(&request->node, &ctx->submit_list);
+ list_add_tail(&request->node_all, &ctx->all_list);
+ spin_unlock(&ctx->lock);
+
+ /* Dispatch request to related raid_dev */
+ spin_lock(&raid_dev->lock);
+ list_add_tail(&request->node_request, &raid_dev->list);
+ spin_unlock(&raid_dev->lock);
+
+ /* Wakeup background thread to handle requests */
+ wake_up_process(raid_dev->thread);
+
+ *retlen = len;
+out:
+ return ret;
+}
+
+/*
+ * read_oob interface for raid_dev.
+ */
+int mtd_raid_dev_read_oob(struct mtd_raid_ctx *ctx, struct mtd_raid_dev *raid_dev,
+ loff_t from, size_t len, struct mtd_oob_ops *ops)
+{
+ struct mtd_raid_read_oob_request *read_oob_req;
+ struct mtd_raid_request *request;
+ int ret = 0;
+
+ /* Alloc a request */
+	read_oob_req = kzalloc(sizeof(*read_oob_req), GFP_KERNEL);
+	if (!read_oob_req) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+ request = &read_oob_req->request;
+ /* Init the request */
+ INIT_LIST_HEAD(&request->node);
+ INIT_LIST_HEAD(&request->node_all);
+ INIT_LIST_HEAD(&request->node_request);
+
+ request->ctx = ctx;
+ request->raid_dev = raid_dev;
+ request->type = MTD_RAID_REQ_READ_OOB;
+ request->func = read_oob_req_func;
+ request->retry_func = read_oob_req_retry_func;
+ request->end_func = read_oob_req_end_func;
+
+ /* Init read_request */
+ read_oob_req->from = from;
+ read_oob_req->len = len;
+ memcpy(&read_oob_req->ops, ops, sizeof(*ops));
+
+ /* Add request to context */
+ spin_lock(&ctx->lock);
+ list_add_tail(&request->node, &ctx->submit_list);
+ list_add_tail(&request->node_all, &ctx->all_list);
+ spin_unlock(&ctx->lock);
+
+ /* Dispatch request to related raid_dev */
+ spin_lock(&raid_dev->lock);
+ list_add_tail(&request->node_request, &raid_dev->list);
+ spin_unlock(&raid_dev->lock);
+
+ /* Wakeup background thread to handle requests */
+ wake_up_process(raid_dev->thread);
+out:
+ return ret;
+}
+
+/*
+ * Erase interface for raid_dev.
+ */
+int mtd_raid_dev_erase(struct mtd_raid_dev *raid_dev, struct erase_info *erase)
+{
+ int err = 0;
+ struct mtd_info *mtd = raid_dev->mtd;
+
+ erase->mtd = mtd;
+ erase->len = mtd->erasesize;
+ err = mtd_erase(mtd, erase);
+ if (err)
+ goto out;
+
+ if (erase->state != MTD_ERASE_DONE) {
+ err = -EIO;
+ goto out;
+ }
+out:
+ return err;
+}
new file mode 100644
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2015, see mtd/mtd-raid for licensing and copyright details
+ */
+#ifndef __MTD_RAID_USER_H__
+#define __MTD_RAID_USER_H__
+
+#include <linux/types.h>
+#include <linux/magic.h>
+
+/* ioctl's command */
+#define MTD_RAID_IOC_CREATE _IOW(0xFE, 1, struct mtd_raid_create_req)
+#define MTD_RAID_IOC_DESTROY _IOW(0xFE, 2, struct mtd_raid_destroy_req)
+
+enum mtd_raid_level {
+ MTD_RAID_LEVEL_SINGLE = 0,
+ MTD_RAID_LEVEL_RAID0,
+ MTD_RAID_LEVEL_RAID1,
+ MTD_RAID_LEVEL_MAX
+};
+
+struct mtd_raid_create_req {
+ __u8 raid_level;
+ __u8 reserved[3];
+ __u32 dev_count;
+ __u64 substripe_size;
+ __u32 mtd_nums[0];
+};
+
+struct mtd_raid_destroy_req {
+ __u32 mtd_num;
+};
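+
+/*
+ * Example: creating a RAID1 device from mtd0 and mtd1 through
+ * /dev/mtd_raid_ctrl. This is a userspace sketch; it assumes the
+ * ioctl handler in ioctl.c takes mtd_nums[] as MTD device indices.
+ *
+ *	struct mtd_raid_create_req *req =
+ *		malloc(sizeof(*req) + 2 * sizeof(__u32));
+ *	int fd = open("/dev/mtd_raid_ctrl", O_RDWR);
+ *
+ *	req->raid_level = MTD_RAID_LEVEL_RAID1;
+ *	req->dev_count = 2;
+ *	req->substripe_size = 0;	(0: use the subdevice writesize)
+ *	req->mtd_nums[0] = 0;
+ *	req->mtd_nums[1] = 1;
+ *	ioctl(fd, MTD_RAID_IOC_CREATE, req);
+ */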
+
+#endif /* __MTD_RAID_USER_H__ */
MTD RAID layer is a generic RAID layer in the MTD subsystem; different
RAID levels can be implemented within this framework.

Signed-off-by: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
---
 Documentation/ioctl/ioctl-number.txt |    1 +
 drivers/mtd/Kconfig                  |    2 +
 drivers/mtd/Makefile                 |    1 +
 drivers/mtd/mtd_raid/Kconfig         |   12 +
 drivers/mtd/mtd_raid/Makefile        |    3 +
 drivers/mtd/mtd_raid/core.c          | 1103 ++++++++++++++++++++++++++++++++++
 drivers/mtd/mtd_raid/mtd_raid.h      |  273 +++++++++
 drivers/mtd/mtd_raid/raid_io.c       |  449 ++++++++++++++
 include/uapi/mtd/mtd-raid-user.h     |   33 +
 9 files changed, 1877 insertions(+)
 create mode 100644 drivers/mtd/mtd_raid/Kconfig
 create mode 100644 drivers/mtd/mtd_raid/Makefile
 create mode 100644 drivers/mtd/mtd_raid/core.c
 create mode 100644 drivers/mtd/mtd_raid/mtd_raid.h
 create mode 100644 drivers/mtd/mtd_raid/raid_io.c
 create mode 100644 include/uapi/mtd/mtd-raid-user.h