From patchwork Wed Sep 2 08:51:10 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wen Congyang X-Patchwork-Id: 513412 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 5AF5C1401F0 for ; Wed, 2 Sep 2015 18:59:38 +1000 (AEST) Received: from localhost ([::1]:35324 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZX3tI-0003DI-FT for incoming@patchwork.ozlabs.org; Wed, 02 Sep 2015 04:59:36 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:50481) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZX3m5-0007Vz-OX for qemu-devel@nongnu.org; Wed, 02 Sep 2015 04:52:11 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZX3m4-0004TU-7P for qemu-devel@nongnu.org; Wed, 02 Sep 2015 04:52:09 -0400 Received: from [59.151.112.132] (port=26837 helo=heian.cn.fujitsu.com) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZX3m3-0004Bl-HW; Wed, 02 Sep 2015 04:52:08 -0400 X-IronPort-AV: E=Sophos;i="5.15,520,1432569600"; d="scan'208";a="100276472" Received: from unknown (HELO edo.cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 02 Sep 2015 16:55:03 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (localhost.localdomain [127.0.0.1]) by edo.cn.fujitsu.com (8.14.3/8.13.1) with ESMTP id t828poR3020284; Wed, 2 Sep 2015 16:51:50 +0800 Received: from G08FNSTD140052.g08.fujitsu.local (10.167.226.52) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.181.6; Wed, 2 Sep 2015 16:51:59 +0800 From: Wen Congyang To: qemu devel , Fam Zheng , Max Reitz , Paolo Bonzini , Stefan Hajnoczi Date: Wed, 2 Sep 2015 16:51:10 +0800 
Message-ID: <1441183880-26993-7-git-send-email-wency@cn.fujitsu.com> X-Mailer: git-send-email 2.4.3 In-Reply-To: <1441183880-26993-1-git-send-email-wency@cn.fujitsu.com> References: <1441183880-26993-1-git-send-email-wency@cn.fujitsu.com> MIME-Version: 1.0 X-Originating-IP: [10.167.226.52] X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 59.151.112.132 Cc: Kevin Wolf , Alberto Garcia , qemu block , Jiang Yunhong , Dong Eddie , "Dr. David Alan Gilbert" , "Michael R. Hines" , Gonglei , Yang Hongyang , zhanghailiang Subject: [Qemu-devel] [PATCH 06/16] quorum: allow ignoring child errors X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org If the child is not ready, read/write/getlength/flush will return -errno. This is not a critical error and can be ignored: 1. read/write: Just do not report the error event. 2. getlength: Just ignore it. If every child's getlength returns -errno and is ignored, return -EIO. 3. flush: Just ignore it. If every child's flush returns -errno and is ignored, return 0. 
Usage: children.x.ignore-errors=true Signed-off-by: Wen Congyang Signed-off-by: zhanghailiang Signed-off-by: Gonglei Cc: Alberto Garcia --- block/quorum.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++---- qapi/block-core.json | 5 ++- 2 files changed, 91 insertions(+), 8 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 8059861..f23dbb7 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -30,6 +30,7 @@ #define QUORUM_OPT_BLKVERIFY "blkverify" #define QUORUM_OPT_REWRITE "rewrite-corrupted" #define QUORUM_OPT_READ_PATTERN "read-pattern" +#define QUORUM_CHILDREN_OPT_IGNORE_ERRORS "ignore-errors" /* This union holds a vote hash value */ typedef union QuorumVoteValue { @@ -65,6 +66,7 @@ typedef struct QuorumVotes { /* the following structure holds the state of one quorum instance */ typedef struct BDRVQuorumState { BlockDriverState **bs; /* children BlockDriverStates */ + bool *ignore_errors; /* ignore children's error? */ int num_children; /* children count */ int max_children; /* The maximum children count, we need to reallocate * bs if num_children will larger than maximum. 
@@ -100,6 +102,7 @@ typedef struct QuorumChildRequest { uint8_t *buf; int ret; QuorumAIOCB *parent; + int index; } QuorumChildRequest; /* Quorum will use the following structure to track progress of each read/write @@ -212,6 +215,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, acb->qcrs[i].buf = NULL; acb->qcrs[i].ret = 0; acb->qcrs[i].parent = acb; + acb->qcrs[i].index = i; } return acb; @@ -305,7 +309,7 @@ static void quorum_aio_cb(void *opaque, int ret) acb->count++; if (ret == 0) { acb->success_count++; - } else { + } else if (!s->ignore_errors[sacb->index]) { quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret); } assert(acb->count <= s->num_children); @@ -716,19 +720,31 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs, static int64_t quorum_getlength(BlockDriverState *bs) { BDRVQuorumState *s = bs->opaque; - int64_t result; + int64_t result = -EIO; int i; /* check that all file have the same length */ - result = bdrv_getlength(s->bs[0]); - if (result < 0) { - return result; - } - for (i = 1; i < s->num_children; i++) { + for (i = 0; i < s->num_children; i++) { int64_t value = bdrv_getlength(s->bs[i]); + if (value < 0) { return value; } + + if (value == 0 && s->ignore_errors[i]) { + /* + * If the child is not ready, it cannot return -errno, + * otherwise refresh_total_sectors() will fail when + * we open the child. 
+ */ + continue; + } + + if (result == -EIO) { + result = value; + continue; + } + if (value != result) { return -EIO; } @@ -766,6 +782,9 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs) for (i = 0; i < s->num_children; i++) { result = bdrv_co_flush(s->bs[i]); + if (result < 0 && s->ignore_errors[i]) { + result = 0; + } result_value.l = result; quorum_count_vote(&error_votes, &result_value, i); } @@ -840,6 +859,19 @@ static QemuOptsList quorum_runtime_opts = { }, }; +static QemuOptsList quorum_children_common_opts = { + .name = "quorum children", + .head = QTAILQ_HEAD_INITIALIZER(quorum_children_common_opts.head), + .desc = { + { + .name = QUORUM_CHILDREN_OPT_IGNORE_ERRORS, + .type = QEMU_OPT_BOOL, + .help = "ignore child I/O error", + }, + { /* end of list */ } + }, +}; + static int parse_read_pattern(const char *opt) { int i; @@ -858,6 +890,37 @@ static int parse_read_pattern(const char *opt) return -EINVAL; } +static int parse_children_options(BDRVQuorumState *s, QDict *options, + const char *indexstr, int index, + Error **errp) +{ + QemuOpts *children_opts = NULL; + Error *local_err = NULL; + int ret = 0; + bool value; + + children_opts = qemu_opts_create(&quorum_children_common_opts, NULL, 0, + &error_abort); + qemu_opts_absorb_qdict_by_index(children_opts, options, indexstr, + &local_err); + if (local_err) { + ret = -EINVAL; + goto out; + } + + value = qemu_opt_get_bool(children_opts, QUORUM_CHILDREN_OPT_IGNORE_ERRORS, + false); + s->ignore_errors[index] = value; + +out: + qemu_opts_del(children_opts); + /* propagate error */ + if (local_err) { + error_propagate(errp, local_err); + } + return ret; +} + static int quorum_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -929,12 +992,18 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, s->bs = g_new0(BlockDriverState *, s->num_children); opened = g_new0(bool, s->num_children); s->max_children = s->num_children; + s->ignore_errors = g_new0(bool, 
s->num_children); for (i = 0; i < s->num_children; i++) { char indexstr[32]; ret = snprintf(indexstr, 32, "children.%d", i); assert(ret < 32); + ret = parse_children_options(s, options, indexstr, i, &local_err); + if (ret < 0) { + goto close_exit; + } + ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, bs, &child_format, false, &local_err); if (ret < 0) { @@ -976,6 +1045,7 @@ static void quorum_close(BlockDriverState *bs) } g_free(s->bs); + g_free(s->ignore_errors); } static void quorum_detach_aio_context(BlockDriverState *bs) @@ -1014,10 +1084,18 @@ static void quorum_add_child(BlockDriverState *bs, QDict *options, Error **errp) } s->bs = g_renew(BlockDriverState *, s->bs, s->max_children + 1); + s->ignore_errors = g_renew(bool, s->ignore_errors, s->max_children + 1); s->bs[s->num_children] = NULL; s->max_children += 1; } + ret = parse_children_options(s, options, "child", s->num_children, + &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return; + } + ret = bdrv_open_image(&s->bs[s->num_children], NULL, options, "child", bs, &child_format, false, &local_err); if (ret < 0) { @@ -1058,6 +1136,8 @@ static void quorum_del_child(BlockDriverState *bs, BlockDriverState *child_bs, bdrv_drain(bs); /* We can safely remove this child now */ memmove(&s->bs[i], &s->bs[i+1], (s->num_children - i - 1) * sizeof(void *)); + memmove(&s->ignore_errors[i], &s->ignore_errors[i+1], + (s->num_children - i - 1) * sizeof(bool)); s->num_children--; s->bs[s->num_children] = NULL; bdrv_unref(child_bs); diff --git a/qapi/block-core.json b/qapi/block-core.json index bf141a2..24099ef 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1411,6 +1411,8 @@ # @allow-write-backing-file: #optional whether the backing file is opened in # read-write mode. It is only for backing file # (Since 2.5 default: false) +# @ignore-errors: #optional whether the child's I/O error should be ignored. 
+# It is only for quorum's child. (Since 2.5 default: false) # # Since: 1.7 ## @@ -1425,7 +1427,8 @@ '*werror': 'BlockdevOnError', '*read-only': 'bool', '*detect-zeroes': 'BlockdevDetectZeroesOptions', - '*allow-write-backing-file': 'bool' } } + '*allow-write-backing-file': 'bool', + '*ignore-errors': 'bool' } } ## # @BlockdevOptionsFile