From patchwork Tue Nov  3 14:12:09 2015
X-Patchwork-Submitter: "Denis V. Lunev"
X-Patchwork-Id: 539436
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 3 Nov 2015 17:12:09 +0300
Message-Id: <1446559933-28965-7-git-send-email-den@openvz.org>
In-Reply-To: <1446559933-28965-1-git-send-email-den@openvz.org>
References: <1446559933-28965-1-git-send-email-den@openvz.org>
Cc: Kevin Wolf, "Denis V. Lunev", qemu-devel@nongnu.org,
 Stefan Hajnoczi, qemu-stable@nongnu.org
Subject: [Qemu-devel] [PATCH 06/10] io: guard aio_poll with aio_context_acquire

There is no problem if this is called from an iothread, where the
AioContext is properly acquired. Unfortunately, this code is also
called from the HMP thread, and that leads to a disaster:

    HMP thread                         IO thread (in aio_poll)
                                       |
    qemu_coroutine_enter               |
    while (rwco.ret == NOT_DONE)       |
      aio_poll                         |
        aio_context_acquire            |
                                       | ret from qemu_poll_ns
                                       | aio_context_acquire (nested = 2)
                                       | process bdrv_rw_co_entry, set rwco.ret
                                       | aio_context_release (nested = 1)
                                       | reenters aio_poll, clears events
                                       | aio_context_release
        aio_context_release
        qemu_poll_ns

In this case the HMP thread will never be woken up. Alas.

This means that all such patterns MUST be guarded with
aio_context_is_owner checks. This is terrible, but once we have found
all such places, we can fix them with ease. Another approach would be
to take the lock at the very top (at the beginning of the operation),
but that is much more difficult and would spread aio_context_acquire
into a lot of unrelated pieces of code.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Stefan Hajnoczi
CC: Kevin Wolf
---
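
[Illustration, not part of the patch: every hunk below applies the same
guarded synchronous-wait shape. Here start_async_op() and set_ret_cb()
are hypothetical stand-ins for the driver-specific call being waited on
(qed_read_table(), nfs_fstat_async(), ...).]

    #include <errno.h>
    #include "block/aio.h"  /* aio_poll(), aio_context_acquire/release() */

    /* Hypothetical completion callback: publish the result so that the
     * wait loop below can observe it. */
    static void set_ret_cb(void *opaque, int ret)
    {
        *(int *)opaque = ret;
    }

    /* Hypothetical async starter standing in for the real driver call. */
    void start_async_op(AioContext *ctx,
                        void (*cb)(void *opaque, int ret), void *opaque);

    static int sync_wait_guarded(AioContext *ctx)
    {
        int ret = -EINPROGRESS;

        aio_context_acquire(ctx);    /* serialize with the iothread */
        start_async_op(ctx, set_ret_cb, &ret);
        while (ret == -EINPROGRESS) {
            aio_poll(ctx, true);     /* dispatch handlers until completion */
        }
        aio_context_release(ctx);    /* let the iothread run again */
        return ret;
    }

Without the acquire/release pair the iothread may consume the
completion event between this thread's check of ret and its blocking
qemu_poll_ns(), which is exactly the lost wakeup shown in the diagram
above.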
 block.c           |  5 ++++-
 block/curl.c      |  3 +++
 block/io.c        | 11 +++++++++++
 block/iscsi.c     |  2 ++
 block/nfs.c       |  5 +++++
 block/qed-table.c | 20 ++++++++++++++++----
 block/sheepdog.c  |  2 ++
 blockjob.c        |  6 ++++++
 qemu-io-cmds.c    |  6 +++++-
 9 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index 98b0b66..cf858a7 100644
--- a/block.c
+++ b/block.c
@@ -359,11 +359,14 @@ int bdrv_create(BlockDriver *drv, const char* filename,
         /* Fast-path if already in coroutine context */
         bdrv_create_co_entry(&cco);
     } else {
+        AioContext *ctx = qemu_get_aio_context();
         co = qemu_coroutine_create(bdrv_create_co_entry);
+        aio_context_acquire(ctx);
         qemu_coroutine_enter(co, &cco);
         while (cco.ret == NOT_DONE) {
-            aio_poll(qemu_get_aio_context(), true);
+            aio_poll(ctx, true);
         }
+        aio_context_release(ctx);
     }
 
     ret = cco.ret;
diff --git a/block/curl.c b/block/curl.c
index 8994182..33c024d 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -378,6 +378,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 {
     CURLState *state = NULL;
     int i, j;
+    AioContext *ctx = bdrv_get_aio_context(bs);
 
     do {
         for (i=0; i<CURL_NUM_STATES; i++) {

[The rest of the block/curl.c diff, the whole block/io.c diff, and the
beginning of the block/iscsi.c diff were corrupted in the archived
copy; the surviving tail of the block/iscsi.c hunk follows.]

+        aio_context_acquire(iscsilun->aio_context);
         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
         while (status == -EINPROGRESS) {
             aio_poll(iscsilun->aio_context, true);
         }
+        aio_context_release(iscsilun->aio_context);
 
         return 0;
     default:
diff --git a/block/nfs.c b/block/nfs.c
index fd79f89..36ec1e1 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -462,6 +462,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
     NFSClient *client = bs->opaque;
     NFSRPC task = {0};
     struct stat st;
+    AioContext *ctx;
 
     if (bdrv_is_read_only(bs) &&
         !(bs->open_flags & BDRV_O_NOCACHE)) {
@@ -469,8 +470,11 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
     }
 
     task.st = &st;
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
                         &task) != 0) {
+        aio_context_release(ctx);
         return -ENOMEM;
     }
 
@@ -478,6 +482,7 @@
         nfs_set_events(client);
         aio_poll(client->aio_context, true);
     }
+    aio_context_release(ctx);
 
     return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }
diff --git a/block/qed-table.c b/block/qed-table.c
index f4219b8..fa13aba 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -169,12 +169,15 @@ static void qed_sync_cb(void *opaque, int ret)
 int qed_read_l1_table_sync(BDRVQEDState *s)
 {
     int ret = -EINPROGRESS;
+    AioContext *ctx = bdrv_get_aio_context(s->bs);
 
+    aio_context_acquire(ctx);
     qed_read_table(s, s->header.l1_table_offset,
                    s->l1_table, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
 
     return ret;
 }
@@ -191,11 +194,14 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                             unsigned int n)
 {
     int ret = -EINPROGRESS;
+    AioContext *ctx = bdrv_get_aio_context(s->bs);
 
+    aio_context_acquire(ctx);
     qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
 
     return ret;
 }
@@ -264,11 +270,14 @@ void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
 int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
 {
     int ret = -EINPROGRESS;
+    AioContext *ctx = bdrv_get_aio_context(s->bs);
 
+    aio_context_acquire(ctx);
     qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
 
     return ret;
 }
@@ -286,11 +295,14 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                             unsigned int index, unsigned int n, bool flush)
 {
     int ret = -EINPROGRESS;
+    AioContext *ctx = bdrv_get_aio_context(s->bs);
 
+    aio_context_acquire(ctx);
     qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
 
     return ret;
 }
diff --git a/block/sheepdog.c b/block/sheepdog.c
index d80e4ed..038a385 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -715,11 +715,13 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
     if (qemu_in_coroutine()) {
         do_co_req(&srco);
     } else {
+        aio_context_acquire(aio_context);
         co = qemu_coroutine_create(do_co_req);
         qemu_coroutine_enter(co, &srco);
         while (!srco.finished) {
             aio_poll(aio_context, true);
         }
+        aio_context_release(aio_context);
     }
 
     return srco.ret;
diff --git a/blockjob.c b/blockjob.c
index c02fe59..9ddb958 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -194,6 +194,7 @@ static int block_job_finish_sync(BlockJob *job,
     struct BlockFinishData data;
     BlockDriverState *bs = job->bs;
     Error *local_err = NULL;
+    AioContext *ctx;
 
     assert(bs->job == job);
@@ -206,14 +207,19 @@
     data.ret = -EINPROGRESS;
     job->cb = block_job_finish_cb;
     job->opaque = &data;
+
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     finish(job, &local_err);
     if (local_err) {
+        aio_context_release(ctx);
         error_propagate(errp, local_err);
         return -EBUSY;
     }
     while (data.ret == -EINPROGRESS) {
         aio_poll(bdrv_get_aio_context(bs), true);
     }
+    aio_context_release(ctx);
 
     return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
 }
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 6e5d1e4..45299cd 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -474,12 +474,16 @@ static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int count,
         .total  = total,
         .done   = false,
     };
+    AioContext *ctx = blk_get_aio_context(blk);
 
+    aio_context_acquire(ctx);
     co = qemu_coroutine_create(co_write_zeroes_entry);
     qemu_coroutine_enter(co, &data);
     while (!data.done) {
-        aio_poll(blk_get_aio_context(blk), true);
+        aio_poll(ctx, true);
     }
+    aio_context_release(ctx);
+
     if (data.ret < 0) {
         return data.ret;
     } else {
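
[Illustration, not part of the patch: the "nested = 2" step in the
diagram relies on aio_context_acquire() being recursive within the
owning thread -- in this tree it is implemented on top of RFifoLock.
Schematically:]

    #include "block/aio.h"

    static void nested_acquire_demo(AioContext *ctx)
    {
        aio_context_acquire(ctx);    /* nesting depth 1 */
        aio_context_acquire(ctx);    /* same thread: depth 2, no deadlock */
        aio_context_release(ctx);    /* back to depth 1 */
        aio_context_release(ctx);    /* fully released; another thread
                                      * (e.g. the HMP thread) may acquire */
    }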