@@ -1296,6 +1296,7 @@ static void GRAPH_WRLOCK bdrv_backing_attach(BdrvChild *c)
parent->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
parent->backing_blocker);
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_INFO, parent->backing_blocker);
}
static void bdrv_backing_detach(BdrvChild *c)
@@ -1191,6 +1191,7 @@ static void mirror_complete(Job *job, Error **errp)
error_setg(&s->replace_blocker,
"block device is in use by block-job-complete");
bdrv_op_block_all(s->to_replace, s->replace_blocker);
+ bdrv_op_unblock(s->to_replace, BLOCK_OP_TYPE_INFO, s->replace_blocker);
bdrv_ref(s->to_replace);
}
@@ -2834,6 +2834,7 @@ qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp)
BdrvChild *data_file;
int flags = s->flags;
QCryptoBlock *crypto = NULL;
+ Error *blocker = NULL;
QDict *options;
int ret;
@@ -2845,6 +2846,17 @@ qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp)
crypto = s->crypto;
s->crypto = NULL;
+ /*
+ * When qcow2_do_open() below reads the qcow header, it yields to
+ * wait for the I/O which allows a concurrent QMP query-block
+ * command to be dispatched on the same context before
+ * BDRVQcow2State has been completely repopulated. Block the
+ * query-info operation during this window to avoid having
+ * qcow2_get_specific_info() access bogus values.
+ */
+ error_setg(&blocker, "invalidating cached metadata");
+ bdrv_op_block(bs, BLOCK_OP_TYPE_INFO, blocker);
+
/*
* Do not reopen s->data_file (i.e., have qcow2_do_close() not close it,
* and then prevent qcow2_do_open() from opening it), because this function
@@ -2864,6 +2876,8 @@ qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp)
qemu_co_mutex_lock(&s->lock);
ret = qcow2_do_open(bs, options, flags, false, errp);
qemu_co_mutex_unlock(&s->lock);
+ bdrv_op_unblock(bs, BLOCK_OP_TYPE_INFO, blocker);
+ g_free(blocker);
qobject_unref(options);
if (ret < 0) {
error_prepend(errp, "Could not reopen qcow2 layer: ");
@@ -5240,6 +5254,12 @@ qcow2_get_specific_info(BlockDriverState *bs, Error **errp)
ImageInfoSpecific *spec_info;
QCryptoBlockInfo *encrypt_info = NULL;
+ if (qemu_in_coroutine() &&
+ bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INFO, errp)) {
+ errp = NULL;
+ aio_co_reschedule_self(iohandler_get_aio_context());
+ }
+
if (s->crypto != NULL) {
encrypt_info = qcrypto_block_get_info(s->crypto, errp);
if (!encrypt_info) {
@@ -577,6 +577,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
}
bdrv_op_block_all(top_bs, s->blocker);
bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
+ bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_INFO, s->blocker);
bdrv_graph_wrunlock();
@@ -244,6 +244,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
job->nodes = g_slist_prepend(job->nodes, c);
bdrv_op_block_all(bs, job->blocker);
+ bdrv_op_unblock(bs, BLOCK_OP_TYPE_INFO, job->blocker);
return 0;
}
@@ -364,6 +364,7 @@ typedef enum BlockOpType {
BLOCK_OP_TYPE_RESIZE,
BLOCK_OP_TYPE_STREAM,
BLOCK_OP_TYPE_REPLACE,
+ BLOCK_OP_TYPE_INFO,
BLOCK_OP_TYPE_MAX,
} BlockOpType;
@@ -451,6 +451,7 @@ static int init_blk_migration(QEMUFile *f)
alloc_aio_bitmap(bmds);
error_setg(&bmds->blocker, "block device is in use by migration");
bdrv_op_block_all(bs, bmds->blocker);
+ bdrv_op_unblock(bs, BLOCK_OP_TYPE_INFO, bmds->blocker);
}
}
There is a small window at the end of block device migration when devices are being re-activated. This includes a resetting of some fields of BDRVQcow2State at qcow2_co_invalidate_cache(). A concurrent QMP query-block command can call qcow2_get_specific_info() during this window and see the cleared values, which leads to an assert: qcow2_get_specific_info: Assertion `false' failed This is the same issue as Gitlab #1933, which has already been resolved[1], but there the fix applied only to non-coroutine commands. Once we move query-block to a coroutine the problem will manifest again. Add an operation blocker to the invalidation function to block the query info path during this window. Instead of failing query-block, which would be disruptive to users, use the blocker to know when to reschedule the coroutine back into the iohandler so it doesn't run while the BDRVQcow2State is inconsistent. To avoid failing query-block when all block operations are blocked, unblock the INFO operation at various places. This preserves the prior situations where query-block used to work. 1 - https://gitlab.com/qemu-project/qemu/-/issues/1933 Signed-off-by: Fabiano Rosas <farosas@suse.de> --- block.c | 1 + block/mirror.c | 1 + block/qcow2.c | 20 ++++++++++++++++++++ block/replication.c | 1 + blockjob.c | 1 + include/block/block-common.h | 1 + migration/block.c | 1 + 7 files changed, 26 insertions(+)