@@ -207,8 +207,6 @@ void job_user_cancel(Job *job, bool force, Error **errp);
*
* Returns the return value from the job if the job actually completed
* during the call, or -ECANCELED if it was canceled.
- *
- * Callers must hold the AioContext lock of job->aio_context.
*/
int job_cancel_sync(Job *job, bool force);
@@ -233,8 +231,6 @@ void job_cancel_sync_all(void);
*
* Returns the return value from the job.
*
- * Callers must hold the AioContext lock of job->aio_context.
- *
* Called between job_lock and job_unlock.
*/
int job_complete_sync(Job *job, Error **errp);
@@ -266,8 +262,6 @@ void job_dismiss(Job **job, Error **errp);
* Returns 0 if the job is successfully completed, -ECANCELED if the job was
* cancelled before completing, and -errno in other error cases.
*
- * Callers must hold the AioContext lock of job->aio_context.
- *
* Called between job_lock and job_unlock, but it releases the lock temporarly.
*/
int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp);
@@ -728,9 +728,11 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
* disk, secondary disk in backup_job_completed().
*/
if (s->backup_job) {
+ aio_context_release(aio_context);
job_lock();
job_cancel_sync(&s->backup_job->job, true);
job_unlock();
+ aio_context_acquire(aio_context);
}
if (!failover) {
@@ -154,12 +154,7 @@ void blockdev_mark_auto_del(BlockBackend *blk)
for (job = block_job_next(NULL); job; job = block_job_next(job)) {
if (block_job_has_bdrv(job, blk_bs(blk))) {
- AioContext *aio_context = job->job.aio_context;
- aio_context_acquire(aio_context);
-
job_cancel(&job->job, false);
-
- aio_context_release(aio_context);
}
}
@@ -1843,16 +1838,9 @@ static void drive_backup_abort(BlkActionState *common)
DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
if (state->job) {
- AioContext *aio_context;
-
- aio_context = bdrv_get_aio_context(state->bs);
- aio_context_acquire(aio_context);
-
job_lock();
job_cancel_sync(&state->job->job, true);
job_unlock();
-
- aio_context_release(aio_context);
}
}
@@ -1946,16 +1934,9 @@ static void blockdev_backup_abort(BlkActionState *common)
BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
if (state->job) {
- AioContext *aio_context;
-
- aio_context = bdrv_get_aio_context(state->bs);
- aio_context_acquire(aio_context);
-
job_lock();
job_cancel_sync(&state->job->job, true);
job_unlock();
-
- aio_context_release(aio_context);
}
}
@@ -168,7 +168,6 @@ static int job_txn_apply(Job *job, int fn(Job *))
* break AIO_WAIT_WHILE from within fn.
*/
job_ref(job);
- aio_context_release(job->aio_context);
QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
rc = fn(other_job);
@@ -177,11 +176,6 @@ static int job_txn_apply(Job *job, int fn(Job *))
}
}
- /*
- * Note that job->aio_context might have been changed by calling fn, so we
- * can't use a local variable to cache it.
- */
- aio_context_acquire(job->aio_context);
job_unref(job);
return rc;
}
@@ -510,7 +504,10 @@ void job_unref(Job *job)
if (job->driver->free) {
job_unlock();
+ /* FIXME: aiocontext lock is required because cb calls blk_unref */
+ aio_context_acquire(job->aio_context);
job->driver->free(job);
+ aio_context_release(job->aio_context);
job_lock();
}
@@ -978,7 +975,6 @@ static void job_cancel_async(Job *job, bool force)
/* Called with job_mutex held. */
static void job_completed_txn_abort(Job *job)
{
- AioContext *ctx;
JobTxn *txn = job->txn;
Job *other_job;
@@ -991,54 +987,28 @@ static void job_completed_txn_abort(Job *job)
txn->aborting = true;
job_txn_ref(txn);
- /*
- * We can only hold the single job's AioContext lock while calling
- * job_finalize_single() because the finalization callbacks can involve
- * calls of AIO_WAIT_WHILE(), which could deadlock otherwise.
- * Note that the job's AioContext may change when it is finalized.
- */
- job_ref(job);
- aio_context_release(job->aio_context);
-
/* Other jobs are effectively cancelled by us, set the status for
* them; this job, however, may or may not be cancelled, depending
* on the caller, so leave it. */
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (other_job != job) {
- ctx = other_job->aio_context;
- aio_context_acquire(ctx);
/*
* This is a transaction: If one job failed, no result will matter.
* Therefore, pass force=true to terminate all other jobs as quickly
* as possible.
*/
job_cancel_async(other_job, true);
- aio_context_release(ctx);
}
}
while (!QLIST_EMPTY(&txn->jobs)) {
other_job = QLIST_FIRST(&txn->jobs);
- /*
- * The job's AioContext may change, so store it in @ctx so we
- * release the same context that we have acquired before.
- */
- ctx = other_job->aio_context;
- aio_context_acquire(ctx);
if (!job_is_completed(other_job)) {
assert(job_cancel_requested_locked(other_job));
job_finish_sync(other_job, NULL, NULL);
}
job_finalize_single(other_job);
- aio_context_release(ctx);
}
- /*
- * Use job_ref()/job_unref() so we can read the AioContext here
- * even if the job went away during job_finalize_single().
- */
- aio_context_acquire(job->aio_context);
- job_unref(job);
-
job_txn_unref(txn);
}
@@ -1159,11 +1129,8 @@ static void job_completed(Job *job)
static void job_exit(void *opaque)
{
Job *job = (Job *)opaque;
- AioContext *ctx;
job_lock();
- job_ref(job);
- aio_context_acquire(job->aio_context);
/* This is a lie, we're not quiescent, but still doing the completion
* callbacks. However, completion callbacks tend to involve operations that
@@ -1174,16 +1141,7 @@ static void job_exit(void *opaque)
job_completed(job);
- /*
- * Note that calling job_completed can move the job to a different
- * aio_context, so we cannot cache from above. job_txn_apply takes care of
- * acquiring the new lock, and we ref/unref to avoid job_completed freeing
- * the job underneath us.
- */
- ctx = job->aio_context;
- job_unref(job);
job_unlock();
- aio_context_release(ctx);
}
/**
@@ -1297,14 +1255,10 @@ int job_cancel_sync(Job *job, bool force)
void job_cancel_sync_all(void)
{
Job *job;
- AioContext *aio_context;
job_lock();
while ((job = job_next(NULL))) {
- aio_context = job->aio_context;
- aio_context_acquire(aio_context);
job_cancel_sync(job, true);
- aio_context_release(aio_context);
}
job_unlock();
}
@@ -1350,8 +1304,9 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
}
job_unlock();
- AIO_WAIT_WHILE(job->aio_context,
- (job_enter(job), !job_is_completed_unlocked(job)));
+ AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
+ (job_enter(job),
+ !job_is_completed_unlocked(job)));
job_lock();
ret = (job_is_cancelled_locked(job) && job->ret == 0) ?
@@ -928,9 +928,9 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
tjob->prepare_ret = -EIO;
break;
}
+ aio_context_release(ctx);
job_start(&job->job);
- aio_context_release(ctx);
if (use_iothread) {
/* job_co_entry() is run in the I/O thread, wait for the actual job
@@ -994,12 +994,12 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
g_assert_false(job_get_paused(&job->job));
g_assert_true(job_get_busy(&job->job)); /* We're in qemu_co_sleep_ns() */
- aio_context_acquire(ctx);
job_lock();
ret = job_complete_sync(&job->job, &error_abort);
job_unlock();
g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO));
+ aio_context_acquire(ctx);
if (use_iothread) {
blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort);
assert(blk_get_aio_context(blk_target) == qemu_get_aio_context());
@@ -455,10 +455,10 @@ static void test_attach_blockjob(void)
aio_poll(qemu_get_aio_context(), false);
}
- aio_context_acquire(ctx);
job_lock();
job_complete_sync(&tjob->common.job, &error_abort);
job_unlock();
+ aio_context_acquire(ctx);
blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
aio_context_release(ctx);
@@ -228,10 +228,6 @@ static void cancel_common(CancelJob *s)
BlockJob *job = &s->common;
BlockBackend *blk = s->blk;
JobStatus sts = job->job.status;
- AioContext *ctx;
-
- ctx = job->job.aio_context;
- aio_context_acquire(ctx);
job_lock();
job_cancel_sync(&job->job, true);
@@ -244,7 +240,6 @@ static void cancel_common(CancelJob *s)
job_unlock();
destroy_blk(blk);
- aio_context_release(ctx);
}
static void test_cancel_created(void)
@@ -381,11 +376,9 @@ static void test_cancel_concluded(void)
aio_poll(qemu_get_aio_context(), true);
assert(job_get_status(job) == JOB_STATUS_PENDING);
- aio_context_acquire(job->aio_context);
job_lock();
job_finalize(job, &error_abort);
job_unlock();
- aio_context_release(job->aio_context);
assert(job_get_status(job) == JOB_STATUS_CONCLUDED);
cancel_common(s);
@@ -478,9 +471,7 @@ static void test_complete_in_standby(void)
/* Wait for the job to become READY */
job_start(job);
- aio_context_acquire(ctx);
- AIO_WAIT_WHILE(ctx, job_get_status(job) != JOB_STATUS_READY);
- aio_context_release(ctx);
+ AIO_WAIT_WHILE_UNLOCKED(ctx, job_get_status(job) != JOB_STATUS_READY);
/* Begin the drained section, pausing the job */
bdrv_drain_all_begin();
@@ -498,6 +489,7 @@ static void test_complete_in_standby(void)
job_complete(job, &error_abort);
/* The test is done now, clean up. */
+ aio_context_release(ctx);
job_finish_sync(job, NULL, &error_abort);
assert(job->status == JOB_STATUS_PENDING);
@@ -507,6 +499,7 @@ static void test_complete_in_standby(void)
job_dismiss(&job, &error_abort);
job_unlock();
+ aio_context_acquire(ctx);
destroy_blk(blk);
aio_context_release(ctx);
iothread_join(iothread);
Now that we removed the aiocontext in find_* functions, we need to remove it also in all other functions of the job API that assumed the lock was held. Therefore: - when possible, remove completely the aiocontext lock/unlock pair - if it is used by some other funcitons too, just leave the job API outside the aiocontext lock by adding unlock/lock pairs. There is only one JobDriver callback, ->free() that assumes that the aiocontext lock is held (because it calls bdrv_unref), so for now keep that under aiocontext lock. Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> --- include/qemu/job-monitor.h | 6 ---- block/replication.c | 2 ++ blockdev.c | 19 ----------- job.c | 57 ++++---------------------------- tests/unit/test-bdrv-drain.c | 4 +-- tests/unit/test-block-iothread.c | 2 +- tests/unit/test-blockjob.c | 13 ++------ 7 files changed, 14 insertions(+), 89 deletions(-)