diff mbox

[2/7] block: Framework for reopening files safely

Message ID e4b2728c331323bf0a46f30279f66bf91f6d3657.1346352124.git.jcody@redhat.com
State New
Headers show

Commit Message

Jeff Cody Aug. 30, 2012, 6:47 p.m. UTC
This is based heavily on Supriya Kannery's bdrv_reopen()
patch series.

This provides a transactional method to reopen multiple
images files safely.

Image files are queue for reopen via bdrv_reopen_queue(), and the
reopen occurs when bdrv_reopen_multiple() is called.  Changes are
staged in bdrv_reopen_prepare() and in the equivalent driver level
functions.  If any of the staged images fails a prepare, then all
of the images left untouched, and the staged changes for each image
abandoned.

Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block.c       | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 block.h       |  15 ++++
 block_int.h   |  13 ++++
 qemu-common.h |   1 +
 4 files changed, 255 insertions(+)

Comments

Kevin Wolf Sept. 5, 2012, 3:09 p.m. UTC | #1
Am 30.08.2012 20:47, schrieb Jeff Cody:
> This is based heavily on Supriya Kannery's bdrv_reopen()
> patch series.
> 
> This provides a transactional method to reopen multiple
> images files safely.
> 
> Image files are queue for reopen via bdrv_reopen_queue(), and the
> reopen occurs when bdrv_reopen_multiple() is called.  Changes are
> staged in bdrv_reopen_prepare() and in the equivalent driver level
> functions.  If any of the staged images fails a prepare, then all
> of the images left untouched, and the staged changes for each image
> abandoned.
> 
> Signed-off-by: Jeff Cody <jcody@redhat.com>

> +/*
> + * Reopen multiple BlockDriverStates atomically & transactionally.
> + *
> + * The queue passed in (bs_queue) must have been built up previous
> + * via bdrv_reopen_queue().
> + *
> + * Reopens all BDS specified in the queue, with the appropriate
> + * flags.  All devices are prepared for reopen, and failure of any
> + * device will cause all device changes to be abandonded, and intermediate
> + * data cleaned up.
> + *
> + * If all devices prepare successfully, then the changes are committed
> + * to all devices.
> + *
> + */
> +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
> +{
> +    int ret = -1;
> +    BlockReopenQueueEntry *bs_entry;
> +    Error *local_err = NULL;
> +
> +    assert(bs_queue != NULL);
> +
> +    bdrv_drain_all();
> +
> +    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
> +        if (bdrv_reopen_prepare(bs_entry->state, &local_err)) {
> +            error_propagate(errp, local_err);
> +            goto cleanup;
> +        }
> +        bs_entry->prepared = true;
> +    }
> +
> +    /* If we reach this point, we have success and just need to apply the
> +     * changes
> +     */
> +    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
> +        bdrv_reopen_commit(bs_entry->state);
> +    }
> +
> +    ret = 0;
> +
> +cleanup:
> +    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
> +        if (ret && bs_entry->prepared) {
> +            bdrv_reopen_abort(bs_entry->state);
> +        }
> +        g_free(bs_entry->state);
> +        g_free(bs_entry);
> +    }

Without QSIMPLEQ_FOREACH_SAFE, isn't this a use after free?

> +    g_free(bs_queue);
> +    return ret;
> +}
> +
> +
> +/* Reopen a single BlockDriverState with the specified flags. */
> +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
> +{
> +    int ret = -1;
> +    Error *local_err = NULL;
> +    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
> +
> +    ret = bdrv_reopen_multiple(queue, &local_err);
> +    if (local_err != NULL) {
> +        error_propagate(errp, local_err);
> +    }
> +    return ret;
> +}
> +
> +
> +/*
> + * Prepares a BlockDriverState for reopen. All changes are staged in the
> + * 'reopen_state' field of the BlockDriverState, which must be NULL when
> + * entering (all previous reopens must have completed for the BDS).
> + *
> + * bs is the BlockDriverState to reopen
> + * flags are the new open flags
> + *
> + * Returns 0 on success, non-zero on error.  On error errp will be set
> + * as well.
> + *
> + * On failure, bdrv_reopen_abort() will be called to clean up any data.
> + * It is the responsibility of the caller to then call the abort() or
> + * commit() for any other BDS that have been left in a prepare() state
> + *
> + */
> +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp)
> +{
> +    int ret = -1;
> +    Error *local_err = NULL;
> +    BlockDriver *drv;
> +
> +    assert(reopen_state != NULL);
> +    assert(reopen_state->bs->drv != NULL);
> +    drv = reopen_state->bs->drv;
> +
> +    /* if we are to stay read-only, do not allow permission change
> +     * to r/w */
> +    if (reopen_state->bs->keep_read_only &&

Just for completeness, we decided to use the flag here instead of
keep_read_only.

> +        reopen_state->flags & BDRV_O_RDWR) {
> +        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
> +                  reopen_state->bs->device_name);
> +        goto error;
> +    }
> +
> +
> +    ret = bdrv_flush(reopen_state->bs);
> +    if (ret) {
> +        error_set(errp, QERR_IO_ERROR);
> +        goto error;
> +    }

This throws the error code away. Bad. We should probably change
QERR_IO_ERROR so that you can include strerror(-ret).

> +
> +    if (drv->bdrv_reopen_prepare) {
> +        ret = drv->bdrv_reopen_prepare(reopen_state, &local_err);
> +        if (ret) {
> +            if (local_err != NULL) {
> +                error_propagate(errp, local_err);
> +            } else {
> +                error_set(errp, QERR_OPEN_FILE_FAILED,
> +                          reopen_state->bs->filename);
> +            }
> +            goto error;
> +        }
> +    } else {
> +        /* It is currently mandatory to have a bdrv_reopen_prepare()
> +         * handler for each supported drv. */
> +        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
> +                  drv->format_name, reopen_state->bs->device_name,
> +                 "reopening of file");
> +        ret = -1;
> +        goto error;
> +    }
> +
> +    return 0;
> +
> +error:
> +    bdrv_reopen_abort(reopen_state);

This is unexpected for me. Shouldn't .bdrv_reopen_prepare() clean up
before returning an error, like any other function does? (Which could
actually be a call to bdrv_reopen_abort() where it makes sense.)

If you use .bdrv_reopen_abort() for it, block drivers must take care to
write this function in way that doesn't assume that
.bdrv_reopen_prepare() has completed. Sounds rather nasty to me.

> +    return ret;
> +}
> +
> +/*
> + * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
> + * makes them final by swapping the staging BlockDriverState contents into
> + * the active BlockDriverState contents.
> + */
> +void bdrv_reopen_commit(BDRVReopenState *reopen_state)
> +{
> +    BlockDriver *drv;
> +
> +    assert(reopen_state != NULL);
> +    drv = reopen_state->bs->drv;
> +    assert(drv != NULL);
> +
> +    /* If there are any driver level actions to take */
> +    if (drv->bdrv_reopen_commit) {
> +        drv->bdrv_reopen_commit(reopen_state);
> +    }
> +
> +    /* set BDS specific flags now */
> +    reopen_state->bs->open_flags         = reopen_state->flags;
> +    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
> +                                              BDRV_O_CACHE_WB);
> +    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);

Hm, I wonder if these three lines can somehow be shared with the normal
bdrv_open so that they stay in sync.

> +}
> +
> +/*
> + * Abort the reopen, and delete and free the staged changes in
> + * reopen_state
> + */
> +void bdrv_reopen_abort(BDRVReopenState *reopen_state)
> +{
> +    BlockDriver *drv;
> +
> +    assert(reopen_state != NULL);
> +    drv = reopen_state->bs->drv;
> +    assert(drv != NULL);
> +
> +    if (drv->bdrv_reopen_abort) {
> +        drv->bdrv_reopen_abort(reopen_state);
> +    }
> +}
> +
> +
>  void bdrv_close(BlockDriverState *bs)
>  {
>      bdrv_flush(bs);
> diff --git a/block.h b/block.h
> index 4d919c2..db812b1 100644
> --- a/block.h
> +++ b/block.h
> @@ -97,6 +97,14 @@ typedef enum {
>      BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
>  } BlockQMPEventAction;
>  
> +typedef struct BlockReopenQueueEntry {
> +     bool prepared;
> +     BDRVReopenState *state;

As discussed on IRC, this can be directly embedded instead of using a
pointer.

> +     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
> +} BlockReopenQueueEntry;


Kevin
Jeff Cody Sept. 5, 2012, 4:38 p.m. UTC | #2
On 09/05/2012 11:09 AM, Kevin Wolf wrote:
> Am 30.08.2012 20:47, schrieb Jeff Cody:
>> This is based heavily on Supriya Kannery's bdrv_reopen()
>> patch series.
>>
>> This provides a transactional method to reopen multiple
>> images files safely.
>>
>> Image files are queue for reopen via bdrv_reopen_queue(), and the
>> reopen occurs when bdrv_reopen_multiple() is called.  Changes are
>> staged in bdrv_reopen_prepare() and in the equivalent driver level
>> functions.  If any of the staged images fails a prepare, then all
>> of the images left untouched, and the staged changes for each image
>> abandoned.
>>
>> Signed-off-by: Jeff Cody <jcody@redhat.com>
> 
>> +/*
>> + * Reopen multiple BlockDriverStates atomically & transactionally.
>> + *
>> + * The queue passed in (bs_queue) must have been built up previous
>> + * via bdrv_reopen_queue().
>> + *
>> + * Reopens all BDS specified in the queue, with the appropriate
>> + * flags.  All devices are prepared for reopen, and failure of any
>> + * device will cause all device changes to be abandonded, and intermediate
>> + * data cleaned up.
>> + *
>> + * If all devices prepare successfully, then the changes are committed
>> + * to all devices.
>> + *
>> + */
>> +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
>> +{
>> +    int ret = -1;
>> +    BlockReopenQueueEntry *bs_entry;
>> +    Error *local_err = NULL;
>> +
>> +    assert(bs_queue != NULL);
>> +
>> +    bdrv_drain_all();
>> +
>> +    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
>> +        if (bdrv_reopen_prepare(bs_entry->state, &local_err)) {
>> +            error_propagate(errp, local_err);
>> +            goto cleanup;
>> +        }
>> +        bs_entry->prepared = true;
>> +    }
>> +
>> +    /* If we reach this point, we have success and just need to apply the
>> +     * changes
>> +     */
>> +    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
>> +        bdrv_reopen_commit(bs_entry->state);
>> +    }
>> +
>> +    ret = 0;
>> +
>> +cleanup:
>> +    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
>> +        if (ret && bs_entry->prepared) {
>> +            bdrv_reopen_abort(bs_entry->state);
>> +        }
>> +        g_free(bs_entry->state);
>> +        g_free(bs_entry);
>> +    }
> 
> Without QSIMPLEQ_FOREACH_SAFE, isn't this a use after free?
> 

Yes - that needs to be a QSIMPLEQ_FOREACH_SAFE.


>> +    g_free(bs_queue);
>> +    return ret;
>> +}
>> +
>> +
>> +/* Reopen a single BlockDriverState with the specified flags. */
>> +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
>> +{
>> +    int ret = -1;
>> +    Error *local_err = NULL;
>> +    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
>> +
>> +    ret = bdrv_reopen_multiple(queue, &local_err);
>> +    if (local_err != NULL) {
>> +        error_propagate(errp, local_err);
>> +    }
>> +    return ret;
>> +}
>> +
>> +
>> +/*
>> + * Prepares a BlockDriverState for reopen. All changes are staged in the
>> + * 'reopen_state' field of the BlockDriverState, which must be NULL when
>> + * entering (all previous reopens must have completed for the BDS).
>> + *
>> + * bs is the BlockDriverState to reopen
>> + * flags are the new open flags
>> + *
>> + * Returns 0 on success, non-zero on error.  On error errp will be set
>> + * as well.
>> + *
>> + * On failure, bdrv_reopen_abort() will be called to clean up any data.
>> + * It is the responsibility of the caller to then call the abort() or
>> + * commit() for any other BDS that have been left in a prepare() state
>> + *
>> + */
>> +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp)
>> +{
>> +    int ret = -1;
>> +    Error *local_err = NULL;
>> +    BlockDriver *drv;
>> +
>> +    assert(reopen_state != NULL);
>> +    assert(reopen_state->bs->drv != NULL);
>> +    drv = reopen_state->bs->drv;
>> +
>> +    /* if we are to stay read-only, do not allow permission change
>> +     * to r/w */
>> +    if (reopen_state->bs->keep_read_only &&
> 
> Just for completeness, we decided to use the flag here instead of
> keep_read_only.
> 
>> +        reopen_state->flags & BDRV_O_RDWR) {
>> +        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
>> +                  reopen_state->bs->device_name);
>> +        goto error;
>> +    }
>> +
>> +
>> +    ret = bdrv_flush(reopen_state->bs);
>> +    if (ret) {
>> +        error_set(errp, QERR_IO_ERROR);
>> +        goto error;
>> +    }
> 
> This throws the error code away. Bad. We should probably change
> QERR_IO_ERROR so that you can include strerror(-ret).
> 

Or, I could use the new error_setg() here, and pass along all relevant
information.

>> +
>> +    if (drv->bdrv_reopen_prepare) {
>> +        ret = drv->bdrv_reopen_prepare(reopen_state, &local_err);
>> +        if (ret) {
>> +            if (local_err != NULL) {
>> +                error_propagate(errp, local_err);
>> +            } else {
>> +                error_set(errp, QERR_OPEN_FILE_FAILED,
>> +                          reopen_state->bs->filename);
>> +            }
>> +            goto error;
>> +        }
>> +    } else {
>> +        /* It is currently mandatory to have a bdrv_reopen_prepare()
>> +         * handler for each supported drv. */
>> +        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
>> +                  drv->format_name, reopen_state->bs->device_name,
>> +                 "reopening of file");
>> +        ret = -1;
>> +        goto error;
>> +    }
>> +
>> +    return 0;
>> +
>> +error:
>> +    bdrv_reopen_abort(reopen_state);
> 
> This is unexpected for me. Shouldn't .bdrv_reopen_prepare() clean up
> before returning an error, like any other function does? (Which could
> actually be a call to bdrv_reopen_abort() where it makes sense.)
> 
> If you use .bdrv_reopen_abort() for it, block drivers must take care to
> write this function in way that doesn't assume that
> .bdrv_reopen_prepare() has completed. Sounds rather nasty to me.
> 

Hmm.  Yes, I can see that - and there is no cleanup this function needs
to do itself, so it can just assume that .bdrv_reopen_prepare() will
clean up after itself (which, as you mentioned, will likely be the
driver calling its own .bdrv_reopen_abort()).

Although, I think it should always be good form for the block drivers to
not make such assumptions, if possible.


>> +    return ret;
>> +}
>> +
>> +/*
>> + * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
>> + * makes them final by swapping the staging BlockDriverState contents into
>> + * the active BlockDriverState contents.
>> + */
>> +void bdrv_reopen_commit(BDRVReopenState *reopen_state)
>> +{
>> +    BlockDriver *drv;
>> +
>> +    assert(reopen_state != NULL);
>> +    drv = reopen_state->bs->drv;
>> +    assert(drv != NULL);
>> +
>> +    /* If there are any driver level actions to take */
>> +    if (drv->bdrv_reopen_commit) {
>> +        drv->bdrv_reopen_commit(reopen_state);
>> +    }
>> +
>> +    /* set BDS specific flags now */
>> +    reopen_state->bs->open_flags         = reopen_state->flags;
>> +    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
>> +                                              BDRV_O_CACHE_WB);
>> +    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
> 
> Hm, I wonder if these three lines can somehow be shared with the normal
> bdrv_open so that they stay in sync.

Sure, especially setting enable_write_cache.

> 
>> +}
>> +
>> +/*
>> + * Abort the reopen, and delete and free the staged changes in
>> + * reopen_state
>> + */
>> +void bdrv_reopen_abort(BDRVReopenState *reopen_state)
>> +{
>> +    BlockDriver *drv;
>> +
>> +    assert(reopen_state != NULL);
>> +    drv = reopen_state->bs->drv;
>> +    assert(drv != NULL);
>> +
>> +    if (drv->bdrv_reopen_abort) {
>> +        drv->bdrv_reopen_abort(reopen_state);
>> +    }
>> +}
>> +
>> +
>>  void bdrv_close(BlockDriverState *bs)
>>  {
>>      bdrv_flush(bs);
>> diff --git a/block.h b/block.h
>> index 4d919c2..db812b1 100644
>> --- a/block.h
>> +++ b/block.h
>> @@ -97,6 +97,14 @@ typedef enum {
>>      BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
>>  } BlockQMPEventAction;
>>  
>> +typedef struct BlockReopenQueueEntry {
>> +     bool prepared;
>> +     BDRVReopenState *state;
> 
> As discussed on IRC, this can be directly embedded instead of using a
> pointer.
> 
>> +     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
>> +} BlockReopenQueueEntry;
> 
> 
> Kevin
>
Jeff Cody Sept. 11, 2012, 2:57 p.m. UTC | #3
On 08/30/2012 02:47 PM, Jeff Cody wrote:
> This is based heavily on Supriya Kannery's bdrv_reopen()
> patch series.
> 
> This provides a transactional method to reopen multiple
> images files safely.
> 
> Image files are queue for reopen via bdrv_reopen_queue(), and the
> reopen occurs when bdrv_reopen_multiple() is called.  Changes are
> staged in bdrv_reopen_prepare() and in the equivalent driver level
> functions.  If any of the staged images fails a prepare, then all
> of the images left untouched, and the staged changes for each image
> abandoned.
>

Open question (my assumption is yes):

Is it safe to assume that reopen() should always enable BDRV_O_CACHE_WB
(without affecting enable_write_cache), so as to not undo what was done
by Paolo's commit e1e9b0ac?

> Signed-off-by: Jeff Cody <jcody@redhat.com>
> ---
>  block.c       | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block.h       |  15 ++++
>  block_int.h   |  13 ++++
>  qemu-common.h |   1 +
>  4 files changed, 255 insertions(+)
> 
> diff --git a/block.c b/block.c
> index e31b76f..9470319 100644
> --- a/block.c
> +++ b/block.c
> @@ -857,6 +857,232 @@ unlink_and_fail:
>      return ret;
>  }
>  
> +/*
> + * Adds a BlockDriverState to a simple queue for an atomic, transactional
> + * reopen of multiple devices.
> + *
> + * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
> + * already performed, or alternatively may be NULL a new BlockReopenQueue will
> + * be created and initialized. This newly created BlockReopenQueue should be
> + * passed back in for subsequent calls that are intended to be of the same
> + * atomic 'set'.
> + *
> + * bs is the BlockDriverState to add to the reopen queue.
> + *
> + * flags contains the open flags for the associated bs
> + *
> + * returns a pointer to bs_queue, which is either the newly allocated
> + * bs_queue, or the existing bs_queue being used.
> + *
> + */
> +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
> +                                    BlockDriverState *bs, int flags)
> +{
> +    assert(bs != NULL);
> +
> +    BlockReopenQueueEntry *bs_entry;
> +    if (bs_queue == NULL) {
> +        bs_queue = g_new0(BlockReopenQueue, 1);
> +        QSIMPLEQ_INIT(bs_queue);
> +    }
> +
> +    if (bs->file) {
> +        bdrv_reopen_queue(bs_queue, bs->file, flags);
> +    }
> +
> +    bs_entry = g_new0(BlockReopenQueueEntry, 1);
> +    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
> +
> +    bs_entry->state = g_new0(BDRVReopenState, 1);
> +    bs_entry->state->bs = bs;
> +    bs_entry->state->flags = flags;
> +
> +    return bs_queue;
> +}
> +
> +/*
> + * Reopen multiple BlockDriverStates atomically & transactionally.
> + *
> + * The queue passed in (bs_queue) must have been built up previous
> + * via bdrv_reopen_queue().
> + *
> + * Reopens all BDS specified in the queue, with the appropriate
> + * flags.  All devices are prepared for reopen, and failure of any
> + * device will cause all device changes to be abandonded, and intermediate
> + * data cleaned up.
> + *
> + * If all devices prepare successfully, then the changes are committed
> + * to all devices.
> + *
> + */
> +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
> +{
> +    int ret = -1;
> +    BlockReopenQueueEntry *bs_entry;
> +    Error *local_err = NULL;
> +
> +    assert(bs_queue != NULL);
> +
> +    bdrv_drain_all();
> +
> +    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
> +        if (bdrv_reopen_prepare(bs_entry->state, &local_err)) {
> +            error_propagate(errp, local_err);
> +            goto cleanup;
> +        }
> +        bs_entry->prepared = true;
> +    }
> +
> +    /* If we reach this point, we have success and just need to apply the
> +     * changes
> +     */
> +    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
> +        bdrv_reopen_commit(bs_entry->state);
> +    }
> +
> +    ret = 0;
> +
> +cleanup:
> +    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
> +        if (ret && bs_entry->prepared) {
> +            bdrv_reopen_abort(bs_entry->state);
> +        }
> +        g_free(bs_entry->state);
> +        g_free(bs_entry);
> +    }
> +    g_free(bs_queue);
> +    return ret;
> +}
> +
> +
> +/* Reopen a single BlockDriverState with the specified flags. */
> +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
> +{
> +    int ret = -1;
> +    Error *local_err = NULL;
> +    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
> +
> +    ret = bdrv_reopen_multiple(queue, &local_err);
> +    if (local_err != NULL) {
> +        error_propagate(errp, local_err);
> +    }
> +    return ret;
> +}
> +
> +
> +/*
> + * Prepares a BlockDriverState for reopen. All changes are staged in the
> + * 'reopen_state' field of the BlockDriverState, which must be NULL when
> + * entering (all previous reopens must have completed for the BDS).
> + *
> + * bs is the BlockDriverState to reopen
> + * flags are the new open flags
> + *
> + * Returns 0 on success, non-zero on error.  On error errp will be set
> + * as well.
> + *
> + * On failure, bdrv_reopen_abort() will be called to clean up any data.
> + * It is the responsibility of the caller to then call the abort() or
> + * commit() for any other BDS that have been left in a prepare() state
> + *
> + */
> +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp)
> +{
> +    int ret = -1;
> +    Error *local_err = NULL;
> +    BlockDriver *drv;
> +
> +    assert(reopen_state != NULL);
> +    assert(reopen_state->bs->drv != NULL);
> +    drv = reopen_state->bs->drv;
> +
> +    /* if we are to stay read-only, do not allow permission change
> +     * to r/w */
> +    if (reopen_state->bs->keep_read_only &&
> +        reopen_state->flags & BDRV_O_RDWR) {
> +        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
> +                  reopen_state->bs->device_name);
> +        goto error;
> +    }
> +
> +
> +    ret = bdrv_flush(reopen_state->bs);
> +    if (ret) {
> +        error_set(errp, QERR_IO_ERROR);
> +        goto error;
> +    }
> +
> +    if (drv->bdrv_reopen_prepare) {
> +        ret = drv->bdrv_reopen_prepare(reopen_state, &local_err);
> +        if (ret) {
> +            if (local_err != NULL) {
> +                error_propagate(errp, local_err);
> +            } else {
> +                error_set(errp, QERR_OPEN_FILE_FAILED,
> +                          reopen_state->bs->filename);
> +            }
> +            goto error;
> +        }
> +    } else {
> +        /* It is currently mandatory to have a bdrv_reopen_prepare()
> +         * handler for each supported drv. */
> +        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
> +                  drv->format_name, reopen_state->bs->device_name,
> +                 "reopening of file");
> +        ret = -1;
> +        goto error;
> +    }
> +
> +    return 0;
> +
> +error:
> +    bdrv_reopen_abort(reopen_state);
> +    return ret;
> +}
> +
> +/*
> + * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
> + * makes them final by swapping the staging BlockDriverState contents into
> + * the active BlockDriverState contents.
> + */
> +void bdrv_reopen_commit(BDRVReopenState *reopen_state)
> +{
> +    BlockDriver *drv;
> +
> +    assert(reopen_state != NULL);
> +    drv = reopen_state->bs->drv;
> +    assert(drv != NULL);
> +
> +    /* If there are any driver level actions to take */
> +    if (drv->bdrv_reopen_commit) {
> +        drv->bdrv_reopen_commit(reopen_state);
> +    }
> +
> +    /* set BDS specific flags now */
> +    reopen_state->bs->open_flags         = reopen_state->flags;
> +    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
> +                                              BDRV_O_CACHE_WB);
> +    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
> +}
> +
> +/*
> + * Abort the reopen, and delete and free the staged changes in
> + * reopen_state
> + */
> +void bdrv_reopen_abort(BDRVReopenState *reopen_state)
> +{
> +    BlockDriver *drv;
> +
> +    assert(reopen_state != NULL);
> +    drv = reopen_state->bs->drv;
> +    assert(drv != NULL);
> +
> +    if (drv->bdrv_reopen_abort) {
> +        drv->bdrv_reopen_abort(reopen_state);
> +    }
> +}
> +
> +
>  void bdrv_close(BlockDriverState *bs)
>  {
>      bdrv_flush(bs);
> diff --git a/block.h b/block.h
> index 4d919c2..db812b1 100644
> --- a/block.h
> +++ b/block.h
> @@ -97,6 +97,14 @@ typedef enum {
>      BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
>  } BlockQMPEventAction;
>  
> +typedef struct BlockReopenQueueEntry {
> +     bool prepared;
> +     BDRVReopenState *state;
> +     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
> +} BlockReopenQueueEntry;
> +
> +typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
> +
>  void bdrv_iostatus_enable(BlockDriverState *bs);
>  void bdrv_iostatus_reset(BlockDriverState *bs);
>  void bdrv_iostatus_disable(BlockDriverState *bs);
> @@ -131,6 +139,13 @@ int bdrv_parse_cache_flags(const char *mode, int *flags);
>  int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
>  int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
>                BlockDriver *drv);
> +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
> +                                    BlockDriverState *bs, int flags);
> +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
> +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
> +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp);
> +void bdrv_reopen_commit(BDRVReopenState *reopen_state);
> +void bdrv_reopen_abort(BDRVReopenState *reopen_state);
>  void bdrv_close(BlockDriverState *bs);
>  int bdrv_attach_dev(BlockDriverState *bs, void *dev);
>  void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
> diff --git a/block_int.h b/block_int.h
> index 4452f6f..7a4e226 100644
> --- a/block_int.h
> +++ b/block_int.h
> @@ -139,6 +139,12 @@ struct BlockDriver {
>      int instance_size;
>      int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
>      int (*bdrv_probe_device)(const char *filename);
> +
> +    /* For handling image reopen for split or non-split files */
> +    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, Error **errp);
> +    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
> +    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
> +
>      int (*bdrv_open)(BlockDriverState *bs, int flags);
>      int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags);
>      int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
> @@ -336,6 +342,13 @@ struct BlockDriverState {
>  
>      /* long-running background operation */
>      BlockJob *job;
> +
> +};
> +
> +struct BDRVReopenState {
> +    BlockDriverState *bs;
> +    int flags;
> +    void *opaque;
>  };
>  
>  int get_tmp_filename(char *filename, int size);
> diff --git a/qemu-common.h b/qemu-common.h
> index e5c2bcd..6a6181c 100644
> --- a/qemu-common.h
> +++ b/qemu-common.h
> @@ -245,6 +245,7 @@ typedef struct NICInfo NICInfo;
>  typedef struct HCIInfo HCIInfo;
>  typedef struct AudioState AudioState;
>  typedef struct BlockDriverState BlockDriverState;
> +typedef struct BDRVReopenState BDRVReopenState;
>  typedef struct DriveInfo DriveInfo;
>  typedef struct DisplayState DisplayState;
>  typedef struct DisplayChangeListener DisplayChangeListener;
>
Kevin Wolf Sept. 11, 2012, 3:14 p.m. UTC | #4
Am 11.09.2012 16:57, schrieb Jeff Cody:
> On 08/30/2012 02:47 PM, Jeff Cody wrote:
>> This is based heavily on Supriya Kannery's bdrv_reopen()
>> patch series.
>>
>> This provides a transactional method to reopen multiple
>> images files safely.
>>
>> Image files are queue for reopen via bdrv_reopen_queue(), and the
>> reopen occurs when bdrv_reopen_multiple() is called.  Changes are
>> staged in bdrv_reopen_prepare() and in the equivalent driver level
>> functions.  If any of the staged images fails a prepare, then all
>> of the images left untouched, and the staged changes for each image
>> abandoned.
>>
> 
> Open question (my assumption is yes):
> 
> Is it safe to assume that reopen() should always enable BDRV_O_CACHE_WB
> (without affecting enable_write_cache), so as to not undo what was done
> by Paolo's commit e1e9b0ac?

I think it makes sense to behave the same as bdrv_open_common(), so I
guess yes. But now I'm wondering if we also need other code from there,
like filtering out BDRV_O_SNAPSHOT/NO_BACKING etc.

Kevin
Jeff Cody Sept. 11, 2012, 3:36 p.m. UTC | #5
On 09/11/2012 11:14 AM, Kevin Wolf wrote:
> Am 11.09.2012 16:57, schrieb Jeff Cody:
>> On 08/30/2012 02:47 PM, Jeff Cody wrote:
>>> This is based heavily on Supriya Kannery's bdrv_reopen()
>>> patch series.
>>>
>>> This provides a transactional method to reopen multiple
>>> images files safely.
>>>
>>> Image files are queue for reopen via bdrv_reopen_queue(), and the
>>> reopen occurs when bdrv_reopen_multiple() is called.  Changes are
>>> staged in bdrv_reopen_prepare() and in the equivalent driver level
>>> functions.  If any of the staged images fails a prepare, then all
>>> of the images left untouched, and the staged changes for each image
>>> abandoned.
>>>
>>
>> Open question (my assumption is yes):
>>
>> Is it safe to assume that reopen() should always enable BDRV_O_CACHE_WB
>> (without affecting enable_write_cache), so as to not undo what was done
>> by Paolo's commit e1e9b0ac?
> 
> I think it makes sense to behave the same as bdrv_open_common(), so I
> guess yes. But now I'm wondering if we also need other code from there,
> like filtering out BDRV_O_SNAPSHOT/NO_BACKING etc.
>

I was wondering the same thing w/regards to BDRV_O_SNAPSHOT/NO_BACKING,
but I fell on the side of 'no'. Mainly because the raw drivers (raw-posix,
raw-win32) actively parse the passed flags to determine the actual open
flags, and so spurious flags such as those are ignored.  However,
BDRV_O_CACHE_WB is used in their flag parsing logic, so I think it needs
to be preserved.
Kevin Wolf Sept. 11, 2012, 3:41 p.m. UTC | #6
Am 11.09.2012 17:36, schrieb Jeff Cody:
> On 09/11/2012 11:14 AM, Kevin Wolf wrote:
>> Am 11.09.2012 16:57, schrieb Jeff Cody:
>>> On 08/30/2012 02:47 PM, Jeff Cody wrote:
>>>> This is based heavily on Supriya Kannery's bdrv_reopen()
>>>> patch series.
>>>>
>>>> This provides a transactional method to reopen multiple
>>>> images files safely.
>>>>
>>>> Image files are queue for reopen via bdrv_reopen_queue(), and the
>>>> reopen occurs when bdrv_reopen_multiple() is called.  Changes are
>>>> staged in bdrv_reopen_prepare() and in the equivalent driver level
>>>> functions.  If any of the staged images fails a prepare, then all
>>>> of the images left untouched, and the staged changes for each image
>>>> abandoned.
>>>>
>>>
>>> Open question (my assumption is yes):
>>>
>>> Is it safe to assume that reopen() should always enable BDRV_O_CACHE_WB
>>> (without affecting enable_write_cache), so as to not undo what was done
>>> by Paolo's commit e1e9b0ac?
>>
>> I think it makes sense to behave the same as bdrv_open_common(), so I
>> guess yes. But now I'm wondering if we also need other code from there,
>> like filtering out BDRV_O_SNAPSHOT/NO_BACKING etc.
>>
> 
> I was wondering the same thing w/regards to BDRV_O_SNAPSHOT/NO_BACKING,
> but I fell on the side of 'no'. Mainly because the raw drivers (raw-posix,
> raw-win32) actively parse the passed flags to determine the actual open
> flags, and so spurious flags such as those are ignored.  However,
> BDRV_O_CACHE_WB is used in their flag parsing logic, so I think it needs
> to be preserved.

That's probably logic that should be removed in raw-posix/win32.c as it
is unused now.

Kevin
diff mbox

Patch

diff --git a/block.c b/block.c
index e31b76f..9470319 100644
--- a/block.c
+++ b/block.c
@@ -857,6 +857,232 @@  unlink_and_fail:
     return ret;
 }
 
+/*
+ * Adds a BlockDriverState to a simple queue for an atomic, transactional
+ * reopen of multiple devices.
+ *
+ * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
+ * already performed, or alternatively may be NULL a new BlockReopenQueue will
+ * be created and initialized. This newly created BlockReopenQueue should be
+ * passed back in for subsequent calls that are intended to be of the same
+ * atomic 'set'.
+ *
+ * bs is the BlockDriverState to add to the reopen queue.
+ *
+ * flags contains the open flags for the associated bs
+ *
+ * returns a pointer to bs_queue, which is either the newly allocated
+ * bs_queue, or the existing bs_queue being used.
+ *
+ */
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+                                    BlockDriverState *bs, int flags)
+{
+    assert(bs != NULL);
+
+    BlockReopenQueueEntry *bs_entry;
+    if (bs_queue == NULL) {
+        bs_queue = g_new0(BlockReopenQueue, 1);
+        QSIMPLEQ_INIT(bs_queue);
+    }
+
+    if (bs->file) {
+        bdrv_reopen_queue(bs_queue, bs->file, flags);
+    }
+
+    bs_entry = g_new0(BlockReopenQueueEntry, 1);
+    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
+
+    bs_entry->state = g_new0(BDRVReopenState, 1);
+    bs_entry->state->bs = bs;
+    bs_entry->state->flags = flags;
+
+    return bs_queue;
+}
+
+/*
+ * Reopen multiple BlockDriverStates atomically & transactionally.
+ *
+ * The queue passed in (bs_queue) must have been built up previous
+ * via bdrv_reopen_queue().
+ *
+ * Reopens all BDS specified in the queue, with the appropriate
+ * flags.  All devices are prepared for reopen, and failure of any
+ * device will cause all device changes to be abandonded, and intermediate
+ * data cleaned up.
+ *
+ * If all devices prepare successfully, then the changes are committed
+ * to all devices.
+ *
+ */
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
+{
+    int ret = -1;
+    BlockReopenQueueEntry *bs_entry;
+    Error *local_err = NULL;
+
+    assert(bs_queue != NULL);
+
+    bdrv_drain_all();
+
+    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+        if (bdrv_reopen_prepare(bs_entry->state, &local_err)) {
+            error_propagate(errp, local_err);
+            goto cleanup;
+        }
+        bs_entry->prepared = true;
+    }
+
+    /* If we reach this point, we have success and just need to apply the
+     * changes
+     */
+    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+        bdrv_reopen_commit(bs_entry->state);
+    }
+
+    ret = 0;
+
+cleanup:
+    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+        if (ret && bs_entry->prepared) {
+            bdrv_reopen_abort(bs_entry->state);
+        }
+        g_free(bs_entry->state);
+        g_free(bs_entry);
+    }
+    g_free(bs_queue);
+    return ret;
+}
+
+
+/* Reopen a single BlockDriverState with the specified flags. */
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
+{
+    int ret = -1;
+    Error *local_err = NULL;
+    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
+
+    ret = bdrv_reopen_multiple(queue, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+    }
+    return ret;
+}
+
+
+/*
+ * Prepares a BlockDriverState for reopen. All changes are staged in the
+ * 'reopen_state' field of the BlockDriverState, which must be NULL when
+ * entering (all previous reopens must have completed for the BDS).
+ *
+ * bs is the BlockDriverState to reopen
+ * flags are the new open flags
+ *
+ * Returns 0 on success, non-zero on error.  On error errp will be set
+ * as well.
+ *
+ * On failure, bdrv_reopen_abort() will be called to clean up any data.
+ * It is the responsibility of the caller to then call the abort() or
+ * commit() for any other BDS that have been left in a prepare() state
+ *
+ */
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp)
+{
+    int ret = -1;
+    Error *local_err = NULL;
+    BlockDriver *drv;
+
+    assert(reopen_state != NULL);
+    assert(reopen_state->bs->drv != NULL);
+    drv = reopen_state->bs->drv;
+
+    /* if we are to stay read-only, do not allow permission change
+     * to r/w */
+    if (reopen_state->bs->keep_read_only &&
+        reopen_state->flags & BDRV_O_RDWR) {
+        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
+                  reopen_state->bs->device_name);
+        goto error;
+    }
+
+
+    ret = bdrv_flush(reopen_state->bs);
+    if (ret) {
+        error_set(errp, QERR_IO_ERROR);
+        goto error;
+    }
+
+    if (drv->bdrv_reopen_prepare) {
+        ret = drv->bdrv_reopen_prepare(reopen_state, &local_err);
+        if (ret) {
+            if (local_err != NULL) {
+                error_propagate(errp, local_err);
+            } else {
+                error_set(errp, QERR_OPEN_FILE_FAILED,
+                          reopen_state->bs->filename);
+            }
+            goto error;
+        }
+    } else {
+        /* It is currently mandatory to have a bdrv_reopen_prepare()
+         * handler for each supported drv. */
+        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+                  drv->format_name, reopen_state->bs->device_name,
+                 "reopening of file");
+        ret = -1;
+        goto error;
+    }
+
+    return 0;
+
+error:
+    bdrv_reopen_abort(reopen_state);
+    return ret;
+}
+
+/*
+ * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
+ * makes them final by swapping the staging BlockDriverState contents into
+ * the active BlockDriverState contents.
+ */
+void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+{
+    BlockDriver *drv;
+
+    assert(reopen_state != NULL);
+    drv = reopen_state->bs->drv;
+    assert(drv != NULL);
+
+    /* If there are any driver level actions to take */
+    if (drv->bdrv_reopen_commit) {
+        drv->bdrv_reopen_commit(reopen_state);
+    }
+
+    /* set BDS specific flags now */
+    reopen_state->bs->open_flags         = reopen_state->flags;
+    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
+                                              BDRV_O_CACHE_WB);
+    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
+}
+
+/*
+ * Abort the reopen, and delete and free the staged changes in
+ * reopen_state
+ */
+void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+{
+    BlockDriver *drv;
+
+    assert(reopen_state != NULL);
+    drv = reopen_state->bs->drv;
+    assert(drv != NULL);
+
+    if (drv->bdrv_reopen_abort) {
+        drv->bdrv_reopen_abort(reopen_state);
+    }
+}
+
+
 void bdrv_close(BlockDriverState *bs)
 {
     bdrv_flush(bs);
diff --git a/block.h b/block.h
index 4d919c2..db812b1 100644
--- a/block.h
+++ b/block.h
@@ -97,6 +97,14 @@  typedef enum {
     BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
 } BlockQMPEventAction;
 
+typedef struct BlockReopenQueueEntry {
+     bool prepared;
+     BDRVReopenState *state;
+     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
+} BlockReopenQueueEntry;
+
+typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
 void bdrv_iostatus_enable(BlockDriverState *bs);
 void bdrv_iostatus_reset(BlockDriverState *bs);
 void bdrv_iostatus_disable(BlockDriverState *bs);
@@ -131,6 +139,13 @@  int bdrv_parse_cache_flags(const char *mode, int *flags);
 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
               BlockDriver *drv);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+                                    BlockDriverState *bs, int flags);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp);
+void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+void bdrv_reopen_abort(BDRVReopenState *reopen_state);
 void bdrv_close(BlockDriverState *bs);
 int bdrv_attach_dev(BlockDriverState *bs, void *dev);
 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
diff --git a/block_int.h b/block_int.h
index 4452f6f..7a4e226 100644
--- a/block_int.h
+++ b/block_int.h
@@ -139,6 +139,12 @@  struct BlockDriver {
     int instance_size;
     int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
     int (*bdrv_probe_device)(const char *filename);
+
+    /* For handling image reopen for split or non-split files */
+    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, Error **errp);
+    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+
     int (*bdrv_open)(BlockDriverState *bs, int flags);
     int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags);
     int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
@@ -336,6 +342,13 @@  struct BlockDriverState {
 
     /* long-running background operation */
     BlockJob *job;
+
+};
+
+struct BDRVReopenState {
+    BlockDriverState *bs;
+    int flags;
+    void *opaque;
 };
 
 int get_tmp_filename(char *filename, int size);
diff --git a/qemu-common.h b/qemu-common.h
index e5c2bcd..6a6181c 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -245,6 +245,7 @@  typedef struct NICInfo NICInfo;
 typedef struct HCIInfo HCIInfo;
 typedef struct AudioState AudioState;
 typedef struct BlockDriverState BlockDriverState;
+typedef struct BDRVReopenState BDRVReopenState;
 typedef struct DriveInfo DriveInfo;
 typedef struct DisplayState DisplayState;
 typedef struct DisplayChangeListener DisplayChangeListener;