Message ID | e4b2728c331323bf0a46f30279f66bf91f6d3657.1346352124.git.jcody@redhat.com |
---|---|
State | New |
Headers | show |
Am 30.08.2012 20:47, schrieb Jeff Cody: > This is based heavily on Supriya Kannery's bdrv_reopen() > patch series. > > This provides a transactional method to reopen multiple > images files safely. > > Image files are queue for reopen via bdrv_reopen_queue(), and the > reopen occurs when bdrv_reopen_multiple() is called. Changes are > staged in bdrv_reopen_prepare() and in the equivalent driver level > functions. If any of the staged images fails a prepare, then all > of the images left untouched, and the staged changes for each image > abandoned. > > Signed-off-by: Jeff Cody <jcody@redhat.com> > +/* > + * Reopen multiple BlockDriverStates atomically & transactionally. > + * > + * The queue passed in (bs_queue) must have been built up previous > + * via bdrv_reopen_queue(). > + * > + * Reopens all BDS specified in the queue, with the appropriate > + * flags. All devices are prepared for reopen, and failure of any > + * device will cause all device changes to be abandonded, and intermediate > + * data cleaned up. > + * > + * If all devices prepare successfully, then the changes are committed > + * to all devices. > + * > + */ > +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) > +{ > + int ret = -1; > + BlockReopenQueueEntry *bs_entry; > + Error *local_err = NULL; > + > + assert(bs_queue != NULL); > + > + bdrv_drain_all(); > + > + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { > + if (bdrv_reopen_prepare(bs_entry->state, &local_err)) { > + error_propagate(errp, local_err); > + goto cleanup; > + } > + bs_entry->prepared = true; > + } > + > + /* If we reach this point, we have success and just need to apply the > + * changes > + */ > + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { > + bdrv_reopen_commit(bs_entry->state); > + } > + > + ret = 0; > + > +cleanup: > + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { > + if (ret && bs_entry->prepared) { > + bdrv_reopen_abort(bs_entry->state); > + } > + g_free(bs_entry->state); > + g_free(bs_entry); > + } Without QSIMPLEQ_FOREACH_SAFE, isn't this a use after free? > + g_free(bs_queue); > + return ret; > +} > + > + > +/* Reopen a single BlockDriverState with the specified flags. */ > +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) > +{ > + int ret = -1; > + Error *local_err = NULL; > + BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); > + > + ret = bdrv_reopen_multiple(queue, &local_err); > + if (local_err != NULL) { > + error_propagate(errp, local_err); > + } > + return ret; > +} > + > + > +/* > + * Prepares a BlockDriverState for reopen. All changes are staged in the > + * 'reopen_state' field of the BlockDriverState, which must be NULL when > + * entering (all previous reopens must have completed for the BDS). > + * > + * bs is the BlockDriverState to reopen > + * flags are the new open flags > + * > + * Returns 0 on success, non-zero on error. On error errp will be set > + * as well. > + * > + * On failure, bdrv_reopen_abort() will be called to clean up any data. > + * It is the responsibility of the caller to then call the abort() or > + * commit() for any other BDS that have been left in a prepare() state > + * > + */ > +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp) > +{ > + int ret = -1; > + Error *local_err = NULL; > + BlockDriver *drv; > + > + assert(reopen_state != NULL); > + assert(reopen_state->bs->drv != NULL); > + drv = reopen_state->bs->drv; > + > + /* if we are to stay read-only, do not allow permission change > + * to r/w */ > + if (reopen_state->bs->keep_read_only && Just for completeness, we decided to use the flag here instead of keep_read_only. > + reopen_state->flags & BDRV_O_RDWR) { > + error_set(errp, QERR_DEVICE_IS_READ_ONLY, > + reopen_state->bs->device_name); > + goto error; > + } > + > + > + ret = bdrv_flush(reopen_state->bs); > + if (ret) { > + error_set(errp, QERR_IO_ERROR); > + goto error; > + } This throws the error code away. Bad. We should probably change QERR_IO_ERROR so that you can include strerror(-ret). > + > + if (drv->bdrv_reopen_prepare) { > + ret = drv->bdrv_reopen_prepare(reopen_state, &local_err); > + if (ret) { > + if (local_err != NULL) { > + error_propagate(errp, local_err); > + } else { > + error_set(errp, QERR_OPEN_FILE_FAILED, > + reopen_state->bs->filename); > + } > + goto error; > + } > + } else { > + /* It is currently mandatory to have a bdrv_reopen_prepare() > + * handler for each supported drv. */ > + error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, > + drv->format_name, reopen_state->bs->device_name, > + "reopening of file"); > + ret = -1; > + goto error; > + } > + > + return 0; > + > +error: > + bdrv_reopen_abort(reopen_state); This is unexpected for me. Shouldn't .bdrv_reopen_prepare() clean up before returning an error, like any other function does? (Which could actually be a call to bdrv_reopen_abort() where it makes sense.) If you use .bdrv_reopen_abort() for it, block drivers must take care to write this function in way that doesn't assume that .bdrv_reopen_prepare() has completed. Sounds rather nasty to me. > + return ret; > +} > + > +/* > + * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and > + * makes them final by swapping the staging BlockDriverState contents into > + * the active BlockDriverState contents. > + */ > +void bdrv_reopen_commit(BDRVReopenState *reopen_state) > +{ > + BlockDriver *drv; > + > + assert(reopen_state != NULL); > + drv = reopen_state->bs->drv; > + assert(drv != NULL); > + > + /* If there are any driver level actions to take */ > + if (drv->bdrv_reopen_commit) { > + drv->bdrv_reopen_commit(reopen_state); > + } > + > + /* set BDS specific flags now */ > + reopen_state->bs->open_flags = reopen_state->flags; > + reopen_state->bs->enable_write_cache = !!(reopen_state->flags & > + BDRV_O_CACHE_WB); > + reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); Hm, I wonder if these three lines can somehow be shared with the normal bdrv_open so that they stay in sync. > +} > + > +/* > + * Abort the reopen, and delete and free the staged changes in > + * reopen_state > + */ > +void bdrv_reopen_abort(BDRVReopenState *reopen_state) > +{ > + BlockDriver *drv; > + > + assert(reopen_state != NULL); > + drv = reopen_state->bs->drv; > + assert(drv != NULL); > + > + if (drv->bdrv_reopen_abort) { > + drv->bdrv_reopen_abort(reopen_state); > + } > +} > + > + > void bdrv_close(BlockDriverState *bs) > { > bdrv_flush(bs); > diff --git a/block.h b/block.h > index 4d919c2..db812b1 100644 > --- a/block.h > +++ b/block.h > @@ -97,6 +97,14 @@ typedef enum { > BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP > } BlockQMPEventAction; > > +typedef struct BlockReopenQueueEntry { > + bool prepared; > + BDRVReopenState *state; As discussed on IRC, this can be directly embedded instead of using a pointer. > + QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; > +} BlockReopenQueueEntry; Kevin
On 09/05/2012 11:09 AM, Kevin Wolf wrote: > Am 30.08.2012 20:47, schrieb Jeff Cody: >> This is based heavily on Supriya Kannery's bdrv_reopen() >> patch series. >> >> This provides a transactional method to reopen multiple >> images files safely. >> >> Image files are queue for reopen via bdrv_reopen_queue(), and the >> reopen occurs when bdrv_reopen_multiple() is called. Changes are >> staged in bdrv_reopen_prepare() and in the equivalent driver level >> functions. If any of the staged images fails a prepare, then all >> of the images left untouched, and the staged changes for each image >> abandoned. >> >> Signed-off-by: Jeff Cody <jcody@redhat.com> > >> +/* >> + * Reopen multiple BlockDriverStates atomically & transactionally. >> + * >> + * The queue passed in (bs_queue) must have been built up previous >> + * via bdrv_reopen_queue(). >> + * >> + * Reopens all BDS specified in the queue, with the appropriate >> + * flags. All devices are prepared for reopen, and failure of any >> + * device will cause all device changes to be abandonded, and intermediate >> + * data cleaned up. >> + * >> + * If all devices prepare successfully, then the changes are committed >> + * to all devices. >> + * >> + */ >> +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) >> +{ >> + int ret = -1; >> + BlockReopenQueueEntry *bs_entry; >> + Error *local_err = NULL; >> + >> + assert(bs_queue != NULL); >> + >> + bdrv_drain_all(); >> + >> + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { >> + if (bdrv_reopen_prepare(bs_entry->state, &local_err)) { >> + error_propagate(errp, local_err); >> + goto cleanup; >> + } >> + bs_entry->prepared = true; >> + } >> + >> + /* If we reach this point, we have success and just need to apply the >> + * changes >> + */ >> + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { >> + bdrv_reopen_commit(bs_entry->state); >> + } >> + >> + ret = 0; >> + >> +cleanup: >> + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { >> + if (ret && bs_entry->prepared) { >> + bdrv_reopen_abort(bs_entry->state); >> + } >> + g_free(bs_entry->state); >> + g_free(bs_entry); >> + } > > Without QSIMPLEQ_FOREACH_SAFE, isn't this a use after free? > Yes - that needs to be a QSIMPLEQ_FOREACH_SAFE. >> + g_free(bs_queue); >> + return ret; >> +} >> + >> + >> +/* Reopen a single BlockDriverState with the specified flags. */ >> +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) >> +{ >> + int ret = -1; >> + Error *local_err = NULL; >> + BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); >> + >> + ret = bdrv_reopen_multiple(queue, &local_err); >> + if (local_err != NULL) { >> + error_propagate(errp, local_err); >> + } >> + return ret; >> +} >> + >> + >> +/* >> + * Prepares a BlockDriverState for reopen. All changes are staged in the >> + * 'reopen_state' field of the BlockDriverState, which must be NULL when >> + * entering (all previous reopens must have completed for the BDS). >> + * >> + * bs is the BlockDriverState to reopen >> + * flags are the new open flags >> + * >> + * Returns 0 on success, non-zero on error. On error errp will be set >> + * as well. >> + * >> + * On failure, bdrv_reopen_abort() will be called to clean up any data. >> + * It is the responsibility of the caller to then call the abort() or >> + * commit() for any other BDS that have been left in a prepare() state >> + * >> + */ >> +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp) >> +{ >> + int ret = -1; >> + Error *local_err = NULL; >> + BlockDriver *drv; >> + >> + assert(reopen_state != NULL); >> + assert(reopen_state->bs->drv != NULL); >> + drv = reopen_state->bs->drv; >> + >> + /* if we are to stay read-only, do not allow permission change >> + * to r/w */ >> + if (reopen_state->bs->keep_read_only && > > Just for completeness, we decided to use the flag here instead of > keep_read_only. > >> + reopen_state->flags & BDRV_O_RDWR) { >> + error_set(errp, QERR_DEVICE_IS_READ_ONLY, >> + reopen_state->bs->device_name); >> + goto error; >> + } >> + >> + >> + ret = bdrv_flush(reopen_state->bs); >> + if (ret) { >> + error_set(errp, QERR_IO_ERROR); >> + goto error; >> + } > > This throws the error code away. Bad. We should probably change > QERR_IO_ERROR so that you can include strerror(-ret). > Or, I could use the new error_setg() here, and pass along all relevant information. >> + >> + if (drv->bdrv_reopen_prepare) { >> + ret = drv->bdrv_reopen_prepare(reopen_state, &local_err); >> + if (ret) { >> + if (local_err != NULL) { >> + error_propagate(errp, local_err); >> + } else { >> + error_set(errp, QERR_OPEN_FILE_FAILED, >> + reopen_state->bs->filename); >> + } >> + goto error; >> + } >> + } else { >> + /* It is currently mandatory to have a bdrv_reopen_prepare() >> + * handler for each supported drv. */ >> + error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, >> + drv->format_name, reopen_state->bs->device_name, >> + "reopening of file"); >> + ret = -1; >> + goto error; >> + } >> + >> + return 0; >> + >> +error: >> + bdrv_reopen_abort(reopen_state); > > This is unexpected for me. Shouldn't .bdrv_reopen_prepare() clean up > before returning an error, like any other function does? (Which could > actually be a call to bdrv_reopen_abort() where it makes sense.) > > If you use .bdrv_reopen_abort() for it, block drivers must take care to > write this function in way that doesn't assume that > .bdrv_reopen_prepare() has completed. Sounds rather nasty to me. > Hmm. Yes, I can see that - and there is no cleanup this function needs to do itself, so it can just assume that .bdrv_reopen_prepare() will clean up after itself (which, as you mentioned, will likely be the driver calling its own .bdrv_reopen_abort()). Although, I think it should always be good form for the block drivers to not make such assumptions, if possible. >> + return ret; >> +} >> + >> +/* >> + * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and >> + * makes them final by swapping the staging BlockDriverState contents into >> + * the active BlockDriverState contents. >> + */ >> +void bdrv_reopen_commit(BDRVReopenState *reopen_state) >> +{ >> + BlockDriver *drv; >> + >> + assert(reopen_state != NULL); >> + drv = reopen_state->bs->drv; >> + assert(drv != NULL); >> + >> + /* If there are any driver level actions to take */ >> + if (drv->bdrv_reopen_commit) { >> + drv->bdrv_reopen_commit(reopen_state); >> + } >> + >> + /* set BDS specific flags now */ >> + reopen_state->bs->open_flags = reopen_state->flags; >> + reopen_state->bs->enable_write_cache = !!(reopen_state->flags & >> + BDRV_O_CACHE_WB); >> + reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); > > Hm, I wonder if these three lines can somehow be shared with the normal > bdrv_open so that they stay in sync. Sure, especially setting enable_write_cache. > >> +} >> + >> +/* >> + * Abort the reopen, and delete and free the staged changes in >> + * reopen_state >> + */ >> +void bdrv_reopen_abort(BDRVReopenState *reopen_state) >> +{ >> + BlockDriver *drv; >> + >> + assert(reopen_state != NULL); >> + drv = reopen_state->bs->drv; >> + assert(drv != NULL); >> + >> + if (drv->bdrv_reopen_abort) { >> + drv->bdrv_reopen_abort(reopen_state); >> + } >> +} >> + >> + >> void bdrv_close(BlockDriverState *bs) >> { >> bdrv_flush(bs); >> diff --git a/block.h b/block.h >> index 4d919c2..db812b1 100644 >> --- a/block.h >> +++ b/block.h >> @@ -97,6 +97,14 @@ typedef enum { >> BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP >> } BlockQMPEventAction; >> >> +typedef struct BlockReopenQueueEntry { >> + bool prepared; >> + BDRVReopenState *state; > > As discussed on IRC, this can be directly embedded instead of using a > pointer. > >> + QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; >> +} BlockReopenQueueEntry; > > > Kevin >
On 08/30/2012 02:47 PM, Jeff Cody wrote: > This is based heavily on Supriya Kannery's bdrv_reopen() > patch series. > > This provides a transactional method to reopen multiple > images files safely. > > Image files are queue for reopen via bdrv_reopen_queue(), and the > reopen occurs when bdrv_reopen_multiple() is called. Changes are > staged in bdrv_reopen_prepare() and in the equivalent driver level > functions. If any of the staged images fails a prepare, then all > of the images left untouched, and the staged changes for each image > abandoned. > Open question (my assumption is yes): Is it safe to assume that reopen() should always enable BDRV_O_CACHE_WB (without affecting enable_write_cache), so as to not undo what was done by Paolo's commit e1e9b0ac? > Signed-off-by: Jeff Cody <jcody@redhat.com> > --- > block.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > block.h | 15 ++++ > block_int.h | 13 ++++ > qemu-common.h | 1 + > 4 files changed, 255 insertions(+) > > diff --git a/block.c b/block.c > index e31b76f..9470319 100644 > --- a/block.c > +++ b/block.c > @@ -857,6 +857,232 @@ unlink_and_fail: > return ret; > } > > +/* > + * Adds a BlockDriverState to a simple queue for an atomic, transactional > + * reopen of multiple devices. > + * > + * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT > + * already performed, or alternatively may be NULL a new BlockReopenQueue will > + * be created and initialized. This newly created BlockReopenQueue should be > + * passed back in for subsequent calls that are intended to be of the same > + * atomic 'set'. > + * > + * bs is the BlockDriverState to add to the reopen queue. > + * > + * flags contains the open flags for the associated bs > + * > + * returns a pointer to bs_queue, which is either the newly allocated > + * bs_queue, or the existing bs_queue being used. > + * > + */ > +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, > + BlockDriverState *bs, int flags) > +{ > + assert(bs != NULL); > + > + BlockReopenQueueEntry *bs_entry; > + if (bs_queue == NULL) { > + bs_queue = g_new0(BlockReopenQueue, 1); > + QSIMPLEQ_INIT(bs_queue); > + } > + > + if (bs->file) { > + bdrv_reopen_queue(bs_queue, bs->file, flags); > + } > + > + bs_entry = g_new0(BlockReopenQueueEntry, 1); > + QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); > + > + bs_entry->state = g_new0(BDRVReopenState, 1); > + bs_entry->state->bs = bs; > + bs_entry->state->flags = flags; > + > + return bs_queue; > +} > + > +/* > + * Reopen multiple BlockDriverStates atomically & transactionally. > + * > + * The queue passed in (bs_queue) must have been built up previous > + * via bdrv_reopen_queue(). > + * > + * Reopens all BDS specified in the queue, with the appropriate > + * flags. All devices are prepared for reopen, and failure of any > + * device will cause all device changes to be abandonded, and intermediate > + * data cleaned up. > + * > + * If all devices prepare successfully, then the changes are committed > + * to all devices. > + * > + */ > +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) > +{ > + int ret = -1; > + BlockReopenQueueEntry *bs_entry; > + Error *local_err = NULL; > + > + assert(bs_queue != NULL); > + > + bdrv_drain_all(); > + > + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { > + if (bdrv_reopen_prepare(bs_entry->state, &local_err)) { > + error_propagate(errp, local_err); > + goto cleanup; > + } > + bs_entry->prepared = true; > + } > + > + /* If we reach this point, we have success and just need to apply the > + * changes > + */ > + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { > + bdrv_reopen_commit(bs_entry->state); > + } > + > + ret = 0; > + > +cleanup: > + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { > + if (ret && bs_entry->prepared) { > + bdrv_reopen_abort(bs_entry->state); > + } > + g_free(bs_entry->state); > + g_free(bs_entry); > + } > + g_free(bs_queue); > + return ret; > +} > + > + > +/* Reopen a single BlockDriverState with the specified flags. */ > +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) > +{ > + int ret = -1; > + Error *local_err = NULL; > + BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); > + > + ret = bdrv_reopen_multiple(queue, &local_err); > + if (local_err != NULL) { > + error_propagate(errp, local_err); > + } > + return ret; > +} > + > + > +/* > + * Prepares a BlockDriverState for reopen. All changes are staged in the > + * 'reopen_state' field of the BlockDriverState, which must be NULL when > + * entering (all previous reopens must have completed for the BDS). > + * > + * bs is the BlockDriverState to reopen > + * flags are the new open flags > + * > + * Returns 0 on success, non-zero on error. On error errp will be set > + * as well. > + * > + * On failure, bdrv_reopen_abort() will be called to clean up any data. > + * It is the responsibility of the caller to then call the abort() or > + * commit() for any other BDS that have been left in a prepare() state > + * > + */ > +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp) > +{ > + int ret = -1; > + Error *local_err = NULL; > + BlockDriver *drv; > + > + assert(reopen_state != NULL); > + assert(reopen_state->bs->drv != NULL); > + drv = reopen_state->bs->drv; > + > + /* if we are to stay read-only, do not allow permission change > + * to r/w */ > + if (reopen_state->bs->keep_read_only && > + reopen_state->flags & BDRV_O_RDWR) { > + error_set(errp, QERR_DEVICE_IS_READ_ONLY, > + reopen_state->bs->device_name); > + goto error; > + } > + > + > + ret = bdrv_flush(reopen_state->bs); > + if (ret) { > + error_set(errp, QERR_IO_ERROR); > + goto error; > + } > + > + if (drv->bdrv_reopen_prepare) { > + ret = drv->bdrv_reopen_prepare(reopen_state, &local_err); > + if (ret) { > + if (local_err != NULL) { > + error_propagate(errp, local_err); > + } else { > + error_set(errp, QERR_OPEN_FILE_FAILED, > + reopen_state->bs->filename); > + } > + goto error; > + } > + } else { > + /* It is currently mandatory to have a bdrv_reopen_prepare() > + * handler for each supported drv. */ > + error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, > + drv->format_name, reopen_state->bs->device_name, > + "reopening of file"); > + ret = -1; > + goto error; > + } > + > + return 0; > + > +error: > + bdrv_reopen_abort(reopen_state); > + return ret; > +} > + > +/* > + * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and > + * makes them final by swapping the staging BlockDriverState contents into > + * the active BlockDriverState contents. > + */ > +void bdrv_reopen_commit(BDRVReopenState *reopen_state) > +{ > + BlockDriver *drv; > + > + assert(reopen_state != NULL); > + drv = reopen_state->bs->drv; > + assert(drv != NULL); > + > + /* If there are any driver level actions to take */ > + if (drv->bdrv_reopen_commit) { > + drv->bdrv_reopen_commit(reopen_state); > + } > + > + /* set BDS specific flags now */ > + reopen_state->bs->open_flags = reopen_state->flags; > + reopen_state->bs->enable_write_cache = !!(reopen_state->flags & > + BDRV_O_CACHE_WB); > + reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); > +} > + > +/* > + * Abort the reopen, and delete and free the staged changes in > + * reopen_state > + */ > +void bdrv_reopen_abort(BDRVReopenState *reopen_state) > +{ > + BlockDriver *drv; > + > + assert(reopen_state != NULL); > + drv = reopen_state->bs->drv; > + assert(drv != NULL); > + > + if (drv->bdrv_reopen_abort) { > + drv->bdrv_reopen_abort(reopen_state); > + } > +} > + > + > void bdrv_close(BlockDriverState *bs) > { > bdrv_flush(bs); > diff --git a/block.h b/block.h > index 4d919c2..db812b1 100644 > --- a/block.h > +++ b/block.h > @@ -97,6 +97,14 @@ typedef enum { > BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP > } BlockQMPEventAction; > > +typedef struct BlockReopenQueueEntry { > + bool prepared; > + BDRVReopenState *state; > + QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; > +} BlockReopenQueueEntry; > + > +typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; > + > void bdrv_iostatus_enable(BlockDriverState *bs); > void bdrv_iostatus_reset(BlockDriverState *bs); > void bdrv_iostatus_disable(BlockDriverState *bs); > @@ -131,6 +139,13 @@ int bdrv_parse_cache_flags(const char *mode, int *flags); > int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags); > int bdrv_open(BlockDriverState *bs, const char *filename, int flags, > BlockDriver *drv); > +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, > + BlockDriverState *bs, int flags); > +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); > +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp); > +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp); > +void bdrv_reopen_commit(BDRVReopenState *reopen_state); > +void bdrv_reopen_abort(BDRVReopenState *reopen_state); > void bdrv_close(BlockDriverState *bs); > int bdrv_attach_dev(BlockDriverState *bs, void *dev); > void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev); > diff --git a/block_int.h b/block_int.h > index 4452f6f..7a4e226 100644 > --- a/block_int.h > +++ b/block_int.h > @@ -139,6 +139,12 @@ struct BlockDriver { > int instance_size; > int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); > int (*bdrv_probe_device)(const char *filename); > + > + /* For handling image reopen for split or non-split files */ > + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, Error **errp); > + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); > + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); > + > int (*bdrv_open)(BlockDriverState *bs, int flags); > int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags); > int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num, > @@ -336,6 +342,13 @@ struct BlockDriverState { > > /* long-running background operation */ > BlockJob *job; > + > +}; > + > +struct BDRVReopenState { > + BlockDriverState *bs; > + int flags; > + void *opaque; > }; > > int get_tmp_filename(char *filename, int size); > diff --git a/qemu-common.h b/qemu-common.h > index e5c2bcd..6a6181c 100644 > --- a/qemu-common.h > +++ b/qemu-common.h > @@ -245,6 +245,7 @@ typedef struct NICInfo NICInfo; > typedef struct HCIInfo HCIInfo; > typedef struct AudioState AudioState; > typedef struct BlockDriverState BlockDriverState; > +typedef struct BDRVReopenState BDRVReopenState; > typedef struct DriveInfo DriveInfo; > typedef struct DisplayState DisplayState; > typedef struct DisplayChangeListener DisplayChangeListener; >
Am 11.09.2012 16:57, schrieb Jeff Cody: > On 08/30/2012 02:47 PM, Jeff Cody wrote: >> This is based heavily on Supriya Kannery's bdrv_reopen() >> patch series. >> >> This provides a transactional method to reopen multiple >> images files safely. >> >> Image files are queue for reopen via bdrv_reopen_queue(), and the >> reopen occurs when bdrv_reopen_multiple() is called. Changes are >> staged in bdrv_reopen_prepare() and in the equivalent driver level >> functions. If any of the staged images fails a prepare, then all >> of the images left untouched, and the staged changes for each image >> abandoned. >> > > Open question (my assumption is yes): > > Is it safe to assume that reopen() should always enable BDRV_O_CACHE_WB > (without affecting enable_write_cache), so as to not undo what was done > by Paolo's commit e1e9b0ac? I think it makes sense to behave the same as bdrv_open_common(), so I guess yes. But now I'm wondering if we also need other code from there, like filtering out BDRV_O_SNAPSHOT/NO_BACKING etc. Kevin
On 09/11/2012 11:14 AM, Kevin Wolf wrote: > Am 11.09.2012 16:57, schrieb Jeff Cody: >> On 08/30/2012 02:47 PM, Jeff Cody wrote: >>> This is based heavily on Supriya Kannery's bdrv_reopen() >>> patch series. >>> >>> This provides a transactional method to reopen multiple >>> images files safely. >>> >>> Image files are queue for reopen via bdrv_reopen_queue(), and the >>> reopen occurs when bdrv_reopen_multiple() is called. Changes are >>> staged in bdrv_reopen_prepare() and in the equivalent driver level >>> functions. If any of the staged images fails a prepare, then all >>> of the images left untouched, and the staged changes for each image >>> abandoned. >>> >> >> Open question (my assumption is yes): >> >> Is it safe to assume that reopen() should always enable BDRV_O_CACHE_WB >> (without affecting enable_write_cache), so as to not undo what was done >> by Paolo's commit e1e9b0ac? > > I think it makes sense to behave the same as bdrv_open_common(), so I > guess yes. But now I'm wondering if we also need other code from there, > like filtering out BDRV_O_SNAPSHOT/NO_BACKING etc. > I was wondering the same thing w/regards to BDRV_O_SNAPSHOT/NO_BACKING, but I fell on the side of 'no'. Mainly because the raw drivers (raw-posix, raw-win32) actively parse the passed flags to determine the actual open flags, and so spurious flags such as those are ignored. However, BDRV_O_CACHE_WB is used in their flag parsing logic, so I think it needs to be preserved.
Am 11.09.2012 17:36, schrieb Jeff Cody: > On 09/11/2012 11:14 AM, Kevin Wolf wrote: >> Am 11.09.2012 16:57, schrieb Jeff Cody: >>> On 08/30/2012 02:47 PM, Jeff Cody wrote: >>>> This is based heavily on Supriya Kannery's bdrv_reopen() >>>> patch series. >>>> >>>> This provides a transactional method to reopen multiple >>>> images files safely. >>>> >>>> Image files are queue for reopen via bdrv_reopen_queue(), and the >>>> reopen occurs when bdrv_reopen_multiple() is called. Changes are >>>> staged in bdrv_reopen_prepare() and in the equivalent driver level >>>> functions. If any of the staged images fails a prepare, then all >>>> of the images left untouched, and the staged changes for each image >>>> abandoned. >>>> >>> >>> Open question (my assumption is yes): >>> >>> Is it safe to assume that reopen() should always enable BDRV_O_CACHE_WB >>> (without affecting enable_write_cache), so as to not undo what was done >>> by Paolo's commit e1e9b0ac? >> >> I think it makes sense to behave the same as bdrv_open_common(), so I >> guess yes. But now I'm wondering if we also need other code from there, >> like filtering out BDRV_O_SNAPSHOT/NO_BACKING etc. >> > > I was wondering the same thing w/regards to BDRV_O_SNAPSHOT/NO_BACKING, > but I fell on the side of 'no'. Mainly because the raw drivers (raw-posix, > raw-win32) actively parse the passed flags to determine the actual open > flags, and so spurious flags such as those are ignored. However, > BDRV_O_CACHE_WB is used in their flag parsing logic, so I think it needs > to be preserved. That's probably logic that should be removed in raw-posix/win32.c as it is unused now. Kevin
diff --git a/block.c b/block.c index e31b76f..9470319 100644 --- a/block.c +++ b/block.c @@ -857,6 +857,232 @@ unlink_and_fail: return ret; } +/* + * Adds a BlockDriverState to a simple queue for an atomic, transactional + * reopen of multiple devices. + * + * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT + * already performed, or alternatively may be NULL a new BlockReopenQueue will + * be created and initialized. This newly created BlockReopenQueue should be + * passed back in for subsequent calls that are intended to be of the same + * atomic 'set'. + * + * bs is the BlockDriverState to add to the reopen queue. + * + * flags contains the open flags for the associated bs + * + * returns a pointer to bs_queue, which is either the newly allocated + * bs_queue, or the existing bs_queue being used. + * + */ +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, int flags) +{ + assert(bs != NULL); + + BlockReopenQueueEntry *bs_entry; + if (bs_queue == NULL) { + bs_queue = g_new0(BlockReopenQueue, 1); + QSIMPLEQ_INIT(bs_queue); + } + + if (bs->file) { + bdrv_reopen_queue(bs_queue, bs->file, flags); + } + + bs_entry = g_new0(BlockReopenQueueEntry, 1); + QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); + + bs_entry->state = g_new0(BDRVReopenState, 1); + bs_entry->state->bs = bs; + bs_entry->state->flags = flags; + + return bs_queue; +} + +/* + * Reopen multiple BlockDriverStates atomically & transactionally. + * + * The queue passed in (bs_queue) must have been built up previous + * via bdrv_reopen_queue(). + * + * Reopens all BDS specified in the queue, with the appropriate + * flags. All devices are prepared for reopen, and failure of any + * device will cause all device changes to be abandonded, and intermediate + * data cleaned up. + * + * If all devices prepare successfully, then the changes are committed + * to all devices. + * + */ +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) +{ + int ret = -1; + BlockReopenQueueEntry *bs_entry; + Error *local_err = NULL; + + assert(bs_queue != NULL); + + bdrv_drain_all(); + + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { + if (bdrv_reopen_prepare(bs_entry->state, &local_err)) { + error_propagate(errp, local_err); + goto cleanup; + } + bs_entry->prepared = true; + } + + /* If we reach this point, we have success and just need to apply the + * changes + */ + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { + bdrv_reopen_commit(bs_entry->state); + } + + ret = 0; + +cleanup: + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { + if (ret && bs_entry->prepared) { + bdrv_reopen_abort(bs_entry->state); + } + g_free(bs_entry->state); + g_free(bs_entry); + } + g_free(bs_queue); + return ret; +} + + +/* Reopen a single BlockDriverState with the specified flags. */ +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) +{ + int ret = -1; + Error *local_err = NULL; + BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); + + ret = bdrv_reopen_multiple(queue, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + } + return ret; +} + + +/* + * Prepares a BlockDriverState for reopen. All changes are staged in the + * 'reopen_state' field of the BlockDriverState, which must be NULL when + * entering (all previous reopens must have completed for the BDS). + * + * bs is the BlockDriverState to reopen + * flags are the new open flags + * + * Returns 0 on success, non-zero on error. On error errp will be set + * as well. + * + * On failure, bdrv_reopen_abort() will be called to clean up any data. + * It is the responsibility of the caller to then call the abort() or + * commit() for any other BDS that have been left in a prepare() state + * + */ +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp) +{ + int ret = -1; + Error *local_err = NULL; + BlockDriver *drv; + + assert(reopen_state != NULL); + assert(reopen_state->bs->drv != NULL); + drv = reopen_state->bs->drv; + + /* if we are to stay read-only, do not allow permission change + * to r/w */ + if (reopen_state->bs->keep_read_only && + reopen_state->flags & BDRV_O_RDWR) { + error_set(errp, QERR_DEVICE_IS_READ_ONLY, + reopen_state->bs->device_name); + goto error; + } + + + ret = bdrv_flush(reopen_state->bs); + if (ret) { + error_set(errp, QERR_IO_ERROR); + goto error; + } + + if (drv->bdrv_reopen_prepare) { + ret = drv->bdrv_reopen_prepare(reopen_state, &local_err); + if (ret) { + if (local_err != NULL) { + error_propagate(errp, local_err); + } else { + error_set(errp, QERR_OPEN_FILE_FAILED, + reopen_state->bs->filename); + } + goto error; + } + } else { + /* It is currently mandatory to have a bdrv_reopen_prepare() + * handler for each supported drv. */ + error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, + drv->format_name, reopen_state->bs->device_name, + "reopening of file"); + ret = -1; + goto error; + } + + return 0; + +error: + bdrv_reopen_abort(reopen_state); + return ret; +} + +/* + * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and + * makes them final by swapping the staging BlockDriverState contents into + * the active BlockDriverState contents. + */ +void bdrv_reopen_commit(BDRVReopenState *reopen_state) +{ + BlockDriver *drv; + + assert(reopen_state != NULL); + drv = reopen_state->bs->drv; + assert(drv != NULL); + + /* If there are any driver level actions to take */ + if (drv->bdrv_reopen_commit) { + drv->bdrv_reopen_commit(reopen_state); + } + + /* set BDS specific flags now */ + reopen_state->bs->open_flags = reopen_state->flags; + reopen_state->bs->enable_write_cache = !!(reopen_state->flags & + BDRV_O_CACHE_WB); + reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); +} + +/* + * Abort the reopen, and delete and free the staged changes in + * reopen_state + */ +void bdrv_reopen_abort(BDRVReopenState *reopen_state) +{ + BlockDriver *drv; + + assert(reopen_state != NULL); + drv = reopen_state->bs->drv; + assert(drv != NULL); + + if (drv->bdrv_reopen_abort) { + drv->bdrv_reopen_abort(reopen_state); + } +} + + void bdrv_close(BlockDriverState *bs) { bdrv_flush(bs); diff --git a/block.h b/block.h index 4d919c2..db812b1 100644 --- a/block.h +++ b/block.h @@ -97,6 +97,14 @@ typedef enum { BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP } BlockQMPEventAction; +typedef struct BlockReopenQueueEntry { + bool prepared; + BDRVReopenState *state; + QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; +} BlockReopenQueueEntry; + +typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; + void bdrv_iostatus_enable(BlockDriverState *bs); void bdrv_iostatus_reset(BlockDriverState *bs); void bdrv_iostatus_disable(BlockDriverState *bs); @@ -131,6 +139,13 @@ int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags); int bdrv_open(BlockDriverState *bs, const char *filename, int flags, BlockDriver *drv); +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, int flags); +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp); +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, Error **errp); +void bdrv_reopen_commit(BDRVReopenState *reopen_state); +void bdrv_reopen_abort(BDRVReopenState *reopen_state); void bdrv_close(BlockDriverState *bs); int bdrv_attach_dev(BlockDriverState *bs, void *dev); void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev); diff --git a/block_int.h b/block_int.h index 4452f6f..7a4e226 100644 --- a/block_int.h +++ b/block_int.h @@ -139,6 +139,12 @@ struct BlockDriver { int instance_size; int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); int (*bdrv_probe_device)(const char *filename); + + /* For handling image reopen for split or non-split files */ + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + int (*bdrv_open)(BlockDriverState *bs, int flags); int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags); int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num, @@ -336,6 +342,13 @@ struct BlockDriverState { /* long-running background operation */ BlockJob *job; + +}; + +struct BDRVReopenState { + BlockDriverState *bs; + int flags; + void *opaque; }; int get_tmp_filename(char *filename, int size); diff --git a/qemu-common.h b/qemu-common.h index e5c2bcd..6a6181c 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -245,6 +245,7 @@ typedef struct NICInfo NICInfo; typedef struct HCIInfo HCIInfo; typedef struct AudioState AudioState; typedef struct BlockDriverState BlockDriverState; +typedef struct BDRVReopenState BDRVReopenState; typedef struct DriveInfo DriveInfo; typedef struct DisplayState DisplayState; typedef struct DisplayChangeListener DisplayChangeListener;
This is based heavily on Supriya Kannery's bdrv_reopen() patch series. This provides a transactional method to reopen multiple images files safely. Image files are queue for reopen via bdrv_reopen_queue(), and the reopen occurs when bdrv_reopen_multiple() is called. Changes are staged in bdrv_reopen_prepare() and in the equivalent driver level functions. If any of the staged images fails a prepare, then all of the images left untouched, and the staged changes for each image abandoned. Signed-off-by: Jeff Cody <jcody@redhat.com> --- block.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ block.h | 15 ++++ block_int.h | 13 ++++ qemu-common.h | 1 + 4 files changed, 255 insertions(+)