Message ID | 1364810491-21404-3-git-send-email-xiawenc@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
Am 01.04.2013 um 12:01 hat Wenchao Xia geschrieben: > Now code for external snapshot are packaged as one case > in qmp_transaction, so later other operation could be added. > The logic in qmp_transaction is changed a bit: Original code > tries to create all images first and then update all *bdrv > once together, new code create and update one *bdrv one time, > and use bdrv_deappend() to rollback on fail. This allows mixing > different kind of requests in qmp_transaction() later. > > Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com> > --- > blockdev.c | 250 +++++++++++++++++++++++++++++++++++++----------------------- > 1 files changed, 153 insertions(+), 97 deletions(-) > > diff --git a/blockdev.c b/blockdev.c > index 8cdc9ce..75416fb 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -779,9 +779,155 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, > > > /* New and old BlockDriverState structs for group snapshots */ > -typedef struct BlkTransactionStates { > +typedef struct BdrvActionOps { > + int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp); > + void (*rollback)(BlockdevAction *action, void *opaque); > + void (*clean)(BlockdevAction *action, void *opaque); > +} BdrvActionOps; You don't really implement the transactional pattern that was used by the original code (and is used elsewhere). It should work like this: 1. Prepare: This stage performs all operations that can fail. They are not made active yet. For example with snapshotting, open a new BlockDriver state, but don't change the backing file chain yet. 2. Commit: Activate the change. This operation can never fail. For this reason, you never have to undo anything done here. 3. Rollback: Basically just free everything that prepare did up to the error. If you do it your way, you get into serious trouble if rollback involves operations that can fail. In the original code, here start the prepare: > @@ -806,125 +950,37 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) > /* We don't do anything in this loop that commits us to the snapshot */ > while (NULL != dev_entry) { > BlockdevAction *dev_info = NULL; > - BlockDriver *proto_drv; > - BlockDriver *drv; > - int flags; > - enum NewImageMode mode; > - const char *new_image_file; > - const char *device; > - const char *format = "qcow2"; > - > dev_info = dev_entry->value; > dev_entry = dev_entry->next; > > states = g_malloc0(sizeof(BlkTransactionStates)); > QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry); > > + states->action = dev_info; > switch (dev_info->kind) { > case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: > - device = dev_info->blockdev_snapshot_sync->device; > - if (!dev_info->blockdev_snapshot_sync->has_mode) { > - dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; > - } > - new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file; > - if (dev_info->blockdev_snapshot_sync->has_format) { > - format = dev_info->blockdev_snapshot_sync->format; > - } > - mode = dev_info->blockdev_snapshot_sync->mode; > + states->ops = &external_snapshot_ops; > break; > default: > abort(); > } > > - drv = bdrv_find_format(format); > - if (!drv) { > - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); > - goto delete_and_fail; > - } > - > - states->old_bs = bdrv_find(device); > - if (!states->old_bs) { > - error_set(errp, QERR_DEVICE_NOT_FOUND, device); > - goto delete_and_fail; > - } > - > - if (!bdrv_is_inserted(states->old_bs)) { > - error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); > - goto delete_and_fail; > - } > - > - if (bdrv_in_use(states->old_bs)) { > - error_set(errp, QERR_DEVICE_IN_USE, device); > - goto delete_and_fail; > - } > - > - if (!bdrv_is_read_only(states->old_bs)) { > - if (bdrv_flush(states->old_bs)) { > - error_set(errp, QERR_IO_ERROR); > - goto delete_and_fail; > - } > - } > - > - flags = states->old_bs->open_flags; > - > - proto_drv = bdrv_find_protocol(new_image_file); > - if (!proto_drv) { > - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); > - goto delete_and_fail; > - } > - > - /* create new image w/backing file */ > - if (mode != NEW_IMAGE_MODE_EXISTING) { > - bdrv_img_create(new_image_file, format, > - states->old_bs->filename, > - states->old_bs->drv->format_name, > - NULL, -1, flags, &local_err, false); > - if (error_is_set(&local_err)) { > - error_propagate(errp, local_err); > - goto delete_and_fail; > - } > - } > - > - /* We will manually add the backing_hd field to the bs later */ > - states->new_bs = bdrv_new(""); > - /* TODO Inherit bs->options or only take explicit options with an > - * extended QMP command? */ > - ret = bdrv_open(states->new_bs, new_image_file, NULL, > - flags | BDRV_O_NO_BACKING, drv); > - if (ret != 0) { > - error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); > + if (states->ops->commit(states->action, &states->opaque, errp)) { > goto delete_and_fail; > } > } The following block is the commit: > - > - /* Now we are going to do the actual pivot. Everything up to this point > - * is reversible, but we are committed at this point */ > - QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { > - /* This removes our old bs from the bdrv_states, and adds the new bs */ > - bdrv_append(states->new_bs, states->old_bs); > - /* We don't need (or want) to use the transactional > - * bdrv_reopen_multiple() across all the entries at once, because we > - * don't want to abort all of them if one of them fails the reopen */ > - bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, > - NULL); > - } > - > /* success */ > goto exit; And this is rollback: > delete_and_fail: > - /* > - * failure, and it is all-or-none; abandon each new bs, and keep using > - * the original bs for all images > - */ > QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { > - if (states->new_bs) { > - bdrv_delete(states->new_bs); > - } > + states->ops->rollback(states->action, states->opaque); > } Kevin
于 2013-4-2 21:55, Kevin Wolf 写道: > Am 01.04.2013 um 12:01 hat Wenchao Xia geschrieben: >> Now code for external snapshot are packaged as one case >> in qmp_transaction, so later other operation could be added. >> The logic in qmp_transaction is changed a bit: Original code >> tries to create all images first and then update all *bdrv >> once together, new code create and update one *bdrv one time, >> and use bdrv_deappend() to rollback on fail. This allows mixing >> different kind of requests in qmp_transaction() later. >> >> Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com> >> --- >> blockdev.c | 250 +++++++++++++++++++++++++++++++++++++----------------------- >> 1 files changed, 153 insertions(+), 97 deletions(-) >> >> diff --git a/blockdev.c b/blockdev.c >> index 8cdc9ce..75416fb 100644 >> --- a/blockdev.c >> +++ b/blockdev.c >> @@ -779,9 +779,155 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, >> >> >> /* New and old BlockDriverState structs for group snapshots */ >> -typedef struct BlkTransactionStates { >> +typedef struct BdrvActionOps { >> + int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp); >> + void (*rollback)(BlockdevAction *action, void *opaque); >> + void (*clean)(BlockdevAction *action, void *opaque); >> +} BdrvActionOps; > > You don't really implement the transactional pattern that was used by > the original code (and is used elsewhere). It should work like this: > > 1. Prepare: This stage performs all operations that can fail. They are > not made active yet. For example with snapshotting, open a new > BlockDriver state, but don't change the backing file chain yet. > > 2. Commit: Activate the change. This operation can never fail. For this > reason, you never have to undo anything done here. > > 3. Rollback: Basically just free everything that prepare did up to the > error. > > If you do it your way, you get into serious trouble if rollback involves > operations that can fail. > > In the original code, here start the prepare: > That is a clear comment, thanks. I changed the model since expecting commit operation may need rollback, if not I am confident to keep original model. Since bdrv_snapshot_delete() can fail, I guess assertion of its success should be used in the rollback later. >> @@ -806,125 +950,37 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) >> /* We don't do anything in this loop that commits us to the snapshot */ >> while (NULL != dev_entry) { >> BlockdevAction *dev_info = NULL; >> - BlockDriver *proto_drv; >> - BlockDriver *drv; >> - int flags; >> - enum NewImageMode mode; >> - const char *new_image_file; >> - const char *device; >> - const char *format = "qcow2"; >> - >> dev_info = dev_entry->value; >> dev_entry = dev_entry->next; >> >> states = g_malloc0(sizeof(BlkTransactionStates)); >> QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry); >> >> + states->action = dev_info; >> switch (dev_info->kind) { >> case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: >> - device = dev_info->blockdev_snapshot_sync->device; >> - if (!dev_info->blockdev_snapshot_sync->has_mode) { >> - dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; >> - } >> - new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file; >> - if (dev_info->blockdev_snapshot_sync->has_format) { >> - format = dev_info->blockdev_snapshot_sync->format; >> - } >> - mode = dev_info->blockdev_snapshot_sync->mode; >> + states->ops = &external_snapshot_ops; >> break; >> default: >> abort(); >> } >> >> - drv = bdrv_find_format(format); >> - if (!drv) { >> - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); >> - goto delete_and_fail; >> - } >> - >> - states->old_bs = bdrv_find(device); >> - if (!states->old_bs) { >> - error_set(errp, QERR_DEVICE_NOT_FOUND, device); >> - goto delete_and_fail; >> - } >> - >> - if (!bdrv_is_inserted(states->old_bs)) { >> - error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); >> - goto delete_and_fail; >> - } >> - >> - if (bdrv_in_use(states->old_bs)) { >> - error_set(errp, QERR_DEVICE_IN_USE, device); >> - goto delete_and_fail; >> - } >> - >> - if (!bdrv_is_read_only(states->old_bs)) { >> - if (bdrv_flush(states->old_bs)) { >> - error_set(errp, QERR_IO_ERROR); >> - goto delete_and_fail; >> - } >> - } >> - >> - flags = states->old_bs->open_flags; >> - >> - proto_drv = bdrv_find_protocol(new_image_file); >> - if (!proto_drv) { >> - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); >> - goto delete_and_fail; >> - } >> - >> - /* create new image w/backing file */ >> - if (mode != NEW_IMAGE_MODE_EXISTING) { >> - bdrv_img_create(new_image_file, format, >> - states->old_bs->filename, >> - states->old_bs->drv->format_name, >> - NULL, -1, flags, &local_err, false); >> - if (error_is_set(&local_err)) { >> - error_propagate(errp, local_err); >> - goto delete_and_fail; >> - } >> - } >> - >> - /* We will manually add the backing_hd field to the bs later */ >> - states->new_bs = bdrv_new(""); >> - /* TODO Inherit bs->options or only take explicit options with an >> - * extended QMP command? */ >> - ret = bdrv_open(states->new_bs, new_image_file, NULL, >> - flags | BDRV_O_NO_BACKING, drv); >> - if (ret != 0) { >> - error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); >> + if (states->ops->commit(states->action, &states->opaque, errp)) { >> goto delete_and_fail; >> } >> } > > The following block is the commit: > >> - >> - /* Now we are going to do the actual pivot. Everything up to this point >> - * is reversible, but we are committed at this point */ >> - QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { >> - /* This removes our old bs from the bdrv_states, and adds the new bs */ >> - bdrv_append(states->new_bs, states->old_bs); >> - /* We don't need (or want) to use the transactional >> - * bdrv_reopen_multiple() across all the entries at once, because we >> - * don't want to abort all of them if one of them fails the reopen */ >> - bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, >> - NULL); >> - } >> - >> /* success */ >> goto exit; > > And this is rollback: > >> delete_and_fail: >> - /* >> - * failure, and it is all-or-none; abandon each new bs, and keep using >> - * the original bs for all images >> - */ >> QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { >> - if (states->new_bs) { >> - bdrv_delete(states->new_bs); >> - } >> + states->ops->rollback(states->action, states->opaque); >> } > > Kevin >
----- Messaggio originale ----- > Da: "Wenchao Xia" <xiawenc@linux.vnet.ibm.com> > A: "Kevin Wolf" <kwolf@redhat.com> > Cc: pbonzini@redhat.com, qemu-devel@nongnu.org, dietmar@proxmox.com, stefanha@gmail.com > Inviato: Mercoledì, 3 aprile 2013 7:51:43 > Oggetto: Re: [Qemu-devel] [PATCH 2/3] block: adjust qmp_transaction to be extendable > > 于 2013-4-2 21:55, Kevin Wolf 写道: > > Am 01.04.2013 um 12:01 hat Wenchao Xia geschrieben: > >> Now code for external snapshot are packaged as one case > >> in qmp_transaction, so later other operation could be added. > >> The logic in qmp_transaction is changed a bit: Original code > >> tries to create all images first and then update all *bdrv > >> once together, new code create and update one *bdrv one time, > >> and use bdrv_deappend() to rollback on fail. This allows mixing > >> different kind of requests in qmp_transaction() later. > >> > >> Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com> > >> --- > >> blockdev.c | 250 > >> +++++++++++++++++++++++++++++++++++++----------------------- > >> 1 files changed, 153 insertions(+), 97 deletions(-) > >> > >> diff --git a/blockdev.c b/blockdev.c > >> index 8cdc9ce..75416fb 100644 > >> --- a/blockdev.c > >> +++ b/blockdev.c > >> @@ -779,9 +779,155 @@ void qmp_blockdev_snapshot_sync(const char *device, > >> const char *snapshot_file, > >> > >> > >> /* New and old BlockDriverState structs for group snapshots */ > >> -typedef struct BlkTransactionStates { > >> +typedef struct BdrvActionOps { > >> + int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp); > >> + void (*rollback)(BlockdevAction *action, void *opaque); > >> + void (*clean)(BlockdevAction *action, void *opaque); > >> +} BdrvActionOps; > > > > You don't really implement the transactional pattern that was used by > > the original code (and is used elsewhere). It should work like this: > > > > 1. Prepare: This stage performs all operations that can fail. They are > > not made active yet. For example with snapshotting, open a new > > BlockDriver state, but don't change the backing file chain yet. > > > > 2. Commit: Activate the change. This operation can never fail. For this > > reason, you never have to undo anything done here. > > > > 3. Rollback: Basically just free everything that prepare did up to the > > error. > > > > If you do it your way, you get into serious trouble if rollback involves > > operations that can fail. > > > > In the original code, here start the prepare: > > That is a clear comment, thanks. I changed the model since expecting > commit operation may need rollback, if not I am confident to keep > original model. > Since bdrv_snapshot_delete() can fail, I guess assertion of its > success should be used in the rollback later. No, if bdrv_snapshot_delete() can fail, you need to split it in two parts: one that can fail, and one that cannot. If you cannot, then there are two possibilities: - if the failures are minor and could be repaired with "qemu-img check -r" (e.g. lost clusters), then this is not optimal but can still be done; - otherwise, the operation simply cannot be made transactionable. In the case of qcow2_snapshot_delete, everything except ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), &header_data, sizeof(header_data)); if (ret < 0) { goto fail; } must be in the prepare phase. Everything after "fail" (which right now is nothing, but it should at least undo the qcow2_alloc_clusters operation) must be in the rollback phase. Everything in the middle is the commit phase. Paolo > > >> @@ -806,125 +950,37 @@ void qmp_transaction(BlockdevActionList *dev_list, > >> Error **errp) > >> /* We don't do anything in this loop that commits us to the snapshot > >> */ > >> while (NULL != dev_entry) { > >> BlockdevAction *dev_info = NULL; > >> - BlockDriver *proto_drv; > >> - BlockDriver *drv; > >> - int flags; > >> - enum NewImageMode mode; > >> - const char *new_image_file; > >> - const char *device; > >> - const char *format = "qcow2"; > >> - > >> dev_info = dev_entry->value; > >> dev_entry = dev_entry->next; > >> > >> states = g_malloc0(sizeof(BlkTransactionStates)); > >> QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry); > >> > >> + states->action = dev_info; > >> switch (dev_info->kind) { > >> case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: > >> - device = dev_info->blockdev_snapshot_sync->device; > >> - if (!dev_info->blockdev_snapshot_sync->has_mode) { > >> - dev_info->blockdev_snapshot_sync->mode = > >> NEW_IMAGE_MODE_ABSOLUTE_PATHS; > >> - } > >> - new_image_file = > >> dev_info->blockdev_snapshot_sync->snapshot_file; > >> - if (dev_info->blockdev_snapshot_sync->has_format) { > >> - format = dev_info->blockdev_snapshot_sync->format; > >> - } > >> - mode = dev_info->blockdev_snapshot_sync->mode; > >> + states->ops = &external_snapshot_ops; > >> break; > >> default: > >> abort(); > >> } > >> > >> - drv = bdrv_find_format(format); > >> - if (!drv) { > >> - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); > >> - goto delete_and_fail; > >> - } > >> - > >> - states->old_bs = bdrv_find(device); > >> - if (!states->old_bs) { > >> - error_set(errp, QERR_DEVICE_NOT_FOUND, device); > >> - goto delete_and_fail; > >> - } > >> - > >> - if (!bdrv_is_inserted(states->old_bs)) { > >> - error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); > >> - goto delete_and_fail; > >> - } > >> - > >> - if (bdrv_in_use(states->old_bs)) { > >> - error_set(errp, QERR_DEVICE_IN_USE, device); > >> - goto delete_and_fail; > >> - } > >> - > >> - if (!bdrv_is_read_only(states->old_bs)) { > >> - if (bdrv_flush(states->old_bs)) { > >> - error_set(errp, QERR_IO_ERROR); > >> - goto delete_and_fail; > >> - } > >> - } > >> - > >> - flags = states->old_bs->open_flags; > >> - > >> - proto_drv = bdrv_find_protocol(new_image_file); > >> - if (!proto_drv) { > >> - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); > >> - goto delete_and_fail; > >> - } > >> - > >> - /* create new image w/backing file */ > >> - if (mode != NEW_IMAGE_MODE_EXISTING) { > >> - bdrv_img_create(new_image_file, format, > >> - states->old_bs->filename, > >> - states->old_bs->drv->format_name, > >> - NULL, -1, flags, &local_err, false); > >> - if (error_is_set(&local_err)) { > >> - error_propagate(errp, local_err); > >> - goto delete_and_fail; > >> - } > >> - } > >> - > >> - /* We will manually add the backing_hd field to the bs later */ > >> - states->new_bs = bdrv_new(""); > >> - /* TODO Inherit bs->options or only take explicit options with an > >> - * extended QMP command? */ > >> - ret = bdrv_open(states->new_bs, new_image_file, NULL, > >> - flags | BDRV_O_NO_BACKING, drv); > >> - if (ret != 0) { > >> - error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); > >> + if (states->ops->commit(states->action, &states->opaque, errp)) { > >> goto delete_and_fail; > >> } > >> } > > > > The following block is the commit: > > > >> - > >> - /* Now we are going to do the actual pivot. Everything up to this > >> point > >> - * is reversible, but we are committed at this point */ > >> - QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { > >> - /* This removes our old bs from the bdrv_states, and adds the new > >> bs */ > >> - bdrv_append(states->new_bs, states->old_bs); > >> - /* We don't need (or want) to use the transactional > >> - * bdrv_reopen_multiple() across all the entries at once, because > >> we > >> - * don't want to abort all of them if one of them fails the > >> reopen */ > >> - bdrv_reopen(states->new_bs, states->new_bs->open_flags & > >> ~BDRV_O_RDWR, > >> - NULL); > >> - } > >> - > >> /* success */ > >> goto exit; > > > > And this is rollback: > > > >> delete_and_fail: > >> - /* > >> - * failure, and it is all-or-none; abandon each new bs, and keep using > >> - * the original bs for all images > >> - */ > >> QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { > >> - if (states->new_bs) { > >> - bdrv_delete(states->new_bs); > >> - } > >> + states->ops->rollback(states->action, states->opaque); > >> } > > > > Kevin > > > > > -- > Best Regards > > Wenchao Xia > >
> > No, if bdrv_snapshot_delete() can fail, you need to split it in two > parts: one that can fail, and one that cannot. If you cannot, then > there are two possibilities: > > - if the failures are minor and could be repaired with "qemu-img check -r" > (e.g. lost clusters), then this is not optimal but can still be done; > > - otherwise, the operation simply cannot be made transactionable. > > In the case of qcow2_snapshot_delete, everything except > > ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), > &header_data, sizeof(header_data)); > if (ret < 0) { > goto fail; > } > > must be in the prepare phase. Everything after "fail" (which right now > is nothing, but it should at least undo the qcow2_alloc_clusters operation) > must be in the rollback phase. Everything in the middle is the commit > phase. > > Paolo > Sorry I haven't state it clearly. What about bdrv_snapshot_create() operation? If it need to be rolled back, I think bdrv_snapshot_delete() will get called and it may fail. But in most case if bdrv_snapshot_create() succeed before, the bdrv_snapshot_delete should succeed also, so if fail there may be unexpected error below, could assert be used for this? >> >>>> @@ -806,125 +950,37 @@ void qmp_transaction(BlockdevActionList *dev_list, >>>> Error **errp) >>>> /* We don't do anything in this loop that commits us to the snapshot >>>> */ >>>> while (NULL != dev_entry) { >>>> BlockdevAction *dev_info = NULL; >>>> - BlockDriver *proto_drv; >>>> - BlockDriver *drv; >>>> - int flags; >>>> - enum NewImageMode mode; >>>> - const char *new_image_file; >>>> - const char *device; >>>> - const char *format = "qcow2"; >>>> - >>>> dev_info = dev_entry->value; >>>> dev_entry = dev_entry->next; >>>> >>>> states = g_malloc0(sizeof(BlkTransactionStates)); >>>> QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry); >>>> >>>> + states->action = dev_info; >>>> switch (dev_info->kind) { >>>> case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: >>>> - device = dev_info->blockdev_snapshot_sync->device; >>>> - if (!dev_info->blockdev_snapshot_sync->has_mode) { >>>> - dev_info->blockdev_snapshot_sync->mode = >>>> NEW_IMAGE_MODE_ABSOLUTE_PATHS; >>>> - } >>>> - new_image_file = >>>> dev_info->blockdev_snapshot_sync->snapshot_file; >>>> - if (dev_info->blockdev_snapshot_sync->has_format) { >>>> - format = dev_info->blockdev_snapshot_sync->format; >>>> - } >>>> - mode = dev_info->blockdev_snapshot_sync->mode; >>>> + states->ops = &external_snapshot_ops; >>>> break; >>>> default: >>>> abort(); >>>> } >>>> >>>> - drv = bdrv_find_format(format); >>>> - if (!drv) { >>>> - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); >>>> - goto delete_and_fail; >>>> - } >>>> - >>>> - states->old_bs = bdrv_find(device); >>>> - if (!states->old_bs) { >>>> - error_set(errp, QERR_DEVICE_NOT_FOUND, device); >>>> - goto delete_and_fail; >>>> - } >>>> - >>>> - if (!bdrv_is_inserted(states->old_bs)) { >>>> - error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); >>>> - goto delete_and_fail; >>>> - } >>>> - >>>> - if (bdrv_in_use(states->old_bs)) { >>>> - error_set(errp, QERR_DEVICE_IN_USE, device); >>>> - goto delete_and_fail; >>>> - } >>>> - >>>> - if (!bdrv_is_read_only(states->old_bs)) { >>>> - if (bdrv_flush(states->old_bs)) { >>>> - error_set(errp, QERR_IO_ERROR); >>>> - goto delete_and_fail; >>>> - } >>>> - } >>>> - >>>> - flags = states->old_bs->open_flags; >>>> - >>>> - proto_drv = bdrv_find_protocol(new_image_file); >>>> - if (!proto_drv) { >>>> - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); >>>> - goto delete_and_fail; >>>> - } >>>> - >>>> - /* create new image w/backing file */ >>>> - if (mode != NEW_IMAGE_MODE_EXISTING) { >>>> - bdrv_img_create(new_image_file, format, >>>> - states->old_bs->filename, >>>> - states->old_bs->drv->format_name, >>>> - NULL, -1, flags, &local_err, false); >>>> - if (error_is_set(&local_err)) { >>>> - error_propagate(errp, local_err); >>>> - goto delete_and_fail; >>>> - } >>>> - } >>>> - >>>> - /* We will manually add the backing_hd field to the bs later */ >>>> - states->new_bs = bdrv_new(""); >>>> - /* TODO Inherit bs->options or only take explicit options with an >>>> - * extended QMP command? */ >>>> - ret = bdrv_open(states->new_bs, new_image_file, NULL, >>>> - flags | BDRV_O_NO_BACKING, drv); >>>> - if (ret != 0) { >>>> - error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); >>>> + if (states->ops->commit(states->action, &states->opaque, errp)) { >>>> goto delete_and_fail; >>>> } >>>> } >>> >>> The following block is the commit: >>> >>>> - >>>> - /* Now we are going to do the actual pivot. Everything up to this >>>> point >>>> - * is reversible, but we are committed at this point */ >>>> - QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { >>>> - /* This removes our old bs from the bdrv_states, and adds the new >>>> bs */ >>>> - bdrv_append(states->new_bs, states->old_bs); >>>> - /* We don't need (or want) to use the transactional >>>> - * bdrv_reopen_multiple() across all the entries at once, because >>>> we >>>> - * don't want to abort all of them if one of them fails the >>>> reopen */ >>>> - bdrv_reopen(states->new_bs, states->new_bs->open_flags & >>>> ~BDRV_O_RDWR, >>>> - NULL); >>>> - } >>>> - >>>> /* success */ >>>> goto exit; >>> >>> And this is rollback: >>> >>>> delete_and_fail: >>>> - /* >>>> - * failure, and it is all-or-none; abandon each new bs, and keep using >>>> - * the original bs for all images >>>> - */ >>>> QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { >>>> - if (states->new_bs) { >>>> - bdrv_delete(states->new_bs); >>>> - } >>>> + states->ops->rollback(states->action, states->opaque); >>>> } >>> >>> Kevin >>> >> >> >> -- >> Best Regards >> >> Wenchao Xia >> >> >
Il 03/04/2013 11:02, Wenchao Xia ha scritto: >> > Sorry I haven't state it clearly. What about bdrv_snapshot_create() > operation? If it need to be rolled back, I think bdrv_snapshot_delete() > will get called and it may fail. But in most case if > bdrv_snapshot_create() succeed before, the bdrv_snapshot_delete should > succeed also, so if fail there may be unexpected error below, could > assert be used for this? No. Transactionable means that commit and rollback cannot fail. For bdrv_snapshot_create() it is the same as for bdrv_snapshot_delete() (the overview I wrote was for qcow2_write_snapshots, which is the common part of bdrv_snapshot_create() and bdrv_snapshot_delete()). Please do one thing at a time. Split the series in multiple pieces if possible. Otherwise you will just fail, and you will have wasted a lot of your and other people's time. Paolo
Am 03.04.2013 um 11:02 hat Wenchao Xia geschrieben: > > > >No, if bdrv_snapshot_delete() can fail, you need to split it in two > >parts: one that can fail, and one that cannot. If you cannot, then > >there are two possibilities: > > > >- if the failures are minor and could be repaired with "qemu-img check -r" > >(e.g. lost clusters), then this is not optimal but can still be done; > > > >- otherwise, the operation simply cannot be made transactionable. > > > >In the case of qcow2_snapshot_delete, everything except > > > > ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), > > &header_data, sizeof(header_data)); > > if (ret < 0) { > > goto fail; > > } > > > >must be in the prepare phase. Everything after "fail" (which right now > >is nothing, but it should at least undo the qcow2_alloc_clusters operation) > >must be in the rollback phase. Everything in the middle is the commit > >phase. > > > >Paolo > > > Sorry I haven't state it clearly. What about bdrv_snapshot_create() > operation? If it need to be rolled back, I think bdrv_snapshot_delete() > will get called and it may fail. This means that you're doing it wrong. Instead of deleting the snapshot on rollback, you shouldn't complete the creation until commit. If the one bdrv_pwrite_sync() in the commit stage fails, we cannot maintain transactional semantics. I guess its unlikely enough. > But in most case if > bdrv_snapshot_create() succeed before, the bdrv_snapshot_delete should > succeed also, so if fail there may be unexpected error below, could > assert be used for this? assert() is only for programming errors, not for things like hardware failure. Kevin
于 2013-4-3 17:02, Wenchao Xia 写道: >> >> No, if bdrv_snapshot_delete() can fail, you need to split it in two >> parts: one that can fail, and one that cannot. If you cannot, then >> there are two possibilities: >> >> - if the failures are minor and could be repaired with "qemu-img check >> -r" >> (e.g. lost clusters), then this is not optimal but can still be done; >> >> - otherwise, the operation simply cannot be made transactionable. >> >> In the case of qcow2_snapshot_delete, everything except >> >> ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), >> &header_data, sizeof(header_data)); >> if (ret < 0) { >> goto fail; >> } >> >> must be in the prepare phase. Everything after "fail" (which right now >> is nothing, but it should at least undo the qcow2_alloc_clusters >> operation) >> must be in the rollback phase. Everything in the middle is the commit >> phase. >> >> Paolo >> > Sorry I haven't state it clearly. What about bdrv_snapshot_create() > operation? If it need to be rolled back, I think bdrv_snapshot_delete() > will get called and it may fail. But in most case if > bdrv_snapshot_create() succeed before, the bdrv_snapshot_delete should > succeed also, so if fail there may be unexpected error below, could > assert be used for this? > After consideration again, I think bdrv_snapshot_create() should be split apart like above, thank u for the tip.
On Mon, Apr 01, 2013 at 06:01:30PM +0800, Wenchao Xia wrote: > /* New and old BlockDriverState structs for group snapshots */ > -typedef struct BlkTransactionStates { > +typedef struct BdrvActionOps { > + int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp); > + void (*rollback)(BlockdevAction *action, void *opaque); > + void (*clean)(BlockdevAction *action, void *opaque); Please document these functions. > +const BdrvActionOps external_snapshot_ops = { > + .commit = external_snapshot_commit, > + .rollback = external_snapshot_rollback, > + .clean = external_snapshot_clean, > +}; > + > +typedef struct BlkTransactionStates { > + BlockdevAction *action; > + void *opaque; > + const BdrvActionOps *ops; > QSIMPLEQ_ENTRY(BlkTransactionStates) entry; > } BlkTransactionStates; The relationship between BlkTransactionStates and ExternalSnapshotState can be simplified: typedef struct { int (*commit)(BlkTransactionState *state, Error **errp); void (*rollback)(BlkTransactionState *state); void (*clean)(BlkTransactionState *state); size_t instance_size; } BdrvActionOps; typedef struct BlkTransactionState { BlockDevAction *action; const BdrvActionOps *ops; QSIMPLEQ_ENTRY(BlkTransactionState) entry; } BlkTransactionState; typedef struct { BlkTransactionState common; BlockDriverState *old_bs; BlockDriverState *new_bs; } ExternalSnapshotState; const BdrvActionOps external_snapshot_ops = { .commit = external_snapshot_commit, .rollback = external_snapshot_rollback, .clean = external_snapshot_clean, .instance_size = sizeof(ExternalSnapshotState); }; This eliminates the opaque pointer and g_free(state) can be handled by qmp_transaction(). This way action types no longer need to free opaque.
于 2013-4-17 22:42, Stefan Hajnoczi 写道: > On Mon, Apr 01, 2013 at 06:01:30PM +0800, Wenchao Xia wrote: >> /* New and old BlockDriverState structs for group snapshots */ >> -typedef struct BlkTransactionStates { >> +typedef struct BdrvActionOps { >> + int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp); >> + void (*rollback)(BlockdevAction *action, void *opaque); >> + void (*clean)(BlockdevAction *action, void *opaque); > > Please document these functions. > OK. >> +const BdrvActionOps external_snapshot_ops = { >> + .commit = external_snapshot_commit, >> + .rollback = external_snapshot_rollback, >> + .clean = external_snapshot_clean, >> +}; >> + >> +typedef struct BlkTransactionStates { >> + BlockdevAction *action; >> + void *opaque; >> + const BdrvActionOps *ops; >> QSIMPLEQ_ENTRY(BlkTransactionStates) entry; >> } BlkTransactionStates; > > The relationship between BlkTransactionStates and ExternalSnapshotState > can be simplified: > > typedef struct { > int (*commit)(BlkTransactionState *state, Error **errp); > void (*rollback)(BlkTransactionState *state); > void (*clean)(BlkTransactionState *state); > size_t instance_size; > } BdrvActionOps; > > typedef struct BlkTransactionState { > BlockDevAction *action; > const BdrvActionOps *ops; > QSIMPLEQ_ENTRY(BlkTransactionState) entry; > } BlkTransactionState; > > typedef struct { > BlkTransactionState common; > BlockDriverState *old_bs; > BlockDriverState *new_bs; > } ExternalSnapshotState; > > const BdrvActionOps external_snapshot_ops = { > .commit = external_snapshot_commit, > .rollback = external_snapshot_rollback, > .clean = external_snapshot_clean, > .instance_size = sizeof(ExternalSnapshotState); > }; > > This eliminates the opaque pointer and g_free(state) can be handled by > qmp_transaction(). This way action types no longer need to free opaque. > Where should be ExternalSnapshotState* placed, insert it into BlkTransactionState, or BdrvActionOps?
On Thu, Apr 18, 2013 at 5:00 AM, Wenchao Xia <xiawenc@linux.vnet.ibm.com> wrote: > 于 2013-4-17 22:42, Stefan Hajnoczi 写道: > >> On Mon, Apr 01, 2013 at 06:01:30PM +0800, Wenchao Xia wrote: >>> >>> /* New and old BlockDriverState structs for group snapshots */ >>> -typedef struct BlkTransactionStates { >>> +typedef struct BdrvActionOps { >>> + int (*commit)(BlockdevAction *action, void **p_opaque, Error >>> **errp); >>> + void (*rollback)(BlockdevAction *action, void *opaque); >>> + void (*clean)(BlockdevAction *action, void *opaque); >> >> >> Please document these functions. >> > OK. > > >>> +const BdrvActionOps external_snapshot_ops = { >>> + .commit = external_snapshot_commit, >>> + .rollback = external_snapshot_rollback, >>> + .clean = external_snapshot_clean, >>> +}; >>> + >>> +typedef struct BlkTransactionStates { >>> + BlockdevAction *action; >>> + void *opaque; >>> + const BdrvActionOps *ops; >>> QSIMPLEQ_ENTRY(BlkTransactionStates) entry; >>> } BlkTransactionStates; >> >> >> The relationship between BlkTransactionStates and ExternalSnapshotState >> can be simplified: >> >> typedef struct { >> int (*commit)(BlkTransactionState *state, Error **errp); >> void (*rollback)(BlkTransactionState *state); >> void (*clean)(BlkTransactionState *state); >> size_t instance_size; >> } BdrvActionOps; >> >> typedef struct BlkTransactionState { >> BlockDevAction *action; >> const BdrvActionOps *ops; >> QSIMPLEQ_ENTRY(BlkTransactionState) entry; >> } BlkTransactionState; >> >> typedef struct { >> BlkTransactionState common; >> BlockDriverState *old_bs; >> BlockDriverState *new_bs; >> } ExternalSnapshotState; >> >> const BdrvActionOps external_snapshot_ops = { >> .commit = external_snapshot_commit, >> .rollback = external_snapshot_rollback, >> .clean = external_snapshot_clean, >> .instance_size = sizeof(ExternalSnapshotState); >> }; >> >> This eliminates the opaque pointer and g_free(state) can be handled by >> qmp_transaction(). This way action types no longer need to free opaque. >> > Where should be ExternalSnapshotState* placed, insert it into > BlkTransactionState, or BdrvActionOps? No explicit pointer is needed since ExternalSnapshotState embeds BlkTransactionState. See container_of() or DO_UPCAST(). Stefan
diff --git a/blockdev.c b/blockdev.c index 8cdc9ce..75416fb 100644 --- a/blockdev.c +++ b/blockdev.c @@ -779,9 +779,155 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, /* New and old BlockDriverState structs for group snapshots */ -typedef struct BlkTransactionStates { +typedef struct BdrvActionOps { + int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp); + void (*rollback)(BlockdevAction *action, void *opaque); + void (*clean)(BlockdevAction *action, void *opaque); +} BdrvActionOps; + +typedef struct ExternalSnapshotState { BlockDriverState *old_bs; BlockDriverState *new_bs; +} ExternalSnapshotState; + +static int external_snapshot_commit(BlockdevAction *action, + void **p_opaque, + Error **errp) +{ + const char *device; + const char *new_image_file; + const char *format = "qcow2"; + enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + BlockDriver *drv, *proto_drv; + ExternalSnapshotState *states = NULL; + int flags, ret; + Error *local_err = NULL; + + g_assert(action->kind == BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC); + + /* get parameters */ + device = action->blockdev_snapshot_sync->device; + new_image_file = action->blockdev_snapshot_sync->snapshot_file; + if (action->blockdev_snapshot_sync->has_format) { + format = action->blockdev_snapshot_sync->format; + } + if (action->blockdev_snapshot_sync->has_mode) { + mode = action->blockdev_snapshot_sync->mode; + } + + /* start processing */ + states = g_malloc0(sizeof(*states)); + drv = bdrv_find_format(format); + if (!drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + goto fail; + } + + states->old_bs = bdrv_find(device); + if (!states->old_bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + goto fail; + } + + if (!bdrv_is_inserted(states->old_bs)) { + error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); + goto fail; + } + + if (bdrv_in_use(states->old_bs)) { + error_set(errp, QERR_DEVICE_IN_USE, device); + goto fail; + } + + if (!bdrv_is_read_only(states->old_bs)) { + if (bdrv_flush(states->old_bs)) { + error_set(errp, QERR_IO_ERROR); + goto fail; + } + } + + proto_drv = bdrv_find_protocol(new_image_file); + if (!proto_drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + goto fail; + } + + flags = states->old_bs->open_flags; + + /* create new image w/backing file */ + if (mode != NEW_IMAGE_MODE_EXISTING) { + bdrv_img_create(new_image_file, format, + states->old_bs->filename, + states->old_bs->drv->format_name, + NULL, -1, flags, &local_err, false); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto fail; + } + } + + states->new_bs = bdrv_new(""); + /* TODO Inherit bs->options or only take explicit options with an + * extended QMP command? */ + ret = bdrv_open(states->new_bs, new_image_file, NULL, + flags | BDRV_O_NO_BACKING, drv); + if (ret != 0) { + error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); + goto fail; + } + + /* This removes our old bs from the bdrv_states, and adds the new bs */ + bdrv_append(states->new_bs, states->old_bs); + /* We don't need (or want) to use the transactional + * bdrv_reopen_multiple() across all the entries at once, because we + * don't want to abort all of them if one of them fails the reopen */ + bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, + NULL); + + *p_opaque = states; + return 0; + +fail: + if (states) { + if (states->new_bs) { + bdrv_delete(states->new_bs); + } + g_free(states); + } + return -1; +} + +static void external_snapshot_rollback(BlockdevAction *action, + void *opaque) +{ + ExternalSnapshotState *states = opaque; + int open_flags; + + if (states) { + open_flags = states->old_bs->open_flags; + bdrv_deappend(states->old_bs); + bdrv_reopen(states->old_bs, open_flags, NULL); + } +} + +static void external_snapshot_clean(BlockdevAction *action, + void *opaque) +{ + ExternalSnapshotState *states = opaque; + + g_free(states); +} + +const BdrvActionOps external_snapshot_ops = { + .commit = external_snapshot_commit, + .rollback = external_snapshot_rollback, + .clean = external_snapshot_clean, +}; + +typedef struct BlkTransactionStates { + BlockdevAction *action; + void *opaque; + const BdrvActionOps *ops; QSIMPLEQ_ENTRY(BlkTransactionStates) entry; } BlkTransactionStates; @@ -792,10 +938,8 @@ typedef struct BlkTransactionStates { */ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) { - int ret = 0; BlockdevActionList *dev_entry = dev_list; BlkTransactionStates *states, *next; - Error *local_err = NULL; QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states; QSIMPLEQ_INIT(&snap_bdrv_states); @@ -806,125 +950,37 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) /* We don't do anything in this loop that commits us to the snapshot */ while (NULL != dev_entry) { BlockdevAction *dev_info = NULL; - BlockDriver *proto_drv; - BlockDriver *drv; - int flags; - enum NewImageMode mode; - const char *new_image_file; - const char *device; - const char *format = "qcow2"; - dev_info = dev_entry->value; dev_entry = dev_entry->next; states = g_malloc0(sizeof(BlkTransactionStates)); QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry); + states->action = dev_info; switch (dev_info->kind) { case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: - device = dev_info->blockdev_snapshot_sync->device; - if (!dev_info->blockdev_snapshot_sync->has_mode) { - dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; - } - new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file; - if (dev_info->blockdev_snapshot_sync->has_format) { - format = dev_info->blockdev_snapshot_sync->format; - } - mode = dev_info->blockdev_snapshot_sync->mode; + states->ops = &external_snapshot_ops; break; default: abort(); } - drv = bdrv_find_format(format); - if (!drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - goto delete_and_fail; - } - - states->old_bs = bdrv_find(device); - if (!states->old_bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); - goto delete_and_fail; - } - - if (!bdrv_is_inserted(states->old_bs)) { - error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); - goto delete_and_fail; - } - - if (bdrv_in_use(states->old_bs)) { - error_set(errp, QERR_DEVICE_IN_USE, device); - goto delete_and_fail; - } - - if (!bdrv_is_read_only(states->old_bs)) { - if (bdrv_flush(states->old_bs)) { - error_set(errp, QERR_IO_ERROR); - goto delete_and_fail; - } - } - - flags = states->old_bs->open_flags; - - proto_drv = bdrv_find_protocol(new_image_file); - if (!proto_drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - goto delete_and_fail; - } - - /* create new image w/backing file */ - if (mode != NEW_IMAGE_MODE_EXISTING) { - bdrv_img_create(new_image_file, format, - states->old_bs->filename, - states->old_bs->drv->format_name, - NULL, -1, flags, &local_err, false); - if (error_is_set(&local_err)) { - error_propagate(errp, local_err); - goto delete_and_fail; - } - } - - /* We will manually add the backing_hd field to the bs later */ - states->new_bs = bdrv_new(""); - /* TODO Inherit bs->options or only take explicit options with an - * extended QMP command? */ - ret = bdrv_open(states->new_bs, new_image_file, NULL, - flags | BDRV_O_NO_BACKING, drv); - if (ret != 0) { - error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); + if (states->ops->commit(states->action, &states->opaque, errp)) { goto delete_and_fail; } } - - /* Now we are going to do the actual pivot. Everything up to this point - * is reversible, but we are committed at this point */ - QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { - /* This removes our old bs from the bdrv_states, and adds the new bs */ - bdrv_append(states->new_bs, states->old_bs); - /* We don't need (or want) to use the transactional - * bdrv_reopen_multiple() across all the entries at once, because we - * don't want to abort all of them if one of them fails the reopen */ - bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, - NULL); - } - /* success */ goto exit; delete_and_fail: - /* - * failure, and it is all-or-none; abandon each new bs, and keep using - * the original bs for all images - */ QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { - if (states->new_bs) { - bdrv_delete(states->new_bs); - } + states->ops->rollback(states->action, states->opaque); } + exit: QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) { + states->ops->clean(states->action, states->opaque); g_free(states); } }
Now code for external snapshot are packaged as one case in qmp_transaction, so later other operation could be added. The logic in qmp_transaction is changed a bit: Original code tries to create all images first and then update all *bdrv once together, new code create and update one *bdrv one time, and use bdrv_deappend() to rollback on fail. This allows mixing different kind of requests in qmp_transaction() later. Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com> --- blockdev.c | 250 +++++++++++++++++++++++++++++++++++++----------------------- 1 files changed, 153 insertions(+), 97 deletions(-)