Message ID | 1372386525-23155-2-git-send-email-imain@redhat.com |
---|---|
State | New |
Headers | show |
Il 28/06/2013 04:28, Ian Main ha scritto: > This patch adds sync-modes to the drive-backup interface and > implements the FULL, NONE and TOP modes of synchronization. > > FULL performs as before copying the entire contents of the drive > while preserving the point-in-time using CoW. > NONE only copies new writes to the target drive. > TOP copies changes to the topmost drive image and preserves the > point-in-time using CoW. > > Signed-off-by: Ian Main <imain@redhat.com> > --- > block/backup.c | 90 +++++++++++++++++++++++++++++++---------------- > blockdev.c | 25 ++++++++----- > include/block/block_int.h | 4 ++- > qapi-schema.json | 4 +++ > qmp-commands.hx | 1 + > 5 files changed, 85 insertions(+), 39 deletions(-) > > diff --git a/block/backup.c b/block/backup.c > index 16105d4..9bad85f 100644 > --- a/block/backup.c > +++ b/block/backup.c > @@ -37,6 +37,7 @@ typedef struct CowRequest { > typedef struct BackupBlockJob { > BlockJob common; > BlockDriverState *target; > + MirrorSyncMode sync_mode; > RateLimit limit; > BlockdevOnError on_source_error; > BlockdevOnError on_target_error; > @@ -247,40 +248,68 @@ static void coroutine_fn backup_run(void *opaque) > > bdrv_add_before_write_notifier(bs, &before_write); > > - for (; start < end; start++) { > - bool error_is_read; > - > - if (block_job_is_cancelled(&job->common)) { > - break; > + if (job->sync_mode == MIRROR_SYNC_MODE_NONE) { > + while (!block_job_is_cancelled(&job->common)) { > + /* Sleep for 100ms (SLICE_TIME). Our only goal here is to > + * catch when the job is cancelled. Otherwise we just let > + * our before_write notify callback service CoW requests. */ > + block_job_sleep_ns(&job->common, rt_clock, SLICE_TIME); I think there is no need to poll here. You can just do qemu_coroutine_yield(). > } > + } else { > + /* Both FULL and TOP SYNC_MODE's require copying.. */ > + for (; start < end; start++) { > + bool error_is_read; > > - /* we need to yield so that qemu_aio_flush() returns. > - * (without, VM does not reboot) > - */ > - if (job->common.speed) { > - uint64_t delay_ns = ratelimit_calculate_delay( > - &job->limit, job->sectors_read); > - job->sectors_read = 0; > - block_job_sleep_ns(&job->common, rt_clock, delay_ns); > - } else { > - block_job_sleep_ns(&job->common, rt_clock, 0); > - } > + if (block_job_is_cancelled(&job->common)) { > + break; > + } > > - if (block_job_is_cancelled(&job->common)) { > - break; > - } > + /* we need to yield so that qemu_aio_flush() returns. > + * (without, VM does not reboot) > + */ > + if (job->common.speed) { > + uint64_t delay_ns = ratelimit_calculate_delay( > + &job->limit, job->sectors_read); > + job->sectors_read = 0; > + block_job_sleep_ns(&job->common, rt_clock, delay_ns); > + } else { > + block_job_sleep_ns(&job->common, rt_clock, 0); > + } > > - ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, > - BACKUP_SECTORS_PER_CLUSTER, &error_is_read); > - if (ret < 0) { > - /* Depending on error action, fail now or retry cluster */ > - BlockErrorAction action = > - backup_error_action(job, error_is_read, -ret); > - if (action == BDRV_ACTION_REPORT) { > + if (block_job_is_cancelled(&job->common)) { > break; > - } else { > - start--; > - continue; > + } > + > + if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { > + int n, alloced; > + > + /* Check to see if these blocks are already in the backing file. */ > + > + alloced = > + bdrv_co_is_allocated_above(bs, bs->backing_hd, > + start * BACKUP_SECTORS_PER_CLUSTER, > + BACKUP_SECTORS_PER_CLUSTER, &n); You can just use bdrv_co_is_allocated, instead of bdrv_co_is_allocated-above. > + /* The above call returns true if the given sector is in the > + * topmost image. If that is the case then we must copy it as > + * it has been modified from the original backing file. > + * Otherwise we skip it. */ > + if (alloced == 0) { > + continue; > + } > + } > + /* FULL sync mode we copy the whole drive. */ > + ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, > + BACKUP_SECTORS_PER_CLUSTER, &error_is_read); > + if (ret < 0) { > + /* Depending on error action, fail now or retry cluster */ > + BlockErrorAction action = > + backup_error_action(job, error_is_read, -ret); > + if (action == BDRV_ACTION_REPORT) { > + break; > + } else { > + start--; > + continue; > + } > } > } > } > @@ -300,7 +329,7 @@ static void coroutine_fn backup_run(void *opaque) > } > > void backup_start(BlockDriverState *bs, BlockDriverState *target, > - int64_t speed, > + int64_t speed, MirrorSyncMode sync_mode, > BlockdevOnError on_source_error, > BlockdevOnError on_target_error, > BlockDriverCompletionFunc *cb, void *opaque, > @@ -335,6 +364,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, > job->on_source_error = on_source_error; > job->on_target_error = on_target_error; > job->target = target; > + job->sync_mode = sync_mode; > job->common.len = len; > job->common.co = qemu_coroutine_create(backup_run); > qemu_coroutine_enter(job->common.co, job); > diff --git a/blockdev.c b/blockdev.c > index c5abd65..000dea6 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -1430,17 +1430,13 @@ void qmp_drive_backup(const char *device, const char *target, > Error **errp) > { > BlockDriverState *bs; > - BlockDriverState *target_bs; > + BlockDriverState *target_bs, *source; > BlockDriver *drv = NULL; > Error *local_err = NULL; > int flags; > int64_t size; > int ret; > > - if (sync != MIRROR_SYNC_MODE_FULL) { > - error_setg(errp, "only sync mode 'full' is currently supported"); > - return; > - } > if (!has_speed) { > speed = 0; > } > @@ -1483,6 +1479,13 @@ void qmp_drive_backup(const char *device, const char *target, > > flags = bs->open_flags | BDRV_O_RDWR; > > + /* See if we have a backing HD we can use to create our new image > + * on top of. */ > + source = bs->backing_hd; Should the source be "bs" for MIRROR_SYNC_MODE_NONE? Also in this case you may want to default the format to "qcow2" instead of bs's format. Paolo > + if (!source && sync == MIRROR_SYNC_MODE_TOP) { > + sync = MIRROR_SYNC_MODE_FULL; > + } > + > size = bdrv_getlength(bs); > if (size < 0) { > error_setg_errno(errp, -size, "bdrv_getlength failed"); > @@ -1491,8 +1494,14 @@ void qmp_drive_backup(const char *device, const char *target, > > if (mode != NEW_IMAGE_MODE_EXISTING) { > assert(format && drv); > - bdrv_img_create(target, format, > - NULL, NULL, NULL, size, flags, &local_err, false); > + if (sync == MIRROR_SYNC_MODE_TOP) { > + bdrv_img_create(target, format, source->filename, > + source->drv->format_name, NULL, > + size, flags, &local_err, false); > + } else { > + bdrv_img_create(target, format, NULL, NULL, NULL, > + size, flags, &local_err, false); > + } > } > > if (error_is_set(&local_err)) { > @@ -1508,7 +1517,7 @@ void qmp_drive_backup(const char *device, const char *target, > return; > } > > - backup_start(bs, target_bs, speed, on_source_error, on_target_error, > + backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, > block_job_cb, bs, &local_err); > if (local_err != NULL) { > bdrv_delete(target_bs); > diff --git a/include/block/block_int.h b/include/block/block_int.h > index c6ac871..e45f2a0 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -404,6 +404,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, > * @bs: Block device to operate on. > * @target: Block device to write to. > * @speed: The maximum speed, in bytes per second, or 0 for unlimited. > + * @sync_mode: What parts of the disk image should be copied to the destination. > * @on_source_error: The action to take upon error reading from the source. > * @on_target_error: The action to take upon error writing to the target. > * @cb: Completion function for the job. > @@ -413,7 +414,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, > * until the job is cancelled or manually completed. > */ > void backup_start(BlockDriverState *bs, BlockDriverState *target, > - int64_t speed, BlockdevOnError on_source_error, > + int64_t speed, MirrorSyncMode sync_mode, > + BlockdevOnError on_source_error, > BlockdevOnError on_target_error, > BlockDriverCompletionFunc *cb, void *opaque, > Error **errp); > diff --git a/qapi-schema.json b/qapi-schema.json > index cdd2d6b..b3f6c2a 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -1807,6 +1807,10 @@ > # @format: #optional the format of the new destination, default is to > # probe if @mode is 'existing', else the format of the source > # > +# @sync: what parts of the disk image should be copied to the destination > +# (all the disk, only the sectors allocated in the topmost image, or > +# only new I/O). > +# > # @mode: #optional whether and how QEMU should create a new image, default is > # 'absolute-paths'. > # > diff --git a/qmp-commands.hx b/qmp-commands.hx > index e075df4..f3f6b3d 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -957,6 +957,7 @@ Arguments: > > Example: > -> { "execute": "drive-backup", "arguments": { "device": "drive0", > + "sync": "full", > "target": "backup.img" } } > <- { "return": {} } > EQMP >
On Mon, Jul 01, 2013 at 02:16:12PM +0200, Paolo Bonzini wrote: > Il 28/06/2013 04:28, Ian Main ha scritto: > > This patch adds sync-modes to the drive-backup interface and > > implements the FULL, NONE and TOP modes of synchronization. > > > > FULL performs as before copying the entire contents of the drive > > while preserving the point-in-time using CoW. > > NONE only copies new writes to the target drive. > > TOP copies changes to the topmost drive image and preserves the > > point-in-time using CoW. > > > > Signed-off-by: Ian Main <imain@redhat.com> > > --- > > block/backup.c | 90 +++++++++++++++++++++++++++++++---------------- > > blockdev.c | 25 ++++++++----- > > include/block/block_int.h | 4 ++- > > qapi-schema.json | 4 +++ > > qmp-commands.hx | 1 + > > 5 files changed, 85 insertions(+), 39 deletions(-) > > > > diff --git a/block/backup.c b/block/backup.c > > index 16105d4..9bad85f 100644 > > --- a/block/backup.c > > +++ b/block/backup.c > > @@ -37,6 +37,7 @@ typedef struct CowRequest { > > typedef struct BackupBlockJob { > > BlockJob common; > > BlockDriverState *target; > > + MirrorSyncMode sync_mode; > > RateLimit limit; > > BlockdevOnError on_source_error; > > BlockdevOnError on_target_error; > > @@ -247,40 +248,68 @@ static void coroutine_fn backup_run(void *opaque) > > > > bdrv_add_before_write_notifier(bs, &before_write); > > > > - for (; start < end; start++) { > > - bool error_is_read; > > - > > - if (block_job_is_cancelled(&job->common)) { > > - break; > > + if (job->sync_mode == MIRROR_SYNC_MODE_NONE) { > > + while (!block_job_is_cancelled(&job->common)) { > > + /* Sleep for 100ms (SLICE_TIME). Our only goal here is to > > + * catch when the job is cancelled. Otherwise we just let > > + * our before_write notify callback service CoW requests. */ > > + block_job_sleep_ns(&job->common, rt_clock, SLICE_TIME); > > I think there is no need to poll here. You can just do > qemu_coroutine_yield(). Yes, I got this working. Much better solution, thanks. > > } > > + } else { > > + /* Both FULL and TOP SYNC_MODE's require copying.. */ > > + for (; start < end; start++) { > > + bool error_is_read; > > > > - /* we need to yield so that qemu_aio_flush() returns. > > - * (without, VM does not reboot) > > - */ > > - if (job->common.speed) { > > - uint64_t delay_ns = ratelimit_calculate_delay( > > - &job->limit, job->sectors_read); > > - job->sectors_read = 0; > > - block_job_sleep_ns(&job->common, rt_clock, delay_ns); > > - } else { > > - block_job_sleep_ns(&job->common, rt_clock, 0); > > - } > > + if (block_job_is_cancelled(&job->common)) { > > + break; > > + } > > > > - if (block_job_is_cancelled(&job->common)) { > > - break; > > - } > > + /* we need to yield so that qemu_aio_flush() returns. > > + * (without, VM does not reboot) > > + */ > > + if (job->common.speed) { > > + uint64_t delay_ns = ratelimit_calculate_delay( > > + &job->limit, job->sectors_read); > > + job->sectors_read = 0; > > + block_job_sleep_ns(&job->common, rt_clock, delay_ns); > > + } else { > > + block_job_sleep_ns(&job->common, rt_clock, 0); > > + } > > > > - ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, > > - BACKUP_SECTORS_PER_CLUSTER, &error_is_read); > > - if (ret < 0) { > > - /* Depending on error action, fail now or retry cluster */ > > - BlockErrorAction action = > > - backup_error_action(job, error_is_read, -ret); > > - if (action == BDRV_ACTION_REPORT) { > > + if (block_job_is_cancelled(&job->common)) { > > break; > > - } else { > > - start--; > > - continue; > > + } > > + > > + if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { > > + int n, alloced; > > + > > + /* Check to see if these blocks are already in the backing file. */ > > + > > + alloced = > > + bdrv_co_is_allocated_above(bs, bs->backing_hd, > > + start * BACKUP_SECTORS_PER_CLUSTER, > > + BACKUP_SECTORS_PER_CLUSTER, &n); > > You can just use bdrv_co_is_allocated, instead of > bdrv_co_is_allocated-above. Excellent. > > + /* The above call returns true if the given sector is in the > > + * topmost image. If that is the case then we must copy it as > > + * it has been modified from the original backing file. > > + * Otherwise we skip it. */ > > + if (alloced == 0) { > > + continue; > > + } > > + } > > + /* FULL sync mode we copy the whole drive. */ > > + ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, > > + BACKUP_SECTORS_PER_CLUSTER, &error_is_read); > > + if (ret < 0) { > > + /* Depending on error action, fail now or retry cluster */ > > + BlockErrorAction action = > > + backup_error_action(job, error_is_read, -ret); > > + if (action == BDRV_ACTION_REPORT) { > > + break; > > + } else { > > + start--; > > + continue; > > + } > > } > > } > > } > > @@ -300,7 +329,7 @@ static void coroutine_fn backup_run(void *opaque) > > } > > > > void backup_start(BlockDriverState *bs, BlockDriverState *target, > > - int64_t speed, > > + int64_t speed, MirrorSyncMode sync_mode, > > BlockdevOnError on_source_error, > > BlockdevOnError on_target_error, > > BlockDriverCompletionFunc *cb, void *opaque, > > @@ -335,6 +364,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, > > job->on_source_error = on_source_error; > > job->on_target_error = on_target_error; > > job->target = target; > > + job->sync_mode = sync_mode; > > job->common.len = len; > > job->common.co = qemu_coroutine_create(backup_run); > > qemu_coroutine_enter(job->common.co, job); > > diff --git a/blockdev.c b/blockdev.c > > index c5abd65..000dea6 100644 > > --- a/blockdev.c > > +++ b/blockdev.c > > @@ -1430,17 +1430,13 @@ void qmp_drive_backup(const char *device, const char *target, > > Error **errp) > > { > > BlockDriverState *bs; > > - BlockDriverState *target_bs; > > + BlockDriverState *target_bs, *source; > > BlockDriver *drv = NULL; > > Error *local_err = NULL; > > int flags; > > int64_t size; > > int ret; > > > > - if (sync != MIRROR_SYNC_MODE_FULL) { > > - error_setg(errp, "only sync mode 'full' is currently supported"); > > - return; > > - } > > if (!has_speed) { > > speed = 0; > > } > > @@ -1483,6 +1479,13 @@ void qmp_drive_backup(const char *device, const char *target, > > > > flags = bs->open_flags | BDRV_O_RDWR; > > > > + /* See if we have a backing HD we can use to create our new image > > + * on top of. */ > > + source = bs->backing_hd; > > Should the source be "bs" for MIRROR_SYNC_MODE_NONE? Also in this case > you may want to default the format to "qcow2" instead of bs's format. I'm not sure that it matters what the source is for NONE. Since we are copying all new writes, whether they would go to a top-most layer or not shouldn't matter? As for qcow2 format, there is a 'format' option to the drive-backup API which specifies the format. I guess we could set the default to qcow2 instead of the source format? Anyone have any opinions on that? I have made the other changes above done. If I don't hear on this issue soon I'll post another revision. Thanks for the review! Ian > Paolo > > > + if (!source && sync == MIRROR_SYNC_MODE_TOP) { > > + sync = MIRROR_SYNC_MODE_FULL; > > + } > > + > > size = bdrv_getlength(bs); > > if (size < 0) { > > error_setg_errno(errp, -size, "bdrv_getlength failed"); > > @@ -1491,8 +1494,14 @@ void qmp_drive_backup(const char *device, const char *target, > > > > if (mode != NEW_IMAGE_MODE_EXISTING) { > > assert(format && drv); > > - bdrv_img_create(target, format, > > - NULL, NULL, NULL, size, flags, &local_err, false); > > + if (sync == MIRROR_SYNC_MODE_TOP) { > > + bdrv_img_create(target, format, source->filename, > > + source->drv->format_name, NULL, > > + size, flags, &local_err, false); > > + } else { > > + bdrv_img_create(target, format, NULL, NULL, NULL, > > + size, flags, &local_err, false); > > + } > > } > > > > if (error_is_set(&local_err)) { > > @@ -1508,7 +1517,7 @@ void qmp_drive_backup(const char *device, const char *target, > > return; > > } > > > > - backup_start(bs, target_bs, speed, on_source_error, on_target_error, > > + backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, > > block_job_cb, bs, &local_err); > > if (local_err != NULL) { > > bdrv_delete(target_bs); > > diff --git a/include/block/block_int.h b/include/block/block_int.h > > index c6ac871..e45f2a0 100644 > > --- a/include/block/block_int.h > > +++ b/include/block/block_int.h > > @@ -404,6 +404,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, > > * @bs: Block device to operate on. > > * @target: Block device to write to. > > * @speed: The maximum speed, in bytes per second, or 0 for unlimited. > > + * @sync_mode: What parts of the disk image should be copied to the destination. > > * @on_source_error: The action to take upon error reading from the source. > > * @on_target_error: The action to take upon error writing to the target. > > * @cb: Completion function for the job. > > @@ -413,7 +414,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, > > * until the job is cancelled or manually completed. > > */ > > void backup_start(BlockDriverState *bs, BlockDriverState *target, > > - int64_t speed, BlockdevOnError on_source_error, > > + int64_t speed, MirrorSyncMode sync_mode, > > + BlockdevOnError on_source_error, > > BlockdevOnError on_target_error, > > BlockDriverCompletionFunc *cb, void *opaque, > > Error **errp); > > diff --git a/qapi-schema.json b/qapi-schema.json > > index cdd2d6b..b3f6c2a 100644 > > --- a/qapi-schema.json > > +++ b/qapi-schema.json > > @@ -1807,6 +1807,10 @@ > > # @format: #optional the format of the new destination, default is to > > # probe if @mode is 'existing', else the format of the source > > # > > +# @sync: what parts of the disk image should be copied to the destination > > +# (all the disk, only the sectors allocated in the topmost image, or > > +# only new I/O). > > +# > > # @mode: #optional whether and how QEMU should create a new image, default is > > # 'absolute-paths'. > > # > > diff --git a/qmp-commands.hx b/qmp-commands.hx > > index e075df4..f3f6b3d 100644 > > --- a/qmp-commands.hx > > +++ b/qmp-commands.hx > > @@ -957,6 +957,7 @@ Arguments: > > > > Example: > > -> { "execute": "drive-backup", "arguments": { "device": "drive0", > > + "sync": "full", > > "target": "backup.img" } } > > <- { "return": {} } > > EQMP > > >
Il 03/07/2013 20:14, Ian Main ha scritto: >> > >> > Should the source be "bs" for MIRROR_SYNC_MODE_NONE? Also in this case >> > you may want to default the format to "qcow2" instead of bs's format. > I'm not sure that it matters what the source is for NONE. Since we are > copying all new writes, whether they would go to a top-most layer or not > shouldn't matter? It would matter for reads of still-uncopied data, though. You have to read from the topmost layer, not the one below. > As for qcow2 format, there is a 'format' option to the drive-backup API > which specifies the format. I guess we could set the default to qcow2 > instead of the source format? Anyone have any opinions on that? That would be another possibility. Perhaps use qcow2 for top or none, and the source format for full. > I have made the other changes above done. If I don't hear on this issue > soon I'll post another revision. You can go ahead and post anyway (just remember to fix the backing file issue), it is a simple patch on top of what you have. Paolo
On Mon, 07/01 14:16, Paolo Bonzini wrote: > Il 28/06/2013 04:28, Ian Main ha scritto: > > diff --git a/blockdev.c b/blockdev.c > > index c5abd65..000dea6 100644 > > --- a/blockdev.c > > +++ b/blockdev.c > > @@ -1430,17 +1430,13 @@ void qmp_drive_backup(const char *device, const char *target, > > Error **errp) > > { > > BlockDriverState *bs; > > - BlockDriverState *target_bs; > > + BlockDriverState *target_bs, *source; > > BlockDriver *drv = NULL; > > Error *local_err = NULL; > > int flags; > > int64_t size; > > int ret; > > > > - if (sync != MIRROR_SYNC_MODE_FULL) { > > - error_setg(errp, "only sync mode 'full' is currently supported"); > > - return; > > - } > > if (!has_speed) { > > speed = 0; > > } > > @@ -1483,6 +1479,13 @@ void qmp_drive_backup(const char *device, const char *target, > > > > flags = bs->open_flags | BDRV_O_RDWR; > > > > + /* See if we have a backing HD we can use to create our new image > > + * on top of. */ > > + source = bs->backing_hd; > > Should the source be "bs" for MIRROR_SYNC_MODE_NONE? Also in this case > you may want to default the format to "qcow2" instead of bs's format. > > Paolo > Maybe not. "source" only affects when sync=top below here. For reading the uncopied for target from source, we can't simply assign it to "bs" for sync=none and create the new image with with bs->filename as backing file, because that way we don't know how to open it with what we have now: the backing file is already opened RW (as "bs"). Fam > > + if (!source && sync == MIRROR_SYNC_MODE_TOP) { > > + sync = MIRROR_SYNC_MODE_FULL; > > + } > > + > > size = bdrv_getlength(bs); > > if (size < 0) { > > error_setg_errno(errp, -size, "bdrv_getlength failed"); > > @@ -1491,8 +1494,14 @@ void qmp_drive_backup(const char *device, const char *target, > > > > if (mode != NEW_IMAGE_MODE_EXISTING) { > > assert(format && drv); > > - bdrv_img_create(target, format, > > - NULL, NULL, NULL, size, flags, &local_err, false); > > + if (sync == MIRROR_SYNC_MODE_TOP) { > > + bdrv_img_create(target, format, source->filename, > > + source->drv->format_name, NULL, > > + size, flags, &local_err, false); > > + } else { > > + bdrv_img_create(target, format, NULL, NULL, NULL, > > + size, flags, &local_err, false); > > + } > > } > > > > if (error_is_set(&local_err)) { > > @@ -1508,7 +1517,7 @@ void qmp_drive_backup(const char *device, const char *target, > > return; > > } > > > > - backup_start(bs, target_bs, speed, on_source_error, on_target_error, > > + backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, > > block_job_cb, bs, &local_err); > > if (local_err != NULL) { > > bdrv_delete(target_bs); > > diff --git a/include/block/block_int.h b/include/block/block_int.h > > index c6ac871..e45f2a0 100644 > > --- a/include/block/block_int.h > > +++ b/include/block/block_int.h > > @@ -404,6 +404,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, > > * @bs: Block device to operate on. > > * @target: Block device to write to. > > * @speed: The maximum speed, in bytes per second, or 0 for unlimited. > > + * @sync_mode: What parts of the disk image should be copied to the destination. > > * @on_source_error: The action to take upon error reading from the source. > > * @on_target_error: The action to take upon error writing to the target. > > * @cb: Completion function for the job. > > @@ -413,7 +414,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, > > * until the job is cancelled or manually completed. > > */ > > void backup_start(BlockDriverState *bs, BlockDriverState *target, > > - int64_t speed, BlockdevOnError on_source_error, > > + int64_t speed, MirrorSyncMode sync_mode, > > + BlockdevOnError on_source_error, > > BlockdevOnError on_target_error, > > BlockDriverCompletionFunc *cb, void *opaque, > > Error **errp); > > diff --git a/qapi-schema.json b/qapi-schema.json > > index cdd2d6b..b3f6c2a 100644 > > --- a/qapi-schema.json > > +++ b/qapi-schema.json > > @@ -1807,6 +1807,10 @@ > > # @format: #optional the format of the new destination, default is to > > # probe if @mode is 'existing', else the format of the source > > # > > +# @sync: what parts of the disk image should be copied to the destination > > +# (all the disk, only the sectors allocated in the topmost image, or > > +# only new I/O). > > +# > > # @mode: #optional whether and how QEMU should create a new image, default is > > # 'absolute-paths'. > > # > > diff --git a/qmp-commands.hx b/qmp-commands.hx > > index e075df4..f3f6b3d 100644 > > --- a/qmp-commands.hx > > +++ b/qmp-commands.hx > > @@ -957,6 +957,7 @@ Arguments: > > > > Example: > > -> { "execute": "drive-backup", "arguments": { "device": "drive0", > > + "sync": "full", > > "target": "backup.img" } } > > <- { "return": {} } > > EQMP > > > >
Il 08/07/2013 11:21, Fam Zheng ha scritto: > > Should the source be "bs" for MIRROR_SYNC_MODE_NONE? Also in this case > > you may want to default the format to "qcow2" instead of bs's format. > > Maybe not. "source" only affects when sync=top below here. For reading > the uncopied for target from source, we can't simply assign it to "bs" > for sync=none and create the new image with with bs->filename as backing > file, because that way we don't know how to open it with what we have > now: the backing file is already opened RW (as "bs"). But if we do not set the source, how can you read at all from the temporary backup destination? If I read the code correctly, target_bs should be opened with BDRV_O_NO_BACKING, and then you should set target_bs->backing_hd = bs. Which in turn would only work for a format that supports backing files (such as qcow2). I'm not sure how the "none" mode works with these patches. It's quite possible I'm wrong, of course, but then the explanation should be in the code or the commit message. It should be in the code or the commit message even if my suggestions are applied. :) Paolo
On Mon, Jul 15, 2013 at 12:50:39PM +0200, Paolo Bonzini wrote: > Il 08/07/2013 11:21, Fam Zheng ha scritto: > > > Should the source be "bs" for MIRROR_SYNC_MODE_NONE? Also in this case > > > you may want to default the format to "qcow2" instead of bs's format. > > > > Maybe not. "source" only affects when sync=top below here. For reading > > the uncopied for target from source, we can't simply assign it to "bs" > > for sync=none and create the new image with with bs->filename as backing > > file, because that way we don't know how to open it with what we have > > now: the backing file is already opened RW (as "bs"). > > But if we do not set the source, how can you read at all from the > temporary backup destination? > > If I read the code correctly, target_bs should be opened with > BDRV_O_NO_BACKING, and then you should set target_bs->backing_hd = bs. > Which in turn would only work for a format that supports backing files > (such as qcow2). OK well, I'll explain here my understanding. I apologize if I explain more than needed but it might be good to get this out there anyway. When we do the create with: bdrv_img_create(target, format, source->filename, source->drv->format_name, NULL, size, flags, &local_err, false); We are creating a new target image using the same backing file as the original disk image. Then any new data that has been laid on top of it since creation is copied in the main backup_run() loop. There is an extra check in the 'TOP' case so that we don't bother to copy all the data of the backing file as it already exists in the target. This is where the bdrv_co_is_allocated() is used to determine if the data exists in the topmost layer or below. Also any new data being written is intercepted via the write_notifier hook which ends up calling backup_do_cow() to copy old data out before it gets overwritten. > I'm not sure how the "none" mode works with these patches. It's quite > possible I'm wrong, of course, but then the explanation should be in the > code or the commit message. It should be in the code or the commit > message even if my suggestions are applied. :) For mode 'NONE' we create the new target image and only copy in the original data from the disk image starting from the time the call was made. This preserves the point in time data by only copying the parts that are *going to change* to the target image. This way we can reconstruct the final image by checking to see if the given block exists in the new target image first, and if it does not, you can get it from the original image. This is basically an optimization allowing you to do point-in-time snapshots with low overhead vs the 'FULL' version. Since there is no old data to copy out the loop in backup_run() for the NONE case just calls qemu_coroutine_yield() which only wakes up after an event (usually cancel in this case). The rest is handled by the before_write notifier which again calls backup_do_cow() to write out the old data so it can be preserved. Does that help? Ian
Il 15/07/2013 19:49, Ian Main ha scritto: > OK well, I'll explain here my understanding. I apologize if I explain > more than needed but it might be good to get this out there anyway. No problem, it's better to be verbose than to have an extra iteration. > When we do the create with: > > bdrv_img_create(target, format, source->filename, > source->drv->format_name, NULL, > size, flags, &local_err, false); > > We are creating a new target image using the same backing file as the > original disk image. Then any new data that has been laid on top of it > since creation is copied in the main backup_run() loop. There is an > extra check in the 'TOP' case so that we don't bother to copy all the > data of the backing file as it already exists in the target. This is > where the bdrv_co_is_allocated() is used to determine if the data exists > in the topmost layer or below. Yes, so far I understood it. >> I'm not sure how the "none" mode works with these patches. It's quite >> possible I'm wrong, of course, but then the explanation should be in the >> code or the commit message. It should be in the code or the commit >> message even if my suggestions are applied. :) > > For mode 'NONE' we create the new target image and only copy in the > original data from the disk image starting from the time the call was > made. This preserves the point in time data by only copying the parts > that are *going to change* to the target image. This way we can > reconstruct the final image by checking to see if the given block exists > in the new target image first, and if it does not, you can get it from > the original image. This is basically an optimization allowing you to > do point-in-time snapshots with low overhead vs the 'FULL' version. > > Since there is no old data to copy out the loop in backup_run() for the > NONE case just calls qemu_coroutine_yield() which only wakes up after an > event (usually cancel in this case). The rest is handled by the > before_write notifier which again calls backup_do_cow() to write out the > old data so it can be preserved. > > Does that help? Yes, it helps---I think I'm right. :) For mode 'NONE' we only copy in the original data from the disk image, but what makes us read the old image for the sectors we haven't copied yet? For that to work, we need to set target_bs->backing_hd. Now, the question is whether to do it for mode 'NONE' only, or also for the others. It is certainly required for mode 'NONE', because this mode will never get complete data in the destination. But it may actually be a good idea for other sync modes. This is because block-backup is a sort of live snapshot, except that the active image remains the pre-snapshot one. Thus it makes sense to have the same defaults as blockdev-snapshot-live, including making "qcow2" the default format. A user that wants to use an NBD client target for backup (this is different from fleecing, which uses a temporary file + an NBD server) should specify a "raw" format anyway, independent of the default we choose, so this change doesn't affect other use cases. On top of this, target_bs->backing_hd needs to be set manually to bs itself during the copy (*even if the source used in bdrv_img_create is bs->backing_hd, or is NULL!*), and reset just before closing target_bs. This way, if the target is exposed via the NBD server, it reads as a full copy of the image even while the backup is ongoing. This can be done using BDRV_O_NO_BACKING. Paolo
On Mon, Jul 15, 2013 at 10:47:25PM +0200, Paolo Bonzini wrote: > Il 15/07/2013 19:49, Ian Main ha scritto: > > OK well, I'll explain here my understanding. I apologize if I explain > > more than needed but it might be good to get this out there anyway. > > No problem, it's better to be verbose than to have an extra iteration. > > > When we do the create with: > > > > bdrv_img_create(target, format, source->filename, > > source->drv->format_name, NULL, > > size, flags, &local_err, false); > > > > We are creating a new target image using the same backing file as the > > original disk image. Then any new data that has been laid on top of it > > since creation is copied in the main backup_run() loop. There is an > > extra check in the 'TOP' case so that we don't bother to copy all the > > data of the backing file as it already exists in the target. This is > > where the bdrv_co_is_allocated() is used to determine if the data exists > > in the topmost layer or below. > > Yes, so far I understood it. > > >> I'm not sure how the "none" mode works with these patches. It's quite > >> possible I'm wrong, of course, but then the explanation should be in the > >> code or the commit message. It should be in the code or the commit > >> message even if my suggestions are applied. :) > > > > For mode 'NONE' we create the new target image and only copy in the > > original data from the disk image starting from the time the call was > > made. This preserves the point in time data by only copying the parts > > that are *going to change* to the target image. This way we can > > reconstruct the final image by checking to see if the given block exists > > in the new target image first, and if it does not, you can get it from > > the original image. This is basically an optimization allowing you to > > do point-in-time snapshots with low overhead vs the 'FULL' version. > > > > Since there is no old data to copy out the loop in backup_run() for the > > NONE case just calls qemu_coroutine_yield() which only wakes up after an > > event (usually cancel in this case). The rest is handled by the > > before_write notifier which again calls backup_do_cow() to write out the > > old data so it can be preserved. > > > > Does that help? > > Yes, it helps---I think I'm right. :) > > For mode 'NONE' we only copy in the original data from the disk image, > but what makes us read the old image for the sectors we haven't copied > yet? For that to work, we need to set target_bs->backing_hd. > > Now, the question is whether to do it for mode 'NONE' only, or also for > the others. It is certainly required for mode 'NONE', because this mode > will never get complete data in the destination. But it may actually be > a good idea for other sync modes. This is because block-backup is a > sort of live snapshot, except that the active image remains the > pre-snapshot one. Thus it makes sense to have the same defaults as > blockdev-snapshot-live, including making "qcow2" the default format. > > A user that wants to use an NBD client target for backup (this is > different from fleecing, which uses a temporary file + an NBD server) > should specify a "raw" format anyway, independent of the default we > choose, so this change doesn't affect other use cases. > > On top of this, target_bs->backing_hd needs to be set manually to bs > itself during the copy (*even if the source used in bdrv_img_create is > bs->backing_hd, or is NULL!*), and reset just before closing target_bs. > This way, if the target is exposed via the NBD server, it reads as a > full copy of the image even while the backup is ongoing. This can be > done using BDRV_O_NO_BACKING. > > Paolo Yes I see what you are saying now. I really like this idea. I will make up a new patch and include a separate patch to default to qcow2 as well. Ian
diff --git a/block/backup.c b/block/backup.c index 16105d4..9bad85f 100644 --- a/block/backup.c +++ b/block/backup.c @@ -37,6 +37,7 @@ typedef struct CowRequest { typedef struct BackupBlockJob { BlockJob common; BlockDriverState *target; + MirrorSyncMode sync_mode; RateLimit limit; BlockdevOnError on_source_error; BlockdevOnError on_target_error; @@ -247,40 +248,68 @@ static void coroutine_fn backup_run(void *opaque) bdrv_add_before_write_notifier(bs, &before_write); - for (; start < end; start++) { - bool error_is_read; - - if (block_job_is_cancelled(&job->common)) { - break; + if (job->sync_mode == MIRROR_SYNC_MODE_NONE) { + while (!block_job_is_cancelled(&job->common)) { + /* Sleep for 100ms (SLICE_TIME). Our only goal here is to + * catch when the job is cancelled. Otherwise we just let + * our before_write notify callback service CoW requests. */ + block_job_sleep_ns(&job->common, rt_clock, SLICE_TIME); } + } else { + /* Both FULL and TOP SYNC_MODE's require copying.. */ + for (; start < end; start++) { + bool error_is_read; - /* we need to yield so that qemu_aio_flush() returns. - * (without, VM does not reboot) - */ - if (job->common.speed) { - uint64_t delay_ns = ratelimit_calculate_delay( - &job->limit, job->sectors_read); - job->sectors_read = 0; - block_job_sleep_ns(&job->common, rt_clock, delay_ns); - } else { - block_job_sleep_ns(&job->common, rt_clock, 0); - } + if (block_job_is_cancelled(&job->common)) { + break; + } - if (block_job_is_cancelled(&job->common)) { - break; - } + /* we need to yield so that qemu_aio_flush() returns. + * (without, VM does not reboot) + */ + if (job->common.speed) { + uint64_t delay_ns = ratelimit_calculate_delay( + &job->limit, job->sectors_read); + job->sectors_read = 0; + block_job_sleep_ns(&job->common, rt_clock, delay_ns); + } else { + block_job_sleep_ns(&job->common, rt_clock, 0); + } - ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, - BACKUP_SECTORS_PER_CLUSTER, &error_is_read); - if (ret < 0) { - /* Depending on error action, fail now or retry cluster */ - BlockErrorAction action = - backup_error_action(job, error_is_read, -ret); - if (action == BDRV_ACTION_REPORT) { + if (block_job_is_cancelled(&job->common)) { break; - } else { - start--; - continue; + } + + if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { + int n, alloced; + + /* Check to see if these blocks are already in the backing file. */ + + alloced = + bdrv_co_is_allocated_above(bs, bs->backing_hd, + start * BACKUP_SECTORS_PER_CLUSTER, + BACKUP_SECTORS_PER_CLUSTER, &n); + /* The above call returns true if the given sector is in the + * topmost image. If that is the case then we must copy it as + * it has been modified from the original backing file. + * Otherwise we skip it. */ + if (alloced == 0) { + continue; + } + } + /* FULL sync mode we copy the whole drive. */ + ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, + BACKUP_SECTORS_PER_CLUSTER, &error_is_read); + if (ret < 0) { + /* Depending on error action, fail now or retry cluster */ + BlockErrorAction action = + backup_error_action(job, error_is_read, -ret); + if (action == BDRV_ACTION_REPORT) { + break; + } else { + start--; + continue; + } } } } @@ -300,7 +329,7 @@ static void coroutine_fn backup_run(void *opaque) } void backup_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, + int64_t speed, MirrorSyncMode sync_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, @@ -335,6 +364,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, job->on_source_error = on_source_error; job->on_target_error = on_target_error; job->target = target; + job->sync_mode = sync_mode; job->common.len = len; job->common.co = qemu_coroutine_create(backup_run); qemu_coroutine_enter(job->common.co, job); diff --git a/blockdev.c b/blockdev.c index c5abd65..000dea6 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1430,17 +1430,13 @@ void qmp_drive_backup(const char *device, const char *target, Error **errp) { BlockDriverState *bs; - BlockDriverState *target_bs; + BlockDriverState *target_bs, *source; BlockDriver *drv = NULL; Error *local_err = NULL; int flags; int64_t size; int ret; - if (sync != MIRROR_SYNC_MODE_FULL) { - error_setg(errp, "only sync mode 'full' is currently supported"); - return; - } if (!has_speed) { speed = 0; } @@ -1483,6 +1479,13 @@ void qmp_drive_backup(const char *device, const char *target, flags = bs->open_flags | BDRV_O_RDWR; + /* See if we have a backing HD we can use to create our new image + * on top of. */ + source = bs->backing_hd; + if (!source && sync == MIRROR_SYNC_MODE_TOP) { + sync = MIRROR_SYNC_MODE_FULL; + } + size = bdrv_getlength(bs); if (size < 0) { error_setg_errno(errp, -size, "bdrv_getlength failed"); @@ -1491,8 +1494,14 @@ void qmp_drive_backup(const char *device, const char *target, if (mode != NEW_IMAGE_MODE_EXISTING) { assert(format && drv); - bdrv_img_create(target, format, - NULL, NULL, NULL, size, flags, &local_err, false); + if (sync == MIRROR_SYNC_MODE_TOP) { + bdrv_img_create(target, format, source->filename, + source->drv->format_name, NULL, + size, flags, &local_err, false); + } else { + bdrv_img_create(target, format, NULL, NULL, NULL, + size, flags, &local_err, false); + } } if (error_is_set(&local_err)) { @@ -1508,7 +1517,7 @@ void qmp_drive_backup(const char *device, const char *target, return; } - backup_start(bs, target_bs, speed, on_source_error, on_target_error, + backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { bdrv_delete(target_bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index c6ac871..e45f2a0 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -404,6 +404,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, * @bs: Block device to operate on. * @target: Block device to write to. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @sync_mode: What parts of the disk image should be copied to the destination. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @cb: Completion function for the job. @@ -413,7 +414,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, * until the job is cancelled or manually completed. */ void backup_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, BlockdevOnError on_source_error, + int64_t speed, MirrorSyncMode sync_mode, + BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); diff --git a/qapi-schema.json b/qapi-schema.json index cdd2d6b..b3f6c2a 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1807,6 +1807,10 @@ # @format: #optional the format of the new destination, default is to # probe if @mode is 'existing', else the format of the source # +# @sync: what parts of the disk image should be copied to the destination +# (all the disk, only the sectors allocated in the topmost image, or +# only new I/O). +# # @mode: #optional whether and how QEMU should create a new image, default is # 'absolute-paths'. # diff --git a/qmp-commands.hx b/qmp-commands.hx index e075df4..f3f6b3d 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -957,6 +957,7 @@ Arguments: Example: -> { "execute": "drive-backup", "arguments": { "device": "drive0", + "sync": "full", "target": "backup.img" } } <- { "return": {} } EQMP
This patch adds sync-modes to the drive-backup interface and implements the FULL, NONE and TOP modes of synchronization. FULL performs as before copying the entire contents of the drive while preserving the point-in-time using CoW. NONE only copies new writes to the target drive. TOP copies changes to the topmost drive image and preserves the point-in-time using CoW. Signed-off-by: Ian Main <imain@redhat.com> --- block/backup.c | 90 +++++++++++++++++++++++++++++++---------------- blockdev.c | 25 ++++++++----- include/block/block_int.h | 4 ++- qapi-schema.json | 4 +++ qmp-commands.hx | 1 + 5 files changed, 85 insertions(+), 39 deletions(-)