Message ID | 5ab9de38e11e20ff18958669c8a2308b1f7a778d.1380141614.git.jcody@redhat.com |
---|---|
State | New |
Headers | show |
On Wed, Sep 25, 2013 at 05:02:56PM -0400, Jeff Cody wrote: > @@ -1070,7 +1070,43 @@ exit: > return ret; > } > > +/* > + * Allocate a new payload block at the end of the file. > + * > + * Allocation will happen at 1MB alignment inside the file > + * > + * Returns the file offset start of the new payload block > + */ > +static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, > + uint64_t *new_offset) > +{ > + *new_offset = bdrv_getlength(bs->file); > + > + /* per the spec, the address for a block is in units of 1MB */ > + *new_offset = ROUND_UP(*new_offset, 1024*1024); > + > + return bdrv_truncate(bs->file, *new_offset + s->block_size); > +} > + > +/* > + * Update the BAT tablet entry with the new file offset, and the new entry s/tablet/table/ > + * state */ > +static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s, > + VHDXSectorInfo *sinfo, > + uint64_t *bat_entry, > + uint64_t *bat_offset, int state) > +{ > + /* The BAT entry is a uint64, with 44 bits for the file offset in units of > + * 1MB, and 3 bits for the block state. */ > + s->bat[sinfo->bat_idx] = ((sinfo->file_offset>>20) << QEMU coding style requires space around the '>>'. Please run scripts/checkpatch.pl. > + VHDX_BAT_FILE_OFF_BITS); > > + s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK; > + > + *bat_entry = cpu_to_le64(s->bat[sinfo->bat_idx]); Call this argument bat_entry_le so callers know now to manipulate the little-endian value directly? > + *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry); > + > +} > > /* Per the spec, on the first write of guest-visible data to the file the > * data write guid must be updated in the header */ > @@ -1087,7 +1123,117 @@ int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s) > static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, > int nb_sectors, QEMUIOVector *qiov) > { > - return -ENOTSUP; > + int ret = -ENOTSUP; > + BDRVVHDXState *s = bs->opaque; > + VHDXSectorInfo sinfo; > + uint64_t bytes_done = 0; > + uint64_t bat_entry = 0; > + uint64_t bat_entry_offset = 0; > + bool bat_update; > + QEMUIOVector hd_qiov; > + > + qemu_iovec_init(&hd_qiov, qiov->niov); > + > + qemu_co_mutex_lock(&s->lock); > + > + ret = vhdx_user_visible_write(bs, s); > + if (ret < 0) { > + goto exit; > + } > + > + while (nb_sectors > 0) { > + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { > + /* not supported yet */ > + ret = -ENOTSUP; > + goto exit; > + } else { > + bat_update = false; > + vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); > + > + qemu_iovec_reset(&hd_qiov); > + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail); > + /* check the payload block state */ > + switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) { > + case PAYLOAD_BLOCK_ZERO: > + /* in this case, we need to preserve zero writes for > + * data that is not part of this write, so we must pad > + * the rest of the buffer to zeroes */ > + > + /* if we are on a posix system with ftruncate() that extends > + * a file, then it is zero-filled for us. On Win32, the raw > + * layer uses SetFilePointer and SetFileEnd, which does not > + * zero fill AFAIK */ > + > + /* TODO: queue another write of zero buffers if the host OS does > + * not zero-fill on file extension */ This is never addressed in the series. > + > + /* fall through */ > + case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ > + case PAYLOAD_BLOCK_UNMAPPED: /* fall through */ > + case PAYLOAD_BLOCK_UNDEFINED: /* fall through */ > + ret = vhdx_allocate_block(bs, s, &sinfo.file_offset); > + if (ret < 0) { > + goto exit; > + } > + /* once we support differencing files, this may also be > + * partially present */ > + /* update block state to the newly specified state */ > + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, > + &bat_entry_offset, > + PAYLOAD_BLOCK_FULL_PRESENT); In the specification the constant is called PAYLOAD_BLOCK_FULLY_PRESENT. Is s/FULLY/FULL/ intentional and can you match the spec for consistency? > + bat_update = true; > + /* since we just allocated a block, file_offset is the > + * beginning of the payload block. It needs to be the > + * write address, which includes the offset into the block */ > + sinfo.file_offset += sinfo.block_offset; > + /* fall through */ > + case PAYLOAD_BLOCK_FULL_PRESENT: > + /* if the file offset address is in the header zone, > + * there is a problem */ > + if (sinfo.file_offset < (1024*1024)) { > + ret = -EFAULT; > + goto exit; > + } > + /* block exists, so we can just overwrite it */ > + qemu_co_mutex_unlock(&s->lock); > + ret = bdrv_co_writev(bs->file, > + sinfo.file_offset>>BDRV_SECTOR_BITS, > + sinfo.sectors_avail, &hd_qiov); > + qemu_co_mutex_lock(&s->lock); > + if (ret < 0) { > + goto exit; > + } > + break; > + case PAYLOAD_BLOCK_PARTIALLY_PRESENT: > + /* we don't yet support difference files, fall through > + * to error */ > + default: > + ret = -EIO; > + goto exit; > + break; > + } > + > + if (bat_update) { Data must be stable on disk before we can expose the updated BAT entry. Missing bdrv_co_flush().
On Tue, Oct 01, 2013 at 03:29:17PM +0200, Stefan Hajnoczi wrote: > On Wed, Sep 25, 2013 at 05:02:56PM -0400, Jeff Cody wrote: > > @@ -1070,7 +1070,43 @@ exit: > > return ret; > > } > > > > +/* > > + * Allocate a new payload block at the end of the file. > > + * > > + * Allocation will happen at 1MB alignment inside the file > > + * > > + * Returns the file offset start of the new payload block > > + */ > > +static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, > > + uint64_t *new_offset) > > +{ > > + *new_offset = bdrv_getlength(bs->file); > > + > > + /* per the spec, the address for a block is in units of 1MB */ > > + *new_offset = ROUND_UP(*new_offset, 1024*1024); > > + > > + return bdrv_truncate(bs->file, *new_offset + s->block_size); > > +} > > + > > +/* > > + * Update the BAT tablet entry with the new file offset, and the new entry > > s/tablet/table/ > > > + * state */ > > +static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s, > > + VHDXSectorInfo *sinfo, > > + uint64_t *bat_entry, > > + uint64_t *bat_offset, int state) > > +{ > > + /* The BAT entry is a uint64, with 44 bits for the file offset in units of > > + * 1MB, and 3 bits for the block state. */ > > + s->bat[sinfo->bat_idx] = ((sinfo->file_offset>>20) << > > QEMU coding style requires space around the '>>'. Please run > scripts/checkpatch.pl. I missed that - but I did run checkpatch.pl, it did not catch it: vhdx-v6/0011-block-vhdx-write-support.patch has no obvious style problems and is ready for submission. total: 0 errors, 0 warnings, 31 lines checked I think the parenthesis hid it. > > > + VHDX_BAT_FILE_OFF_BITS); > > > > + s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK; > > + > > + *bat_entry = cpu_to_le64(s->bat[sinfo->bat_idx]); > > Call this argument bat_entry_le so callers know now to manipulate the > little-endian value directly? > Yes, good idea. > > + *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry); > > + > > +} > > > > /* Per the spec, on the first write of guest-visible data to the file the > > * data write guid must be updated in the header */ > > @@ -1087,7 +1123,117 @@ int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s) > > static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, > > int nb_sectors, QEMUIOVector *qiov) > > { > > - return -ENOTSUP; > > + int ret = -ENOTSUP; > > + BDRVVHDXState *s = bs->opaque; > > + VHDXSectorInfo sinfo; > > + uint64_t bytes_done = 0; > > + uint64_t bat_entry = 0; > > + uint64_t bat_entry_offset = 0; > > + bool bat_update; > > + QEMUIOVector hd_qiov; > > + > > + qemu_iovec_init(&hd_qiov, qiov->niov); > > + > > + qemu_co_mutex_lock(&s->lock); > > + > > + ret = vhdx_user_visible_write(bs, s); > > + if (ret < 0) { > > + goto exit; > > + } > > + > > + while (nb_sectors > 0) { > > + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { > > + /* not supported yet */ > > + ret = -ENOTSUP; > > + goto exit; > > + } else { > > + bat_update = false; > > + vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); > > + > > + qemu_iovec_reset(&hd_qiov); > > + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail); > > + /* check the payload block state */ > > + switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) { > > + case PAYLOAD_BLOCK_ZERO: > > + /* in this case, we need to preserve zero writes for > > + * data that is not part of this write, so we must pad > > + * the rest of the buffer to zeroes */ > > + > > + /* if we are on a posix system with ftruncate() that extends > > + * a file, then it is zero-filled for us. On Win32, the raw > > + * layer uses SetFilePointer and SetFileEnd, which does not > > + * zero fill AFAIK */ > > + > > + /* TODO: queue another write of zero buffers if the host OS does > > + * not zero-fill on file extension */ > > This is never addressed in the series. > I will address it in v7 > > + > > + /* fall through */ > > + case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ > > + case PAYLOAD_BLOCK_UNMAPPED: /* fall through */ > > + case PAYLOAD_BLOCK_UNDEFINED: /* fall through */ > > + ret = vhdx_allocate_block(bs, s, &sinfo.file_offset); > > + if (ret < 0) { > > + goto exit; > > + } > > + /* once we support differencing files, this may also be > > + * partially present */ > > + /* update block state to the newly specified state */ > > + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, > > + &bat_entry_offset, > > + PAYLOAD_BLOCK_FULL_PRESENT); > > In the specification the constant is called PAYLOAD_BLOCK_FULLY_PRESENT. > Is s/FULLY/FULL/ intentional and can you match the spec for consistency? > That was unintentional, and yes I will change it to match the spec naming. > > + bat_update = true; > > + /* since we just allocated a block, file_offset is the > > + * beginning of the payload block. It needs to be the > > + * write address, which includes the offset into the block */ > > + sinfo.file_offset += sinfo.block_offset; > > + /* fall through */ > > + case PAYLOAD_BLOCK_FULL_PRESENT: > > + /* if the file offset address is in the header zone, > > + * there is a problem */ > > + if (sinfo.file_offset < (1024*1024)) { > > + ret = -EFAULT; > > + goto exit; > > + } > > + /* block exists, so we can just overwrite it */ > > + qemu_co_mutex_unlock(&s->lock); > > + ret = bdrv_co_writev(bs->file, > > + sinfo.file_offset>>BDRV_SECTOR_BITS, > > + sinfo.sectors_avail, &hd_qiov); > > + qemu_co_mutex_lock(&s->lock); > > + if (ret < 0) { > > + goto exit; > > + } > > + break; > > + case PAYLOAD_BLOCK_PARTIALLY_PRESENT: > > + /* we don't yet support difference files, fall through > > + * to error */ > > + default: > > + ret = -EIO; > > + goto exit; > > + break; > > + } > > + > > + if (bat_update) { > > Data must be stable on disk before we can expose the updated BAT entry. > Missing bdrv_co_flush(). > Good point, I will add that in for v7. Thanks, Jeff
diff --git a/block/vhdx.c b/block/vhdx.c index a0a01ec..129468c 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -941,7 +941,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, } } - /* TODO: differencing files, write */ + /* TODO: differencing files */ return 0; fail: @@ -1070,7 +1070,43 @@ exit: return ret; } +/* + * Allocate a new payload block at the end of the file. + * + * Allocation will happen at 1MB alignment inside the file + * + * Returns the file offset start of the new payload block + */ +static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, + uint64_t *new_offset) +{ + *new_offset = bdrv_getlength(bs->file); + + /* per the spec, the address for a block is in units of 1MB */ + *new_offset = ROUND_UP(*new_offset, 1024*1024); + + return bdrv_truncate(bs->file, *new_offset + s->block_size); +} + +/* + * Update the BAT tablet entry with the new file offset, and the new entry + * state */ +static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s, + VHDXSectorInfo *sinfo, + uint64_t *bat_entry, + uint64_t *bat_offset, int state) +{ + /* The BAT entry is a uint64, with 44 bits for the file offset in units of + * 1MB, and 3 bits for the block state. */ + s->bat[sinfo->bat_idx] = ((sinfo->file_offset>>20) << + VHDX_BAT_FILE_OFF_BITS); + s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK; + + *bat_entry = cpu_to_le64(s->bat[sinfo->bat_idx]); + *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry); + +} /* Per the spec, on the first write of guest-visible data to the file the * data write guid must be updated in the header */ @@ -1087,7 +1123,117 @@ int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s) static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return -ENOTSUP; + int ret = -ENOTSUP; + BDRVVHDXState *s = bs->opaque; + VHDXSectorInfo sinfo; + uint64_t bytes_done = 0; + uint64_t bat_entry = 0; + uint64_t bat_entry_offset = 0; + bool bat_update; + QEMUIOVector hd_qiov; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + qemu_co_mutex_lock(&s->lock); + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + while (nb_sectors > 0) { + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { + /* not supported yet */ + ret = -ENOTSUP; + goto exit; + } else { + bat_update = false; + vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail); + /* check the payload block state */ + switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) { + case PAYLOAD_BLOCK_ZERO: + /* in this case, we need to preserve zero writes for + * data that is not part of this write, so we must pad + * the rest of the buffer to zeroes */ + + /* if we are on a posix system with ftruncate() that extends + * a file, then it is zero-filled for us. On Win32, the raw + * layer uses SetFilePointer and SetFileEnd, which does not + * zero fill AFAIK */ + + /* TODO: queue another write of zero buffers if the host OS does + * not zero-fill on file extension */ + + /* fall through */ + case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ + case PAYLOAD_BLOCK_UNMAPPED: /* fall through */ + case PAYLOAD_BLOCK_UNDEFINED: /* fall through */ + ret = vhdx_allocate_block(bs, s, &sinfo.file_offset); + if (ret < 0) { + goto exit; + } + /* once we support differencing files, this may also be + * partially present */ + /* update block state to the newly specified state */ + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, + &bat_entry_offset, + PAYLOAD_BLOCK_FULL_PRESENT); + bat_update = true; + /* since we just allocated a block, file_offset is the + * beginning of the payload block. It needs to be the + * write address, which includes the offset into the block */ + sinfo.file_offset += sinfo.block_offset; + /* fall through */ + case PAYLOAD_BLOCK_FULL_PRESENT: + /* if the file offset address is in the header zone, + * there is a problem */ + if (sinfo.file_offset < (1024*1024)) { + ret = -EFAULT; + goto exit; + } + /* block exists, so we can just overwrite it */ + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_writev(bs->file, + sinfo.file_offset>>BDRV_SECTOR_BITS, + sinfo.sectors_avail, &hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto exit; + } + break; + case PAYLOAD_BLOCK_PARTIALLY_PRESENT: + /* we don't yet support difference files, fall through + * to error */ + default: + ret = -EIO; + goto exit; + break; + } + + if (bat_update) { + /* this will update the BAT entry into the log journal, and + * then flush the log journal out to disk */ + ret = vhdx_log_write_and_flush(bs, s, &bat_entry, + sizeof(VHDXBatEntry), + bat_entry_offset); + if (ret < 0) { + goto exit; + } + } + + nb_sectors -= sinfo.sectors_avail; + sector_num += sinfo.sectors_avail; + bytes_done += sinfo.bytes_avail; + + } + } + +exit: + qemu_co_mutex_unlock(&s->lock); + return ret; }
This adds support for writing to VHDX image files, using coroutines. Writes into the BAT table goes through the VHDX log. Currently, BAT table writes occur when expanding a dynamic VHDX file, and allocating a new BAT entry. Signed-off-by: Jeff Cody <jcody@redhat.com> --- block/vhdx.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 2 deletions(-)