Message ID | 4a2d211444689a51f9e85bb8b2975e5551442ab9.1374687002.git.jcody@redhat.com |
---|---|
State | New |
Headers | show |
On Wed, 07/24 13:54, Jeff Cody wrote: > This adds support for writing to the VHDX log. > > For spec details, see VHDX Specification Format v1.00: > https://www.microsoft.com/en-us/download/details.aspx?id=34750 > > There are a few limitations to this log support: > 1.) There is no caching yet > 2.) The log is flushed after each entry > > The primary write interface, vhdx_log_write_and_flush(), performs a log > write followed by an immediate flush of the log. > > As each log entry sector is a minimum of 4KB, partial sector writes are > filled in with data from the disk write destination. > > If the current file log GUID is 0, a new GUID is generated and updated > in the header. > > Signed-off-by: Jeff Cody <jcody@redhat.com> > --- > block/vhdx-log.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > block/vhdx.h | 3 + > 2 files changed, 276 insertions(+) > > diff --git a/block/vhdx-log.c b/block/vhdx-log.c > index 89b9000..786b393 100644 > --- a/block/vhdx-log.c > +++ b/block/vhdx-log.c > @@ -170,6 +170,53 @@ exit: > return ret; > } > > +/* Writes num_sectors to the log (all log sectors are 4096 bytes), > + * from buffer 'buffer'. Upon return, *sectors_written will contain > + * the number of sectors successfully written. > + * > + * It is assumed that 'buffer' is at least 4096*num_sectors large. 
> + * > + * 0 is returned on success, -errno otherwise */ > +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, > + uint32_t *sectors_written, void *buffer, > + uint32_t num_sectors) > +{ > + int ret = 0; > + uint64_t offset; > + uint32_t write; > + void *buffer_tmp; > + BDRVVHDXState *s = bs->opaque; > + > + vhdx_user_visible_write(bs, s); > + > + write = log->write; > + > + buffer_tmp = buffer; > + while (num_sectors) { > + > + offset = log->offset + write; > + write = vhdx_log_inc_idx(write, log->length); > + if (write == log->read) { > + /* full */ > + break; > + } > + ret = bdrv_pwrite_sync(bs->file, offset, buffer_tmp, > + VHDX_LOG_SECTOR_SIZE); > + if (ret < 0) { > + goto exit; > + } > + buffer_tmp += VHDX_LOG_SECTOR_SIZE; > + > + log->write = write; > + *sectors_written = *sectors_written + 1; > + num_sectors--; > + } > + > +exit: > + return ret; > +} > + > + > /* Validates a log entry header */ > static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, > BDRVVHDXState *s) > @@ -732,3 +779,229 @@ exit: > return ret; > } > > + > + > +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, > + VHDXLogDataSector *sector, void *data, > + uint64_t seq) > +{ > + memcpy(&desc->leading_bytes, data, 8); > + data += 8; > + cpu_to_le64s(&desc->leading_bytes); > + memcpy(sector->data, data, 4084); > + data += 4084; > + memcpy(&desc->trailing_bytes, data, 4); > + cpu_to_le32s(&desc->trailing_bytes); > + data += 4; > + > + sector->sequence_high = (uint32_t) (seq >> 32); > + sector->sequence_low = (uint32_t) (seq & 0xffffffff); > + sector->data_signature = VHDX_LOG_DATA_SIGNATURE; > + > + vhdx_log_desc_le_export(desc); > + vhdx_log_data_le_export(sector); > +} > + > + > +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, > + void *data, uint32_t length, uint64_t offset) > +{ > + int ret = 0; > + void *buffer = NULL; > + void *merged_sector = NULL; > + void *data_tmp, *sector_write; > + unsigned int i; 
> + int sector_offset; > + uint32_t desc_sectors, sectors, total_length; > + uint32_t sectors_written = 0; > + uint32_t aligned_length; > + uint32_t leading_length = 0; > + uint32_t trailing_length = 0; > + uint32_t partial_sectors = 0; > + uint32_t bytes_written = 0; > + uint64_t file_offset; > + VHDXHeader *header; > + VHDXLogEntryHeader new_hdr; > + VHDXLogDescriptor *new_desc = NULL; > + VHDXLogDataSector *data_sector = NULL; > + MSGUID new_guid = { 0 }; > + > + header = s->headers[s->curr_header]; > + > + /* need to have offset read data, and be on 4096 byte boundary */ > + > + if (length > header->log_length) { > + /* no log present. we could create a log here instead of failing */ Does newly created vhdx have allocated log sectors? > + ret = -EINVAL; > + goto exit; > + } > + > + if (vhdx_log_guid_is_zero(&header->log_guid)) { > + vhdx_guid_generate(&new_guid); > + vhdx_update_headers(bs, s, false, &new_guid); > + } else { > + /* currently, we require that the log be flushed after > + * every write. */ > + ret = -ENOTSUP; Can we make an assertion here? > + } > + > + /* 0 is an invalid sequence number, but may also represent the first > + * log write (or a wrapped seq) */ > + if (s->log.sequence == 0) { > + s->log.sequence = 1; > + } > + > + sector_offset = offset % VHDX_LOG_SECTOR_SIZE; > + file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE; > + > + aligned_length = length; > + > + /* add in the unaligned head and tail bytes */ > + if (sector_offset) { > + leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); > + leading_length = leading_length > length ? 
length : leading_length; > + aligned_length -= leading_length; > + partial_sectors++; > + } > + > + sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; > + trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); > + if (trailing_length) { > + partial_sectors++; > + } > + > + sectors += partial_sectors; > + > + /* sectors is now how many sectors the data itself takes, not > + * including the header and descriptor metadata */ > + > + new_hdr = (VHDXLogEntryHeader) { > + .signature = VHDX_LOG_SIGNATURE, > + .tail = s->log.tail, > + .sequence_number = s->log.sequence, > + .descriptor_count = sectors, > + .reserved = 0, > + .flushed_file_offset = bdrv_getlength(bs->file), > + .last_file_offset = bdrv_getlength(bs->file), > + }; > + > + memcpy(&new_hdr.log_guid, &header->log_guid, sizeof(MSGUID)); > + > + desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); > + > + total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; > + new_hdr.entry_length = total_length; > + > + vhdx_log_entry_hdr_le_export(&new_hdr); > + > + buffer = qemu_blockalign(bs, total_length); > + memcpy(buffer, &new_hdr, sizeof(new_hdr)); > + > + new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr)); > + data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE); > + data_tmp = data; > + > + /* All log sectors are 4KB, so for any partial sectors we must > + * merge the data with preexisting data from the final file > + * destination */ > + merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); > + > + for (i = 0; i < sectors; i++) { > + new_desc->signature = VHDX_LOG_DESC_SIGNATURE; > + new_desc->sequence_number = s->log.sequence; > + new_desc->file_offset = file_offset; > + > + if (i == 0 && leading_length) { > + /* partial sector at the front of the buffer */ > + ret = bdrv_pread(bs->file, file_offset, merged_sector, > + VHDX_LOG_SECTOR_SIZE); > + if (ret < 0) { > + goto exit; > + } > + memcpy(merged_sector + sector_offset, data_tmp, leading_length); > + 
bytes_written = leading_length; > + sector_write = merged_sector; > + } else if (i == sectors - 1 && trailing_length) { > + /* partial sector at the end of the buffer */ > + ret = bdrv_pread(bs->file, > + file_offset, > + merged_sector + trailing_length, > + VHDX_LOG_SECTOR_SIZE - trailing_length); > + if (ret < 0) { > + goto exit; > + } > + memcpy(merged_sector, data_tmp, trailing_length); > + bytes_written = trailing_length; > + sector_write = merged_sector; > + } else { > + bytes_written = VHDX_LOG_SECTOR_SIZE; > + sector_write = data_tmp; > + } > + > + /* populate the raw sector data into the proper structures, > + * as well as update the descriptor, and convert to proper > + * endianness */ > + vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, > + s->log.sequence); > + > + data_tmp += bytes_written; > + data_sector++; > + new_desc++; > + file_offset += VHDX_LOG_SECTOR_SIZE; > + } > + > + /* checksum covers entire entry, from the log header through the > + * last data sector */ > + vhdx_update_checksum(buffer, total_length, 4); > + cpu_to_le32s((uint32_t *)(buffer + 4)); > + > + /* now write to the log */ > + vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer, > + desc_sectors + sectors); > + if (ret < 0) { > + goto exit; > + } > + > + if (sectors_written != desc_sectors + sectors) { > + /* instead of failing, we could flush the log here */ > + ret = -EINVAL; > + goto exit; > + } > + > + s->log.sequence++; > + /* write new tail */ > + s->log.tail = s->log.write; > + > +exit: > + qemu_vfree(buffer); > + qemu_vfree(merged_sector); > + return ret; > +} > + > +/* Perform a log write, and then immediately flush the entire log */ > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, > + void *data, uint32_t length, uint64_t offset) > +{ > + int ret = 0; > + VHDXLogSequence logs = { .valid = true, > + .count = 1, > + .hdr = { 0 } }; > + > + > + ret = vhdx_log_write(bs, s, data, length, offset); > + if (ret < 0) { > + goto exit; > + 
} > + logs.log = s->log; > + > + ret = vhdx_log_flush(bs, s, &logs); > + if (ret < 0) { > + goto exit; > + } > + > + s->log = logs.log; > + > +exit: > + return ret; > +} > + > diff --git a/block/vhdx.h b/block/vhdx.h > index 24b126e..b210efc 100644 > --- a/block/vhdx.h > +++ b/block/vhdx.h > @@ -393,6 +393,9 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); > > int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s); > > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, > + void *data, uint32_t length, uint64_t offset); > + > static inline void leguid_to_cpus(MSGUID *guid) > { > le32_to_cpus(&guid->data1); > -- > 1.8.1.4 > >
On Tue, Jul 30, 2013 at 11:57:20AM +0800, Fam Zheng wrote: > On Wed, 07/24 13:54, Jeff Cody wrote: > > This adds support for writing to the VHDX log. > > > > For spec details, see VHDX Specification Format v1.00: > > https://www.microsoft.com/en-us/download/details.aspx?id=34750 > > > > There are a few limitations to this log support: > > 1.) There is no caching yet > > 2.) The log is flushed after each entry > > > > The primary write interface, vhdx_log_write_and_flush(), performs a log > > write followed by an immediate flush of the log. > > > > As each log entry sector is a minimum of 4KB, partial sector writes are > > filled in with data from the disk write destination. > > > > If the current file log GUID is 0, a new GUID is generated and updated > > in the header. > > > > Signed-off-by: Jeff Cody <jcody@redhat.com> > > --- > > block/vhdx-log.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > > block/vhdx.h | 3 + > > 2 files changed, 276 insertions(+) > > > > diff --git a/block/vhdx-log.c b/block/vhdx-log.c > > index 89b9000..786b393 100644 > > --- a/block/vhdx-log.c > > +++ b/block/vhdx-log.c > > @@ -170,6 +170,53 @@ exit: > > return ret; > > } > > > > +/* Writes num_sectors to the log (all log sectors are 4096 bytes), > > + * from buffer 'buffer'. Upon return, *sectors_written will contain > > + * the number of sectors successfully written. > > + * > > + * It is assumed that 'buffer' is at least 4096*num_sectors large. 
> > + * > > + * 0 is returned on success, -errno otherwise */ > > +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, > > + uint32_t *sectors_written, void *buffer, > > + uint32_t num_sectors) > > +{ > > + int ret = 0; > > + uint64_t offset; > > + uint32_t write; > > + void *buffer_tmp; > > + BDRVVHDXState *s = bs->opaque; > > + > > + vhdx_user_visible_write(bs, s); > > + > > + write = log->write; > > + > > + buffer_tmp = buffer; > > + while (num_sectors) { > > + > > + offset = log->offset + write; > > + write = vhdx_log_inc_idx(write, log->length); > > + if (write == log->read) { > > + /* full */ > > + break; > > + } > > + ret = bdrv_pwrite_sync(bs->file, offset, buffer_tmp, > > + VHDX_LOG_SECTOR_SIZE); > > + if (ret < 0) { > > + goto exit; > > + } > > + buffer_tmp += VHDX_LOG_SECTOR_SIZE; > > + > > + log->write = write; > > + *sectors_written = *sectors_written + 1; > > + num_sectors--; > > + } > > + > > +exit: > > + return ret; > > +} > > + > > + > > /* Validates a log entry header */ > > static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, > > BDRVVHDXState *s) > > @@ -732,3 +779,229 @@ exit: > > return ret; > > } > > > > + > > + > > +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, > > + VHDXLogDataSector *sector, void *data, > > + uint64_t seq) > > +{ > > + memcpy(&desc->leading_bytes, data, 8); > > + data += 8; > > + cpu_to_le64s(&desc->leading_bytes); > > + memcpy(sector->data, data, 4084); > > + data += 4084; > > + memcpy(&desc->trailing_bytes, data, 4); > > + cpu_to_le32s(&desc->trailing_bytes); > > + data += 4; > > + > > + sector->sequence_high = (uint32_t) (seq >> 32); > > + sector->sequence_low = (uint32_t) (seq & 0xffffffff); > > + sector->data_signature = VHDX_LOG_DATA_SIGNATURE; > > + > > + vhdx_log_desc_le_export(desc); > > + vhdx_log_data_le_export(sector); > > +} > > + > > + > > +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, > > + void *data, uint32_t length, 
uint64_t offset) > > +{ > > + int ret = 0; > > + void *buffer = NULL; > > + void *merged_sector = NULL; > > + void *data_tmp, *sector_write; > > + unsigned int i; > > + int sector_offset; > > + uint32_t desc_sectors, sectors, total_length; > > + uint32_t sectors_written = 0; > > + uint32_t aligned_length; > > + uint32_t leading_length = 0; > > + uint32_t trailing_length = 0; > > + uint32_t partial_sectors = 0; > > + uint32_t bytes_written = 0; > > + uint64_t file_offset; > > + VHDXHeader *header; > > + VHDXLogEntryHeader new_hdr; > > + VHDXLogDescriptor *new_desc = NULL; > > + VHDXLogDataSector *data_sector = NULL; > > + MSGUID new_guid = { 0 }; > > + > > + header = s->headers[s->curr_header]; > > + > > + /* need to have offset read data, and be on 4096 byte boundary */ > > + > > + if (length > header->log_length) { > > + /* no log present. we could create a log here instead of failing */ > > Does newly created vhdx have allocated log sectors? > I don't know of any way to make Hyper-V create a file without an allocated log area (I believe with the files I've generated, it allocates a 1MB log between the header and the BAT region). The spec says that "LogLength" in the header should be a multiple of 1MB. And technically, 0 is a multiple of every number, so when parsing the header I don't fail out on a zero-lengthed log. In practice, I don't think Hyper-V creates files with zero-length logs, but I don't think the spec rules it out. So we could either allocate a log in the file at this point, or fail. > > + ret = -EINVAL; > > + goto exit; > > + } > > + > > + if (vhdx_log_guid_is_zero(&header->log_guid)) { > > + vhdx_guid_generate(&new_guid); > > + vhdx_update_headers(bs, s, false, &new_guid); > > + } else { > > + /* currently, we require that the log be flushed after > > + * every write. */ > > + ret = -ENOTSUP; > > Can we make an assertion here? > I don't know if we should assert here - the VM could certainly continue on if this is not the primary drive. 
> > + } > > + > > + /* 0 is an invalid sequence number, but may also represent the first > > + * log write (or a wrapped seq) */ > > + if (s->log.sequence == 0) { > > + s->log.sequence = 1; > > + } > > + > > + sector_offset = offset % VHDX_LOG_SECTOR_SIZE; > > + file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE; > > + > > + aligned_length = length; > > + > > + /* add in the unaligned head and tail bytes */ > > + if (sector_offset) { > > + leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); > > + leading_length = leading_length > length ? length : leading_length; > > + aligned_length -= leading_length; > > + partial_sectors++; > > + } > > + > > + sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; > > + trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); > > + if (trailing_length) { > > + partial_sectors++; > > + } > > + > > + sectors += partial_sectors; > > + > > + /* sectors is now how many sectors the data itself takes, not > > + * including the header and descriptor metadata */ > > + > > + new_hdr = (VHDXLogEntryHeader) { > > + .signature = VHDX_LOG_SIGNATURE, > > + .tail = s->log.tail, > > + .sequence_number = s->log.sequence, > > + .descriptor_count = sectors, > > + .reserved = 0, > > + .flushed_file_offset = bdrv_getlength(bs->file), > > + .last_file_offset = bdrv_getlength(bs->file), > > + }; > > + > > + memcpy(&new_hdr.log_guid, &header->log_guid, sizeof(MSGUID)); > > + > > + desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); > > + > > + total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; > > + new_hdr.entry_length = total_length; > > + > > + vhdx_log_entry_hdr_le_export(&new_hdr); > > + > > + buffer = qemu_blockalign(bs, total_length); > > + memcpy(buffer, &new_hdr, sizeof(new_hdr)); > > + > > + new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr)); > > + data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE); > > + data_tmp = data; > > + > > + /* All log sectors are 4KB, so for 
any partial sectors we must > > + * merge the data with preexisting data from the final file > > + * destination */ > > + merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); > > + > > + for (i = 0; i < sectors; i++) { > > + new_desc->signature = VHDX_LOG_DESC_SIGNATURE; > > + new_desc->sequence_number = s->log.sequence; > > + new_desc->file_offset = file_offset; > > + > > + if (i == 0 && leading_length) { > > + /* partial sector at the front of the buffer */ > > + ret = bdrv_pread(bs->file, file_offset, merged_sector, > > + VHDX_LOG_SECTOR_SIZE); > > + if (ret < 0) { > > + goto exit; > > + } > > + memcpy(merged_sector + sector_offset, data_tmp, leading_length); > > + bytes_written = leading_length; > > + sector_write = merged_sector; > > + } else if (i == sectors - 1 && trailing_length) { > > + /* partial sector at the end of the buffer */ > > + ret = bdrv_pread(bs->file, > > + file_offset, > > + merged_sector + trailing_length, > > + VHDX_LOG_SECTOR_SIZE - trailing_length); > > + if (ret < 0) { > > + goto exit; > > + } > > + memcpy(merged_sector, data_tmp, trailing_length); > > + bytes_written = trailing_length; > > + sector_write = merged_sector; > > + } else { > > + bytes_written = VHDX_LOG_SECTOR_SIZE; > > + sector_write = data_tmp; > > + } > > + > > + /* populate the raw sector data into the proper structures, > > + * as well as update the descriptor, and convert to proper > > + * endianness */ > > + vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, > > + s->log.sequence); > > + > > + data_tmp += bytes_written; > > + data_sector++; > > + new_desc++; > > + file_offset += VHDX_LOG_SECTOR_SIZE; > > + } > > + > > + /* checksum covers entire entry, from the log header through the > > + * last data sector */ > > + vhdx_update_checksum(buffer, total_length, 4); > > + cpu_to_le32s((uint32_t *)(buffer + 4)); > > + > > + /* now write to the log */ > > + vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer, > > + desc_sectors + sectors); > > + if 
(ret < 0) { > > + goto exit; > > + } > > + > > + if (sectors_written != desc_sectors + sectors) { > > + /* instead of failing, we could flush the log here */ > > + ret = -EINVAL; > > + goto exit; > > + } > > + > > + s->log.sequence++; > > + /* write new tail */ > > + s->log.tail = s->log.write; > > + > > +exit: > > + qemu_vfree(buffer); > > + qemu_vfree(merged_sector); > > + return ret; > > +} > > + > > +/* Perform a log write, and then immediately flush the entire log */ > > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, > > + void *data, uint32_t length, uint64_t offset) > > +{ > > + int ret = 0; > > + VHDXLogSequence logs = { .valid = true, > > + .count = 1, > > + .hdr = { 0 } }; > > + > > + > > + ret = vhdx_log_write(bs, s, data, length, offset); > > + if (ret < 0) { > > + goto exit; > > + } > > + logs.log = s->log; > > + > > + ret = vhdx_log_flush(bs, s, &logs); > > + if (ret < 0) { > > + goto exit; > > + } > > + > > + s->log = logs.log; > > + > > +exit: > > + return ret; > > +} > > + > > diff --git a/block/vhdx.h b/block/vhdx.h > > index 24b126e..b210efc 100644 > > --- a/block/vhdx.h > > +++ b/block/vhdx.h > > @@ -393,6 +393,9 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); > > > > int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s); > > > > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, > > + void *data, uint32_t length, uint64_t offset); > > + > > static inline void leguid_to_cpus(MSGUID *guid) > > { > > le32_to_cpus(&guid->data1); > > -- > > 1.8.1.4 > > > > > > -- > Fam
diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 89b9000..786b393 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -170,6 +170,53 @@ exit: return ret; } +/* Writes num_sectors to the log (all log sectors are 4096 bytes), + * from buffer 'buffer'. Upon return, *sectors_written will contain + * the number of sectors successfully written. + * + * It is assumed that 'buffer' is at least 4096*num_sectors large. + * + * 0 is returned on success, -errno otherwise */ +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, + uint32_t *sectors_written, void *buffer, + uint32_t num_sectors) +{ + int ret = 0; + uint64_t offset; + uint32_t write; + void *buffer_tmp; + BDRVVHDXState *s = bs->opaque; + + vhdx_user_visible_write(bs, s); + + write = log->write; + + buffer_tmp = buffer; + while (num_sectors) { + + offset = log->offset + write; + write = vhdx_log_inc_idx(write, log->length); + if (write == log->read) { + /* full */ + break; + } + ret = bdrv_pwrite_sync(bs->file, offset, buffer_tmp, + VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + buffer_tmp += VHDX_LOG_SECTOR_SIZE; + + log->write = write; + *sectors_written = *sectors_written + 1; + num_sectors--; + } + +exit: + return ret; +} + + /* Validates a log entry header */ static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, BDRVVHDXState *s) @@ -732,3 +779,229 @@ exit: return ret; } + + +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, + VHDXLogDataSector *sector, void *data, + uint64_t seq) +{ + memcpy(&desc->leading_bytes, data, 8); + data += 8; + cpu_to_le64s(&desc->leading_bytes); + memcpy(sector->data, data, 4084); + data += 4084; + memcpy(&desc->trailing_bytes, data, 4); + cpu_to_le32s(&desc->trailing_bytes); + data += 4; + + sector->sequence_high = (uint32_t) (seq >> 32); + sector->sequence_low = (uint32_t) (seq & 0xffffffff); + sector->data_signature = VHDX_LOG_DATA_SIGNATURE; + + vhdx_log_desc_le_export(desc); + 
vhdx_log_data_le_export(sector); +} + + +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset) +{ + int ret = 0; + void *buffer = NULL; + void *merged_sector = NULL; + void *data_tmp, *sector_write; + unsigned int i; + int sector_offset; + uint32_t desc_sectors, sectors, total_length; + uint32_t sectors_written = 0; + uint32_t aligned_length; + uint32_t leading_length = 0; + uint32_t trailing_length = 0; + uint32_t partial_sectors = 0; + uint32_t bytes_written = 0; + uint64_t file_offset; + VHDXHeader *header; + VHDXLogEntryHeader new_hdr; + VHDXLogDescriptor *new_desc = NULL; + VHDXLogDataSector *data_sector = NULL; + MSGUID new_guid = { 0 }; + + header = s->headers[s->curr_header]; + + /* need to have offset read data, and be on 4096 byte boundary */ + + if (length > header->log_length) { + /* no log present. we could create a log here instead of failing */ + ret = -EINVAL; + goto exit; + } + + if (vhdx_log_guid_is_zero(&header->log_guid)) { + vhdx_guid_generate(&new_guid); + vhdx_update_headers(bs, s, false, &new_guid); + } else { + /* currently, we require that the log be flushed after + * every write. */ + ret = -ENOTSUP; + } + + /* 0 is an invalid sequence number, but may also represent the first + * log write (or a wrapped seq) */ + if (s->log.sequence == 0) { + s->log.sequence = 1; + } + + sector_offset = offset % VHDX_LOG_SECTOR_SIZE; + file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE; + + aligned_length = length; + + /* add in the unaligned head and tail bytes */ + if (sector_offset) { + leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); + leading_length = leading_length > length ? 
length : leading_length; + aligned_length -= leading_length; + partial_sectors++; + } + + sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; + trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); + if (trailing_length) { + partial_sectors++; + } + + sectors += partial_sectors; + + /* sectors is now how many sectors the data itself takes, not + * including the header and descriptor metadata */ + + new_hdr = (VHDXLogEntryHeader) { + .signature = VHDX_LOG_SIGNATURE, + .tail = s->log.tail, + .sequence_number = s->log.sequence, + .descriptor_count = sectors, + .reserved = 0, + .flushed_file_offset = bdrv_getlength(bs->file), + .last_file_offset = bdrv_getlength(bs->file), + }; + + memcpy(&new_hdr.log_guid, &header->log_guid, sizeof(MSGUID)); + + desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); + + total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; + new_hdr.entry_length = total_length; + + vhdx_log_entry_hdr_le_export(&new_hdr); + + buffer = qemu_blockalign(bs, total_length); + memcpy(buffer, &new_hdr, sizeof(new_hdr)); + + new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr)); + data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE); + data_tmp = data; + + /* All log sectors are 4KB, so for any partial sectors we must + * merge the data with preexisting data from the final file + * destination */ + merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + for (i = 0; i < sectors; i++) { + new_desc->signature = VHDX_LOG_DESC_SIGNATURE; + new_desc->sequence_number = s->log.sequence; + new_desc->file_offset = file_offset; + + if (i == 0 && leading_length) { + /* partial sector at the front of the buffer */ + ret = bdrv_pread(bs->file, file_offset, merged_sector, + VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + memcpy(merged_sector + sector_offset, data_tmp, leading_length); + bytes_written = leading_length; + sector_write = merged_sector; + } else if (i == sectors - 1 && trailing_length) { + /* 
partial sector at the end of the buffer */ + ret = bdrv_pread(bs->file, + file_offset, + merged_sector + trailing_length, + VHDX_LOG_SECTOR_SIZE - trailing_length); + if (ret < 0) { + goto exit; + } + memcpy(merged_sector, data_tmp, trailing_length); + bytes_written = trailing_length; + sector_write = merged_sector; + } else { + bytes_written = VHDX_LOG_SECTOR_SIZE; + sector_write = data_tmp; + } + + /* populate the raw sector data into the proper structures, + * as well as update the descriptor, and convert to proper + * endianness */ + vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, + s->log.sequence); + + data_tmp += bytes_written; + data_sector++; + new_desc++; + file_offset += VHDX_LOG_SECTOR_SIZE; + } + + /* checksum covers entire entry, from the log header through the + * last data sector */ + vhdx_update_checksum(buffer, total_length, 4); + cpu_to_le32s((uint32_t *)(buffer + 4)); + + /* now write to the log */ + vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer, + desc_sectors + sectors); + if (ret < 0) { + goto exit; + } + + if (sectors_written != desc_sectors + sectors) { + /* instead of failing, we could flush the log here */ + ret = -EINVAL; + goto exit; + } + + s->log.sequence++; + /* write new tail */ + s->log.tail = s->log.write; + +exit: + qemu_vfree(buffer); + qemu_vfree(merged_sector); + return ret; +} + +/* Perform a log write, and then immediately flush the entire log */ +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset) +{ + int ret = 0; + VHDXLogSequence logs = { .valid = true, + .count = 1, + .hdr = { 0 } }; + + + ret = vhdx_log_write(bs, s, data, length, offset); + if (ret < 0) { + goto exit; + } + logs.log = s->log; + + ret = vhdx_log_flush(bs, s, &logs); + if (ret < 0) { + goto exit; + } + + s->log = logs.log; + +exit: + return ret; +} + diff --git a/block/vhdx.h b/block/vhdx.h index 24b126e..b210efc 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ 
-393,6 +393,9 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s); +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset); + static inline void leguid_to_cpus(MSGUID *guid) { le32_to_cpus(&guid->data1);
This adds support for writing to the VHDX log. For spec details, see VHDX Specification Format v1.00: https://www.microsoft.com/en-us/download/details.aspx?id=34750 There are a few limitations to this log support: 1.) There is no caching yet 2.) The log is flushed after each entry The primary write interface, vhdx_log_write_and_flush(), performs a log write followed by an immediate flush of the log. As each log entry sector is a minimum of 4KB, partial sector writes are filled in with data from the disk write destination. If the current file log GUID is 0, a new GUID is generated and updated in the header. Signed-off-by: Jeff Cody <jcody@redhat.com> --- block/vhdx-log.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ block/vhdx.h | 3 + 2 files changed, 276 insertions(+)