@@ -3173,14 +3173,18 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
* of 32768 512-byte sectors (16 MiB) per request.
*/
#define MAX_WRITE_ZEROES_DEFAULT 32768
+/* allocate iovec with zeroes using 1 MiB chunks to avoid too big allocations */
+#define MAX_ZEROES_CHUNK (1024 * 1024)
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
- struct iovec iov = {0};
int ret = 0;
+ void *chunk = NULL;
+
+ qemu_iovec_init(&qiov, 0);
int max_write_zeroes = bs->bl.max_write_zeroes ?
bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
@@ -3217,27 +3221,40 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
}
if (ret == -ENOTSUP) {
+ int64_t num_bytes;
+ int chunk_size;
+
/* Fall back to bounce buffer if write zeroes is unsupported */
- iov.iov_len = num * BDRV_SECTOR_SIZE;
- if (iov.iov_base == NULL) {
- iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
- if (iov.iov_base == NULL) {
+ num = MIN(num, MAX_WRITE_ZEROES_DEFAULT);
+
+ num_bytes = (int64_t)num << BDRV_SECTOR_BITS;
+ chunk_size = MIN(MAX_ZEROES_CHUNK, num_bytes);
+
+ if (chunk == NULL) {
+ chunk = qemu_try_blockalign(bs, chunk_size);
+ if (chunk == NULL) {
ret = -ENOMEM;
goto fail;
}
- memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
+ memset(chunk, 0, chunk_size);
+ }
+
+ while (num_bytes > 0) {
+ int to_add = MIN(chunk_size, num_bytes);
+ qemu_iovec_add(&qiov, chunk, to_add);
+ num_bytes -= to_add;
}
- qemu_iovec_init_external(&qiov, &iov, 1);
ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
/* Keep bounce buffer around if it is big enough for all
* all future requests.
*/
- if (num < max_write_zeroes) {
- qemu_vfree(iov.iov_base);
- iov.iov_base = NULL;
+ if (chunk_size != MAX_ZEROES_CHUNK) {
+ qemu_vfree(chunk);
+ chunk = NULL;
}
+ qemu_iovec_reset(&qiov);
}
sector_num += num;
@@ -3245,7 +3262,8 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
}
fail:
- qemu_vfree(iov.iov_base);
+ qemu_iovec_destroy(&qiov);
+ qemu_vfree(chunk);
return ret;
}
bdrv_co_do_write_zeroes splits writes using bl.max_write_zeroes or 16 MiB as a chunk size. This is implemented in this way to tolerate buggy block backends which do not accept too big requests. Though if the bdrv_co_write_zeroes callback is not good enough, we fall back to writing data explicitly using bdrv_co_writev and we create a buffer to accommodate zeroes inside. The size of this buffer is the size of the chunk. Thus if the underlying layer has bl.max_write_zeroes high enough, e.g. 4 GiB, the allocation can fail. Actually, there is no need to allocate such a big amount of memory. We could simply allocate a 1 MiB buffer and create an iovec whose entries all point to the same memory. Signed-off-by: Denis V. Lunev <den@openvz.org> CC: Kevin Wolf <kwolf@redhat.com> CC: Stefan Hajnoczi <stefanha@redhat.com> CC: Peter Lieven <pl@kamp.de> --- Changes from v2: - fixed num assignment as MAX_WRITE_ZEROES_DEFAULT is already in sectors Changes from v1: - using MAX_WRITE_ZEROES_DEFAULT as a limit for real write block.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-)