
[v2,04/15] qcow2: alloc space for COW in one chunk

Message ID 1496330073-51338-5-git-send-email-anton.nefedov@virtuozzo.com
State New

Commit Message

Anton Nefedov June 1, 2017, 3:14 p.m. UTC
From: "Denis V. Lunev" <den@openvz.org>

Currently each single write operation can result in 3 write operations
if guest offsets are not cluster aligned. One write is performed for the
real payload and two for the COW-ed areas. Thus the data may lie
non-contiguously on the host filesystem, which significantly reduces
subsequent sequential read performance.

The patch allocates the space in the file with cluster granularity,
ensuring
  1. better host offset locality
  2. fewer space allocation operations
     (which can be expensive on distributed storage)

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
---
 block/qcow2.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)
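
To make the write pattern concrete, here is an illustrative sketch (not
part of the patch; the cluster size and offsets are assumptions) of how
a single misaligned guest write decomposes into three host writes:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t cluster_size = 64 * 1024; /* common qcow2 default */
    const uint64_t guest_offset = 6 * 1024;  /* not cluster aligned */
    const uint64_t guest_bytes  = 4 * 1024;

    uint64_t offset_in_cluster = guest_offset % cluster_size;

    /* One guest write to a newly allocated cluster becomes three
     * host writes: head COW, payload, tail COW. */
    printf("COW head: %" PRIu64 " bytes\n", offset_in_cluster);
    printf("payload : %" PRIu64 " bytes\n", guest_bytes);
    printf("COW tail: %" PRIu64 " bytes\n",
           cluster_size - offset_in_cluster - guest_bytes);
    return 0;
}

With the patch applied, the whole cluster is first pre-allocated by a
single zero-write request, so the three writes land in one contiguous
host extent.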

Comments

Eric Blake June 5, 2017, 2:28 p.m. UTC | #1
On 06/01/2017 10:14 AM, Anton Nefedov wrote:
> From: "Denis V. Lunev" <den@openvz.org>
> 
> Currently each single write operation can result in 3 write operations

Maybe:

Currently each single guest write operation can result in up to 3 host
write operations

> if guest offsets are not cluster aligned. One write is performed for the
> real payload and two for the COW-ed areas. Thus the data may lie
> non-contiguously on the host filesystem, which significantly reduces
> subsequent sequential read performance.
> 
> The patch allocates the space in the file with cluster granularity,
> ensuring
>   1. better host offset locality
>   2. fewer space allocation operations
>      (which can be expensive on distributed storage)
> 
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
> ---
>  block/qcow2.c | 24 +++++++++++++++++++++++-
>  1 file changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/block/qcow2.c b/block/qcow2.c
> index b3ba5da..cd5efba 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -1575,6 +1575,24 @@ fail:
>      return ret;
>  }
>  
> +static void handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
> +{
> +    BDRVQcow2State *s = bs->opaque;
> +    QCowL2Meta *m;
> +
> +    for (m = l2meta; m != NULL; m = m->next) {
> +        uint64_t bytes = m->nb_clusters << s->cluster_bits;
> +
> +        if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
> +            continue;
> +        }
> +
> +        /* try to alloc host space in one chunk for better locality */
> +        bdrv_co_pwrite_zeroes(bs->file, m->alloc_offset, bytes,
> +                              BDRV_REQ_ALLOCATE);

Is it worth a trace point, to easily track when we are using
handle_alloc_space()?  Or do existing trace points in
bdrv_co_pwrite_zeroes already sufficiently serve the purpose?
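
If a dedicated trace point is wanted, it could follow the existing
qcow2_writev_data entry; a rough sketch (the name and format string
here are hypothetical):

In block/trace-events:

qcow2_handle_alloc_space(void *co, uint64_t offset, uint64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRIu64

In handle_alloc_space(), before the zero-write:

trace_qcow2_handle_alloc_space(qemu_coroutine_self(), m->alloc_offset, bytes);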

> +    }
> +}
> +
>  static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
>                                           uint64_t bytes, QEMUIOVector *qiov,
>                                           int flags)
> @@ -1656,8 +1674,12 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
>          if (ret < 0) {
>              goto fail;
>          }
> -
>          qemu_co_mutex_unlock(&s->lock);
> +
> +        if (bs->file->bs->supported_zero_flags & BDRV_REQ_ALLOCATE) {
> +            handle_alloc_space(bs, l2meta);
> +        }
> +

This certainly looks better than v1!
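
For readers following the series: the gate depends on the protocol
driver advertising the BDRV_REQ_ALLOCATE flag introduced earlier in
the series. A driver would opt in roughly like this (sketch only; the
function name is made up and driver-specific setup is elided):

static int example_driver_open(BlockDriverState *bs, QDict *options,
                               int flags, Error **errp)
{
    /* Advertise that pwrite_zeroes can also pre-allocate host space,
     * so qcow2 may zero whole clusters in one request. */
    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_ALLOCATE;
    return 0;
}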

>          BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
>          trace_qcow2_writev_data(qemu_coroutine_self(),
>                                  cluster_offset + offset_in_cluster);
>

Patch

diff --git a/block/qcow2.c b/block/qcow2.c
index b3ba5da..cd5efba 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1575,6 +1575,24 @@ fail:
     return ret;
 }
 
+static void handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
+{
+    BDRVQcow2State *s = bs->opaque;
+    QCowL2Meta *m;
+
+    for (m = l2meta; m != NULL; m = m->next) {
+        uint64_t bytes = m->nb_clusters << s->cluster_bits;
+
+        if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
+            continue;
+        }
+
+        /* try to alloc host space in one chunk for better locality */
+        bdrv_co_pwrite_zeroes(bs->file, m->alloc_offset, bytes,
+                              BDRV_REQ_ALLOCATE);
+    }
+}
+
 static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                          uint64_t bytes, QEMUIOVector *qiov,
                                          int flags)
@@ -1656,8 +1674,12 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
         if (ret < 0) {
             goto fail;
         }
-
         qemu_co_mutex_unlock(&s->lock);
+
+        if (bs->file->bs->supported_zero_flags & BDRV_REQ_ALLOCATE) {
+            handle_alloc_space(bs, l2meta);
+        }
+
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         trace_qcow2_writev_data(qemu_coroutine_self(),
                                 cluster_offset + offset_in_cluster);