diff mbox

[27/27] block/parallels: improve image writing performance further

Message ID 1425977481-13317-28-git-send-email-den@openvz.org
State New
Headers show

Commit Message

Denis V. Lunev March 10, 2015, 8:51 a.m. UTC
Try to perform IO for the biggest contiguous block possible.
All blocks absent from the image are accounted for together,
and preallocation is made for all of them at once.

Sequential write performance increases from 200 Gb/sec to
235 Gb/sec on my SSD.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Roman Kagan <rkagan@parallels.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/parallels.c | 44 ++++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

Comments

Roman Kagan March 11, 2015, 9:25 a.m. UTC | #1
On Tue, Mar 10, 2015 at 11:51:21AM +0300, Denis V. Lunev wrote:
> Try to perform IO for the biggest contiguous block possible.
> All blocks absent from the image are accounted for together,
> and preallocation is made for all of them at once.
> 
> Sequential write performance increases from 200 Gb/sec to
> 235 Gb/sec on my SSD.
> 
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Roman Kagan <rkagan@parallels.com>
> CC: Kevin Wolf <kwolf@redhat.com>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  block/parallels.c | 44 ++++++++++++++++++++++++--------------------
>  1 file changed, 24 insertions(+), 20 deletions(-)

Reviewed-by: Roman Kagan <rkagan@parallels.com>

Roman.
diff mbox

Patch

diff --git a/block/parallels.c b/block/parallels.c
index c6343c5..ccfdfab 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -178,42 +178,46 @@  static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
     return start_off;
 }
 
-static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num)
+static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
+                                 int nb_sectors, int *pnum)
 {
     BDRVParallelsState *s = bs->opaque;
-    uint32_t idx, offset;
-    int64_t pos;
+    uint32_t idx, to_allocate, i;
+    int64_t pos, space;
 
-    idx = sector_num / s->tracks;
-    offset = sector_num % s->tracks;
+    pos = block_status(s, sector_num, nb_sectors, pnum);
+    if (pos > 0) {
+        return pos;
+    }
 
+    idx = sector_num / s->tracks;
     if (idx >= s->bat_size) {
         return -EINVAL;
     }
-    if (s->bat_bitmap[idx] != 0) {
-        return bat2sect(s, idx) + offset;
-    }
 
-    pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS;
-    if (s->data_end + s->tracks > pos) {
+    to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
+    space = to_allocate * s->tracks;
+    if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) {
         int ret;
+        space += s->prealloc_size;
         if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE)
-            ret = bdrv_write_zeroes(bs->file, s->data_end,
-                                    s->prealloc_size, 0);
+            ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0);
         else
             ret = bdrv_truncate(bs->file,
-                    (s->data_end + s->prealloc_size) << BDRV_SECTOR_BITS);
+                                (s->data_end + space) << BDRV_SECTOR_BITS);
         if (ret < 0) {
             return ret;
         }
     }
-    pos = s->data_end;
-    s->data_end += s->tracks;
 
-    s->bat_bitmap[idx] = cpu_to_le32(pos / s->off_multiplier);
+    for (i = 0; i < to_allocate; i++) {
+        s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
+        s->data_end += s->tracks;
+        bitmap_set(s->bat_dirty_bmap,
+                   bat_entry_off(idx) / s->bat_dirty_block, 1);
+    }
 
-    bitmap_set(s->bat_dirty_bmap, bat_entry_off(idx) / s->bat_dirty_block, 1);
-    return bat2sect(s, idx) + offset;
+    return bat2sect(s, idx) + sector_num % s->tracks;
 }
 
 
@@ -278,8 +282,8 @@  static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
 
     qemu_co_mutex_lock(&s->lock);
     while (nb_sectors > 0) {
-        int64_t position = allocate_cluster(bs, sector_num);
-        int n = cluster_remainder(s, sector_num, nb_sectors);
+        int n;
+        int64_t position = allocate_clusters(bs, sector_num, nb_sectors, &n);
         int nbytes = n << BDRV_SECTOR_BITS;
 
         if (position < 0) {