From patchwork Tue Dec 30 10:07:10 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Denis V. Lunev" X-Patchwork-Id: 424614 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 45853140119 for ; Tue, 30 Dec 2014 21:12:12 +1100 (AEDT) Received: from localhost ([::1]:36375 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Y5tmc-0005C5-Ei for incoming@patchwork.ozlabs.org; Tue, 30 Dec 2014 05:12:10 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:56242) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Y5thV-0005Lf-E9 for qemu-devel@nongnu.org; Tue, 30 Dec 2014 05:06:54 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Y5thU-0003a1-4m for qemu-devel@nongnu.org; Tue, 30 Dec 2014 05:06:53 -0500 Received: from mailhub.sw.ru ([195.214.232.25]:24248 helo=relay.sw.ru) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Y5thT-0003Zb-LM for qemu-devel@nongnu.org; Tue, 30 Dec 2014 05:06:52 -0500 Received: from hades.sw.ru ([10.30.8.132]) by relay.sw.ru (8.13.4/8.13.4) with ESMTP id sBUA6epf008551; Tue, 30 Dec 2014 13:06:50 +0300 (MSK) From: "Denis V. Lunev" To: Date: Tue, 30 Dec 2014 13:07:10 +0300 Message-Id: <1419934032-24216-8-git-send-email-den@openvz.org> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1419934032-24216-1-git-send-email-den@openvz.org> References: <1419931701-19362-1-git-send-email-den@openvz.org> <1419934032-24216-1-git-send-email-den@openvz.org> X-detected-operating-system: by eggs.gnu.org: OpenBSD 3.x X-Received-From: 195.214.232.25 Cc: Kevin Wolf , "Denis V. Lunev" , qemu-devel@nongnu.org, Stefan Hajnoczi Subject: [Qemu-devel] [PATCH 17/19] block/parallels: delay writing to BAT till bdrv_co_flush_to_os X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org The idea is that we do not need to immediately sync BAT to the image as from the guest point of view there is a possibility that IO is lost even in the physical controller until flush command was finished. bdrv_co_flush_to_os is exactly the right place for this purpose. Technically the patch aligns writes into BAT to MAX(bdrv_align, 4096), which elliminates read-modify-write transactions on BAT update and cache ready-to-write content in a special buffer in BDRVParallelsState. This buffer possibly contains ParallelsHeader if the first page of the image should be modified. The header occupies first 64 bytes of the image and the BAT starts immediately after it. It is also possible that BAT end is not aligned to the cluster size. ParallelsHeader->data_off is not specified for this case. We should write only part of the cache in that case. This patch speed ups qemu-img create -f parallels -o cluster_size=64k ./1.hds 64G qemu-io -f parallels -c "write -P 0x11 0 1024k" 1.hds writing from 50-60 Mb/sec to 80-90 Mb/sec on rotational media and from 160 Mb/sec to 190 Mb/sec on SSD disk. Signed-off-by: Denis V. Lunev CC: Kevin Wolf CC: Stefan Hajnoczi --- block/parallels.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 46cf031..18b9267 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -62,6 +62,11 @@ typedef struct BDRVParallelsState { uint32_t *bat; unsigned int bat_size; + uint32_t *bat_cache; + unsigned bat_cache_size; + int bat_cache_off; + int data_off; + unsigned int tracks; unsigned int off_multiplier; @@ -148,6 +153,17 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, le32_to_cpus(&s->bat[i]); qemu_co_mutex_init(&s->lock); + + s->bat_cache_off = -1; + if (bs->open_flags & BDRV_O_RDWR) { + s->bat_cache_size = MAX(bdrv_opt_mem_align(bs->file), 4096); + s->bat_cache = qemu_blockalign(bs->file, s->bat_cache_size); + } + s->data_off = le32_to_cpu(s->ph.data_off) * BDRV_SECTOR_SIZE; + if (s->data_off == 0) { + s->data_off = ROUND_UP(bat_offset(s->bat_size), BDRV_SECTOR_SIZE); + } + return 0; fail_format: @@ -171,10 +187,71 @@ static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) return (uint64_t)s->bat[index] * s->off_multiplier + offset; } +static int write_bat_cache(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + int size, off; + + if (s->bat_cache_off == -1) { + /* no cached data */ + return 0; + } + + size = s->bat_cache_size; + if (size + s->bat_cache_off > s->data_off) { + /* avoid writing to the first data block */ + size = s->data_off - s->bat_cache_off; + } + + off = s->bat_cache_off; + s->bat_cache_off = -1; + return bdrv_pwrite(bs->file, off, s->bat_cache, size); +} + +static int cache_bat(BlockDriverState *bs, uint32_t idx, uint32_t new_data_off) +{ + int ret, i, off, cache_off; + int64_t first_idx, last_idx; + BDRVParallelsState *s = bs->opaque; + uint32_t *cache = s->bat_cache; + + off = bat_offset(idx); + cache_off = (off / s->bat_cache_size) * s->bat_cache_size; + + if (s->bat_cache_off != -1 && s->bat_cache_off != cache_off) { + ret = write_bat_cache(bs); + if (ret < 0) { + return ret; + } + } + + first_idx = idx - (off - cache_off) / sizeof(uint32_t); + last_idx = first_idx + s->bat_cache_size / sizeof(uint32_t); + if (first_idx < 0) { + memcpy(s->bat_cache, &s->ph, sizeof(s->ph)); + first_idx = 0; + cache = s->bat_cache + sizeof(s->ph) / sizeof(uint32_t); + } + + if (last_idx > s->bat_size) { + memset(cache + s->bat_size - first_idx, 0, + sizeof(uint32_t) * (last_idx - s->bat_size)); + } + + for (i = 0; i < last_idx - first_idx; i++) { + cache[i] = cpu_to_le32(s->bat[first_idx + i]); + } + cache[idx - first_idx] = cpu_to_le32(new_data_off); + s->bat[idx] = new_data_off; + + s->bat_cache_off = cache_off; + return 0; +} + static int64_t allocate_sector(BlockDriverState *bs, int64_t sector_num) { BDRVParallelsState *s = bs->opaque; - uint32_t idx, offset, tmp; + uint32_t idx, offset; int64_t pos; int ret; @@ -190,17 +267,27 @@ static int64_t allocate_sector(BlockDriverState *bs, int64_t sector_num) pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS; bdrv_truncate(bs->file, (pos + s->tracks) << BDRV_SECTOR_BITS); - s->bat[idx] = pos / s->off_multiplier; - - tmp = cpu_to_le32(s->bat[idx]); - ret = bdrv_pwrite(bs->file, bat_offset(idx), &tmp, sizeof(tmp)); + ret = cache_bat(bs, idx, pos / s->off_multiplier); if (ret < 0) { return ret; } return (uint64_t)s->bat[idx] * s->off_multiplier + offset; } +static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + int ret; + + qemu_co_mutex_lock(&s->lock); + ret = write_bat_cache(bs); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + + static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, int nb_sectors) { @@ -387,6 +474,7 @@ exit: static void parallels_close(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; + qemu_vfree(s->bat_cache); g_free(s->bat); } @@ -416,6 +504,7 @@ static BlockDriver bdrv_parallels = { .bdrv_open = parallels_open, .bdrv_close = parallels_close, .bdrv_co_get_block_status = parallels_co_get_block_status, + .bdrv_co_flush_to_os = parallels_co_flush_to_os, .bdrv_co_readv = parallels_co_readv, .bdrv_co_writev = parallels_co_writev,