From patchwork Wed May 16 11:59:40 2012
X-Patchwork-Submitter: Orit Wasserman
X-Patchwork-Id: 159611
From: Orit Wasserman
To: qemu-devel@nongnu.org
Cc: aliguori@us.ibm.com, quintela@redhat.com, Petter Svard, stefanha@gmail.com,
 mdroth@linux.vnet.ibm.com, blauwirbel@gmail.com, Orit Wasserman, Benoit Hudzia,
 avi@redhat.com, pbonzini@redhat.com, Aidan Shribman
Date: Wed, 16 May 2012 14:59:40 +0300
Message-Id: <1337169582-28312-8-git-send-email-owasserm@redhat.com>
In-Reply-To: <1337169582-28312-1-git-send-email-owasserm@redhat.com>
References: <1337169582-28312-1-git-send-email-owasserm@redhat.com>
Subject: [Qemu-devel] [PATCH v10 7/9] Add XBZRLE to ram_save_block and ram_save_live

In the outgoing migration, check whether the page is cached and has changed;
if so, send the compressed page using the save_xbzrle_page function. In the
incoming migration, check whether RAM_SAVE_FLAG_XBZRLE is set and, if so,
decompress the page using the load_xbzrle function.
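
As background, here is a minimal, self-contained toy sketch of the zrun/nzrun
idea the XBZRLE encoder uses: XOR the current page against the cached copy,
then emit alternating zero-run and nonzero-run lengths followed by the changed
bytes. Everything in the sketch is illustrative only (plain little-endian
16-bit lengths instead of the ULEB128 lengths used by xbzrle_encode_buffer()
in the patch below, and PAGE_SIZE standing in for TARGET_PAGE_SIZE):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Toy encoder: emit <zero-run len><nonzero-run len><changed bytes>...
 * Lengths are plain uint16_t here; the real patch encodes them as ULEB128.
 * Returns encoded size, 0 for an unchanged page, -1 on overflow. */
static int toy_xbzrle_encode(const uint8_t *old_buf, const uint8_t *new_buf,
                             int slen, uint8_t *dst, int dlen)
{
    int i = 0, d = 0;

    while (i < slen) {
        int zrun = 0, nzrun = 0;

        while (i + zrun < slen && (old_buf[i + zrun] ^ new_buf[i + zrun]) == 0) {
            zrun++;
        }
        if (zrun == slen) {
            return 0;                 /* page unchanged */
        }
        i += zrun;
        if (i == slen) {
            break;                    /* trailing zero run is not emitted */
        }
        while (i + nzrun < slen && (old_buf[i + nzrun] ^ new_buf[i + nzrun])) {
            nzrun++;
        }
        if (d + 4 + nzrun > dlen) {
            return -1;                /* overflow: send the full page instead */
        }
        dst[d++] = zrun & 0xff;  dst[d++] = zrun >> 8;
        dst[d++] = nzrun & 0xff; dst[d++] = nzrun >> 8;
        memcpy(dst + d, new_buf + i, nzrun);
        d += nzrun;
        i += nzrun;
    }
    return d;
}

int main(void)
{
    static uint8_t old_page[PAGE_SIZE], new_page[PAGE_SIZE], out[PAGE_SIZE];

    memcpy(new_page, old_page, PAGE_SIZE);
    memset(new_page + 100, 0xab, 8);  /* the guest touched 8 bytes of the page */

    int n = toy_xbzrle_encode(old_page, new_page, PAGE_SIZE, out, PAGE_SIZE);
    printf("encoded %d bytes instead of %d\n", n, PAGE_SIZE);
    return 0;
}

For a 4096-byte page with 8 changed bytes the sketch prints "encoded 12 bytes
instead of 4096", which is the bandwidth saving the real encoder is after; on
overflow (-1) the caller falls back to sending the full page, exactly as
ram_save_block() does in the patch.
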
Signed-off-by: Benoit Hudzia
Signed-off-by: Petter Svard
Signed-off-by: Aidan Shribman
Signed-off-by: Orit Wasserman
---
 arch_init.c |  220 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 migration.c |   26 +++++++-
 migration.h |    8 ++
 savevm.c    |   91 ++++++++++++++++++++++++
 4 files changed, 329 insertions(+), 16 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index a334a2e..7ebdb7a 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -43,6 +43,15 @@
 #include "hw/smbios.h"
 #include "exec-memory.h"
 #include "hw/pcspk.h"
+#include "qemu/cache.h"
+
+#ifdef DEBUG_ARCH_INIT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
 
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
@@ -95,6 +104,7 @@ const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE   0x40
 
 #ifdef __ALTIVEC__
 #include <altivec.h>
@@ -127,6 +137,22 @@ static int is_dup_page(uint8_t *page)
     return 1;
 }
 
+/* XBZRLE (Xor Based Zero Run Length Encoding) */
+typedef struct XBZRLEHeader {
+    uint32_t xh_cksum;
+    uint16_t xh_len;
+    uint8_t xh_flags;
+} XBZRLEHeader;
+
+/* struct containing the XBZRLE cache and a static page
+   used by the compression */
+static struct {
+    /* buffer used for XBZRLE encoding */
+    uint8_t *encoded_buf;
+    /* Cache for XBZRLE */
+    Cache *cache;
+} XBZRLE = {0};
+
 static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                            int cont, int flag)
 {
@@ -139,19 +165,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
 }
 
+#define ENCODING_FLAG_XBZRLE 0x1
+
+static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
+                            ram_addr_t current_addr, RAMBlock *block,
+                            ram_addr_t offset, int cont)
+{
+    int encoded_len = 0, bytes_sent = -1, ret = -1;
+    XBZRLEHeader hdr = {0};
+    uint8_t *prev_cached_page;
+
+    /* check to see if the page is cached; if not, cache it and return */
+    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
+        cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data,
+                                                          TARGET_PAGE_SIZE));
+        goto done;
+    }
+
+    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
+
+    /* XBZRLE encoding (if there is no overflow) */
+    encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data,
+                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                       TARGET_PAGE_SIZE);
+    if (encoded_len == 0) {
+        bytes_sent = 0;
+        DPRINTF("Unmodified page or overflow, skipping\n");
+        goto done;
+    } else if (encoded_len == -1) {
+        bytes_sent = -1;
+        DPRINTF("Overflow\n");
+        /* update data in the cache */
+        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    /* we need to update the data in the cache; to end up with the same data
+       we cached, we decode the encoded page on top of the cached data */
+    ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len,
+                               prev_cached_page, TARGET_PAGE_SIZE);
+    g_assert(ret != -1);
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
+
+    /* Send XBZRLE based compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_byte(f, hdr.xh_flags);
+    qemu_put_be16(f, hdr.xh_len);
+    qemu_put_be32(f, hdr.xh_cksum);
+    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
+    bytes_sent = encoded_len + sizeof(hdr);
+
+done:
+    return bytes_sent;
+}
+
 static RAMBlock *last_block;
 static ram_addr_t last_offset;
 
-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
 {
     RAMBlock *block = last_block;
     ram_addr_t offset = last_offset;
-    int bytes_sent = 0;
+    int bytes_sent = -1;
     MemoryRegion *mr;
+    ram_addr_t current_addr;
 
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);
 
+    current_addr = block->offset + offset;
+
     do {
         mr = block->mr;
         if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
@@ -168,7 +253,22 @@ static int ram_save_block(QEMUFile *f)
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
-            } else {
+            } else if (migrate_use_xbzrle()) {
+                /* in stage 1 none of the pages are cached, so we just want to
+                   cache them for later stages and send the cached copy */
+                if (stage == 1) {
+                    cache_insert(XBZRLE.cache, current_addr,
+                                 g_memdup(p, TARGET_PAGE_SIZE));
+                } else {
+                    bytes_sent = save_xbzrle_page(f, p, current_addr, block,
+                                                  offset, cont);
+                }
+                /* always send the cached page copy */
+                p = get_cached_data(XBZRLE.cache, current_addr);
+            }
+
+            /* nothing was sent yet (e.g. we hit an XBZRLE overflow) */
+            if (bytes_sent == -1) {
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
@@ -262,6 +362,17 @@ static void sort_ram_list(void)
     g_free(blocks);
 }
 
+static void migration_end(void)
+{
+    memory_global_dirty_log_stop();
+
+    if (migrate_use_xbzrle()) {
+        cache_fini(XBZRLE.cache);
+        g_free(XBZRLE.cache);
+        XBZRLE.cache = NULL;
+    }
+}
+
 int ram_save_live(QEMUFile *f, int stage, void *opaque)
 {
     ram_addr_t addr;
@@ -271,7 +382,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
     int ret;
 
     if (stage < 0) {
-        memory_global_dirty_log_stop();
+        migration_end();
         return 0;
     }
 
@@ -284,6 +395,16 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
         last_offset = 0;
         sort_ram_list();
 
+        if (migrate_use_xbzrle()) {
+            XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
+                                      TARGET_PAGE_SIZE);
+            if (!XBZRLE.cache) {
+                DPRINTF("Error creating cache\n");
+                return -1;
+            }
+            XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
+        }
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
@@ -311,9 +432,12 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int bytes_sent;
 
-        bytes_sent = ram_save_block(f);
-        bytes_transferred += bytes_sent;
-        if (bytes_sent == 0) { /* no more blocks */
+        bytes_sent = ram_save_block(f, stage);
+        /* bytes_sent == 0 means an unchanged page,
+           bytes_sent == -1 means no more blocks */
+        if (bytes_sent > 0) {
+            bytes_transferred += bytes_sent;
+        } else if (bytes_sent == -1) { /* no more blocks */
             break;
         }
     }
@@ -336,19 +460,62 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
         int bytes_sent;
 
         /* flush all remaining blocks regardless of rate limiting */
-        while ((bytes_sent = ram_save_block(f)) != 0) {
+        while ((bytes_sent = ram_save_block(f, stage)) != -1) {
            bytes_transferred += bytes_sent;
         }
-        memory_global_dirty_log_stop();
+        migration_end();
     }
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
 
+    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
+            migrate_max_downtime());
+
     return (stage == 2) && (expected_time <= migrate_max_downtime());
 }
 
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = 0;
+    uint8_t *xbzrle_buf = NULL;
+    XBZRLEHeader hdr = {0};
+
+    /* extract RLE header */
+    hdr.xh_flags = qemu_get_byte(f);
+    hdr.xh_len = qemu_get_be16(f);
+    hdr.xh_cksum = qemu_get_be32(f);
+
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
+        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
+        return -1;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
+        return -1;
+    }
+
+    /* load data and decode */
+    xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE);
+    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
+
+    /* decode RLE */
+    ret = xbzrle_decode_buffer(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+        rc = -1;
+    } else if (ret > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
+                ret, TARGET_PAGE_SIZE);
+        rc = -1;
+    }
+
+    g_free(xbzrle_buf);
+    return rc;
+}
+
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -382,8 +549,11 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
     int error;
+    static uint64_t seq_iter;
+
+    seq_iter++;
 
     if (version_id < 4 || version_id > 4) {
         return -EINVAL;
@@ -413,8 +583,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
 
                     QLIST_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length)
-                                return -EINVAL;
+                            if (block->length != length) {
+                                ret = -EINVAL;
+                                goto done;
+                            }
                             break;
                         }
                     }
@@ -422,7 +594,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
                     if (!block) {
                         fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                 "accept migration\n", id);
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto done;
                     }
 
                     total_ram_bytes -= length;
@@ -451,16 +624,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
             void *host;
 
             host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
+            void *host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
+
+            if (load_xbzrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
         }
 
         error = qemu_file_get_error(f);
         if (error) {
-            return error;
+            ret = error;
+            goto done;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));
 
-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+            ret, seq_iter);
+    return ret;
 }
 
 #ifdef HAS_AUDIO
diff --git a/migration.c b/migration.c
index 66e71a3..ba11adb 100644
--- a/migration.c
+++ b/migration.c
@@ -43,6 +43,9 @@ enum {
 
 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
 
+/* Migration XBZRLE cache size */
+#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
+
 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -55,7 +58,8 @@ static MigrationState *migrate_get_current(void)
     static MigrationState current_migration = {
         .state = MIG_STATE_SETUP,
         .bandwidth_limit = MAX_THROTTLE,
-    };
+        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
+    };
 
     return &current_migration;
 }
 
@@ -410,6 +414,7 @@ static MigrationState *migrate_init(const MigrationParams *params)
     MigrationState *s = migrate_get_current();
     int64_t bandwidth_limit = s->bandwidth_limit;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+    int64_t xbzrle_cache_size = s->xbzrle_cache_size;
 
     memcpy(enabled_capabilities, s->enabled_capabilities,
            sizeof(enabled_capabilities));
@@ -419,6 +424,7 @@ static MigrationState *migrate_init(const MigrationParams *params)
     s->params = *params;
     memcpy(s->enabled_capabilities, enabled_capabilities,
            sizeof(enabled_capabilities));
+    s->xbzrle_cache_size = xbzrle_cache_size;
 
     s->state = MIG_STATE_SETUP;
 
@@ -514,3 +520,21 @@ void qmp_migrate_set_downtime(double value, Error **errp)
     value = MAX(0, MIN(UINT64_MAX, value));
     max_downtime = (uint64_t)value;
 }
+
+int migrate_use_xbzrle(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
+}
+
+int64_t migrate_xbzrle_cache_size(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->xbzrle_cache_size;
+}
diff --git a/migration.h b/migration.h
index e42b0a0..175c729 100644
--- a/migration.h
+++ b/migration.h
@@ -39,6 +39,7 @@ struct MigrationState
     void *opaque;
     MigrationParams params;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+    int64_t xbzrle_cache_size;
 };
 
 void process_incoming_migration(QEMUFile *f);
@@ -98,4 +99,11 @@ void migrate_add_blocker(Error *reason);
  */
 void migrate_del_blocker(Error *reason);
 
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                         uint8_t *dst, int dlen);
+int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+
+int migrate_use_xbzrle(void);
+int64_t migrate_xbzrle_cache_size(void);
+
 #endif
diff --git a/savevm.c b/savevm.c
index 42937a0..31db838 100644
--- a/savevm.c
+++ b/savevm.c
@@ -2374,3 +2374,94 @@ void vmstate_register_ram_global(MemoryRegion *mr)
 {
     vmstate_register_ram(mr, NULL);
 }
+
+/*
+  page = zrun nzrun
+       | zrun nzrun page
+
+  zrun = length
+
+  nzrun = length byte...
+
+  length = uleb128 encoded integer
+ */
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                         uint8_t *dst, int dlen)
+{
+    uint32_t zrun_len = 0, nzrun_len = 0;
+    int d = 0, i = 0;
+    uint8_t *nzrun_start = NULL;
+
+    while (i < slen) {
+        /* overflow */
+        if (d + 2 > dlen) {
+            return -1;
+        }
+
+        while (!(old_buf[i] ^ new_buf[i]) && ++i <= slen) {
+            zrun_len++;
+        }
+
+        /* buffer unchanged */
+        if (zrun_len == slen) {
+            return 0;
+        }
+
+        /* skip last zero run */
+        if (i == slen + 1) {
+            return d;
+        }
+
+        d += uleb128_encode_small(dst + d, zrun_len);
+
+        zrun_len = 0;
+        nzrun_start = new_buf + i;
+        while ((old_buf[i] ^ new_buf[i]) != 0 && ++i <= slen) {
+            nzrun_len++;
+        }
+
+        /* overflow */
+        if (d + nzrun_len + 2 > dlen) {
+            return -1;
+        }
+
+        d += uleb128_encode_small(dst + d, nzrun_len);
+        memcpy(dst + d, nzrun_start, nzrun_len);
+        d += nzrun_len;
+        nzrun_len = 0;
+    }
+
+    return d;
+}
+
+int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int i = 0, d = 0;
+    uint32_t count = 0;
+
+    while (i < slen) {
+
+        /* zrun */
+        i += uleb128_decode_small(src + i, &count);
+        d += count;
+
+        /* overflow */
+        g_assert(d <= dlen);
+
+        /* completed decoding */
+        if (i == slen - 1) {
+            return d;
+        }
+
+        /* nzrun */
+        i += uleb128_decode_small(src + i, &count);
+
+        g_assert(d + count <= dlen);
+
+        memcpy(dst + d, src + i, count);
+        d += count;
+        i += count;
+    }
+
+    return d;
+}
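
xbzrle_encode_buffer() and xbzrle_decode_buffer() above rely on
uleb128_encode_small() and uleb128_decode_small(), which are added by an
earlier patch in this series and are not shown here. Since a length never
exceeds TARGET_PAGE_SIZE, two ULEB128 bytes always suffice; the following is a
rough, self-contained sketch of what such helpers can look like, illustrating
the "length = uleb128 encoded integer" rule in the format comment. It is an
assumption about their shape, not the series' actual code:

#include <assert.h>
#include <stdint.h>

/* Sketch only: encode n (n < 0x4000) as ULEB128, return bytes written. */
static int uleb128_encode_small_sketch(uint8_t *out, uint32_t n)
{
    assert(n <= 0x3fff);
    if (n < 0x80) {
        out[0] = n;                 /* fits in one byte, no continuation bit */
        return 1;
    }
    out[0] = (n & 0x7f) | 0x80;     /* low 7 bits plus continuation bit */
    out[1] = n >> 7;                /* remaining high bits */
    return 2;
}

/* Sketch only: decode a one- or two-byte ULEB128 value, return bytes read. */
static int uleb128_decode_small_sketch(const uint8_t *in, uint32_t *n)
{
    if (!(in[0] & 0x80)) {
        *n = in[0];
        return 1;
    }
    *n = (in[0] & 0x7f) | ((uint32_t)in[1] << 7);
    return 2;
}

For example, a zero run of 300 bytes is emitted as the two bytes 0xac 0x02, so
a mostly unchanged page encodes into a handful of length bytes plus only the
changed data.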