From patchwork Tue May 22 12:57:03 2012
X-Patchwork-Submitter: Orit Wasserman
X-Patchwork-Id: 160638
From: Orit Wasserman <owasserm@redhat.com>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, aliguori@us.ibm.com, quintela@redhat.com,
    Petter Svard, stefanha@gmail.com, mdroth@linux.vnet.ibm.com,
    Benoit Hudzia, blauwirbel@gmail.com, Orit Wasserman, chegu_vinod@hp.com,
    avi@redhat.com, Aidan Shribman, pbonzini@redhat.com, eblake@redhat.com
Date: Tue, 22 May 2012 15:57:03 +0300
Message-Id: <1337691425-6022-8-git-send-email-owasserm@redhat.com>
In-Reply-To: <1337691425-6022-1-git-send-email-owasserm@redhat.com>
References: <1337691425-6022-1-git-send-email-owasserm@redhat.com>
Subject: [Qemu-devel] [PATCH v11 7/9] Add XBZRLE to ram_save_block and ram_save_live

In the outgoing migration, check whether the page is cached and has changed;
if so, send the compressed page using the save_xbzrle_page() function.
In the incoming migration, check whether RAM_SAVE_FLAG_XBZRLE is set and, if
so, decompress the page using the load_xbzrle() function.
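
As a brief illustration of the stream layout used here (an editorial sketch,
not part of the patch): after the usual RAM block header, an XBZRLE page is
sent as a one-byte xh_flags field with ENCODING_FLAG_XBZRLE set, a big-endian
16-bit xh_len, a big-endian 32-bit xh_cksum (left at zero by this patch), and
then xh_len bytes of encoded data. The standalone program below just prints
that layout for a made-up payload; the put_u8/put_be16/put_be32 helpers stand
in for qemu_put_byte(), qemu_put_be16() and qemu_put_be32(), and the sample
bytes are invented, not taken from a real page.

/* Sketch of the XBZRLE page record layout; the helpers and payload below are
 * local stand-ins, not code from the patch. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ENCODING_FLAG_XBZRLE 0x1

static size_t put_u8(uint8_t *buf, size_t off, uint8_t v)
{
    buf[off] = v;
    return off + 1;
}

static size_t put_be16(uint8_t *buf, size_t off, uint16_t v)
{
    buf[off] = v >> 8;
    buf[off + 1] = v & 0xff;
    return off + 2;
}

static size_t put_be32(uint8_t *buf, size_t off, uint32_t v)
{
    buf[off] = v >> 24;
    buf[off + 1] = (v >> 16) & 0xff;
    buf[off + 2] = (v >> 8) & 0xff;
    buf[off + 3] = v & 0xff;
    return off + 4;
}

int main(void)
{
    /* pretend the encoder produced: skip 4 unchanged bytes, then two changed
     * bytes with new values 0xaa 0xbb (zrun = 4, nzrun = 2) */
    const uint8_t encoded[] = { 0x04, 0x02, 0xaa, 0xbb };
    uint8_t stream[64];
    size_t off = 0;

    /* the block header (RAM_SAVE_FLAG_XBZRLE | addr, block id) written by
     * save_block_hdr() is omitted here */
    off = put_u8(stream, off, ENCODING_FLAG_XBZRLE);   /* xh_flags */
    off = put_be16(stream, off, sizeof(encoded));      /* xh_len   */
    off = put_be32(stream, off, 0);                    /* xh_cksum, never filled in by the patch */
    memcpy(stream + off, encoded, sizeof(encoded));    /* encoded payload */
    off += sizeof(encoded);

    for (size_t i = 0; i < off; i++) {
        printf("%02x ", stream[i]);
    }
    printf("\n");   /* prints: 01 00 04 00 00 00 00 04 02 aa bb */
    return 0;
}
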
Signed-off-by: Benoit Hudzia
Signed-off-by: Petter Svard
Signed-off-by: Aidan Shribman
Signed-off-by: Orit Wasserman
---
 arch_init.c |  223 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 migration.c |   26 +++++++-
 migration.h |    8 ++
 savevm.c    |   91 ++++++++++++++++++++++++
 4 files changed, 332 insertions(+), 16 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 071dc8d..536d34c 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -43,6 +43,15 @@
 #include "hw/smbios.h"
 #include "exec-memory.h"
 #include "hw/pcspk.h"
+#include "qemu/cache.h"
+
+#ifdef DEBUG_ARCH_INIT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
 
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
@@ -94,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE   0x40
 
 #ifdef __ALTIVEC__
 #include <altivec.h>
@@ -157,6 +167,22 @@ static int is_dup_page(uint8_t *page)
     return 1;
 }
 
+/* XBZRLE (Xor Based Zero Length Encoding */
+typedef struct XBZRLEHeader {
+    uint32_t xh_cksum;
+    uint16_t xh_len;
+    uint8_t xh_flags;
+} XBZRLEHeader;
+
+/* struct contains XBZRLE cache and a static page
+   used by the compression */
+static struct {
+    /* buffer used for XBZRLE encoding */
+    uint8_t *encoded_buf;
+    /* Cache for XBZRLE */
+    Cache *cache;
+} XBZRLE = {0};
+
 static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                            int cont, int flag)
 {
@@ -169,19 +195,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
 
 }
 
+#define ENCODING_FLAG_XBZRLE 0x1
+
+static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
+                            ram_addr_t current_addr, RAMBlock *block,
+                            ram_addr_t offset, int cont)
+{
+    int encoded_len = 0, bytes_sent = -1, ret = -1;
+    XBZRLEHeader hdr = {0};
+    uint8_t *prev_cached_page;
+
+    /* check to see if page is cached , if not cache and return */
+    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
+        cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data,
+                                                          TARGET_PAGE_SIZE));
+        goto done;
+    }
+
+    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
+
+    /* XBZRLE encoding (if there is no overflow) */
+    encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data,
+                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                       TARGET_PAGE_SIZE);
+    if (encoded_len == 0) {
+        bytes_sent = 0;
+        DPRINTF("Unmodifed page or overflow skipping\n");
+        goto done;
+    } else if (encoded_len == -1) {
+        bytes_sent = -1;
+        DPRINTF("Overflow\n");
+        /* update data in the cache */
+        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    /* we need to update the data in the cache, in order to get the same data
+       we cached we decode the encoded page on the cached data */
+    ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len,
+                               prev_cached_page, TARGET_PAGE_SIZE);
+    g_assert(ret != -1);
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
+
+    /* Send XBZRLE based compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_byte(f, hdr.xh_flags);
+    qemu_put_be16(f, hdr.xh_len);
+    qemu_put_be32(f, hdr.xh_cksum);
+    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
+    bytes_sent = encoded_len + sizeof(hdr);
+
+done:
+    return bytes_sent;
+}
+
 static RAMBlock *last_block;
 static ram_addr_t last_offset;
 
-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
 {
     RAMBlock *block = last_block;
     ram_addr_t offset = last_offset;
-    int bytes_sent = 0;
+    int bytes_sent = -1;
     MemoryRegion *mr;
+    ram_addr_t current_addr;
 
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);
 
+    current_addr = block->offset + offset;
+
     do {
         mr = block->mr;
         if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
@@ -198,7 +283,24 @@ static int ram_save_block(QEMUFile *f)
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
-            } else {
+            } else if (migrate_use_xbzrle()) {
+                /* in stage 1 none of the pages are cached so we just want to
+                   cache them for next stages, and send the cached copy */
+                if (stage == 1) {
+                    cache_insert(XBZRLE.cache, current_addr,
+                                 g_memdup(p, TARGET_PAGE_SIZE));
+                } else {
+                    bytes_sent = save_xbzrle_page(f, p, current_addr, block,
+                                                  offset, cont);
+                }
+                /* send the cached page copy for stage 1 and 2*/
+                if (stage != 3) {
+                    p = get_cached_data(XBZRLE.cache, current_addr);
+                }
+            }
+
+            /* either we didn't send yet (we may got XBZRLE overflow) */
+            if (bytes_sent == -1) {
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
@@ -292,6 +394,17 @@ static void sort_ram_list(void)
     g_free(blocks);
 }
 
+static void migration_end(void)
+{
+    memory_global_dirty_log_stop();
+
+    if (migrate_use_xbzrle()) {
+        cache_fini(XBZRLE.cache);
+        g_free(XBZRLE.cache);
+        XBZRLE.cache = NULL;
+    }
+}
+
 int ram_save_live(QEMUFile *f, int stage, void *opaque)
 {
     ram_addr_t addr;
@@ -301,7 +414,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
     int ret;
 
     if (stage < 0) {
-        memory_global_dirty_log_stop();
+        migration_end();
         return 0;
     }
 
@@ -314,6 +427,17 @@
         last_offset = 0;
         sort_ram_list();
 
+        if (migrate_use_xbzrle()) {
+            XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
+                                      TARGET_PAGE_SIZE,
+                                      TARGET_PAGE_SIZE);
+            if (!XBZRLE.cache) {
+                DPRINTF("Error creating cache\n");
+                return -1;
+            }
+            XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
+        }
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
@@ -341,9 +465,12 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int bytes_sent;
 
-        bytes_sent = ram_save_block(f);
-        bytes_transferred += bytes_sent;
-        if (bytes_sent == 0) { /* no more blocks */
+        bytes_sent = ram_save_block(f, stage);
+        /* bytes_sent 0 represent unchanged page,
+           bytes_sent -1 represent no more blocks*/
+        if (bytes_sent > 0) {
+            bytes_transferred += bytes_sent;
+        } else if (bytes_sent == -1) { /* no more blocks */
             break;
         }
     }
@@ -366,19 +493,62 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
         int bytes_sent;
 
         /* flush all remaining blocks regardless of rate limiting */
-        while ((bytes_sent = ram_save_block(f)) != 0) {
+        while ((bytes_sent = ram_save_block(f, stage)) != -1) {
             bytes_transferred += bytes_sent;
         }
-        memory_global_dirty_log_stop();
+        migration_end();
     }
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
 
+    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
+            migrate_max_downtime());
+
     return (stage == 2) && (expected_time <= migrate_max_downtime());
 }
 
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = 0;
+    uint8_t *xbzrle_buf = NULL;
+    XBZRLEHeader hdr = {0};
+
+    /* extract RLE header */
+    hdr.xh_flags = qemu_get_byte(f);
+    hdr.xh_len = qemu_get_be16(f);
+    hdr.xh_cksum = qemu_get_be32(f);
+
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
+        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
+        return -1;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
+        return -1;
+    }
+
+    /* load data and decode */
+    xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE);
+    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
+
+    /* decode RLE */
+    ret = xbzrle_decode_buffer(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+        rc = -1;
+    } else if (ret > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
+                ret, TARGET_PAGE_SIZE);
+        rc = -1;
+    }
+
+    g_free(xbzrle_buf);
+    return rc;
+}
+
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -412,8 +582,11 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
     int error;
+    static uint64_t seq_iter;
+
+    seq_iter++;
 
     if (version_id < 4 || version_id > 4) {
         return -EINVAL;
     }
@@ -443,8 +616,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
 
             QLIST_FOREACH(block, &ram_list.blocks, next) {
                 if (!strncmp(id, block->idstr, sizeof(id))) {
-                    if (block->length != length)
-                        return -EINVAL;
+                    if (block->length != length) {
+                        ret = -EINVAL;
+                        goto done;
+                    }
                     break;
                 }
             }
@@ -452,7 +627,8 @@
             if (!block) {
                 fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                         "accept migration\n", id);
-                return -EINVAL;
+                ret = -EINVAL;
+                goto done;
             }
 
             total_ram_bytes -= length;
@@ -481,16 +657,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
             void *host;
 
             host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
+            void *host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
+
+            if (load_xbzrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
         }
 
         error = qemu_file_get_error(f);
         if (error) {
-            return error;
+            ret = error;
+            goto done;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));
 
-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+            ret, seq_iter);
+    return ret;
 }
 
 #ifdef HAS_AUDIO
diff --git a/migration.c b/migration.c
index 952f542..92c39e8 100644
--- a/migration.c
+++ b/migration.c
@@ -43,6 +43,9 @@ enum {
 
 #define MAX_THROTTLE (32 << 20) /* Migration speed throttling */
 
+/* Migration XBZRLE cache size */
+#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
+
 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -55,7 +58,8 @@ static MigrationState *migrate_get_current(void)
     static MigrationState current_migration = {
         .state = MIG_STATE_SETUP,
         .bandwidth_limit = MAX_THROTTLE,
-    };
+        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
+    };
 
     return &current_migration;
 }
@@ -410,6 +414,7 @@ static MigrationState *migrate_init(const MigrationParams *params)
     MigrationState *s = migrate_get_current();
     int64_t bandwidth_limit = s->bandwidth_limit;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+    int64_t xbzrle_cache_size = s->xbzrle_cache_size;
 
     memcpy(enabled_capabilities, s->enabled_capabilities,
            sizeof(enabled_capabilities));
@@ -419,6 +424,7 @@ static MigrationState *migrate_init(const MigrationParams *params)
     s->params = *params;
     memcpy(s->enabled_capabilities, enabled_capabilities,
            sizeof(enabled_capabilities));
+    s->xbzrle_cache_size = xbzrle_cache_size;
 
     s->state = MIG_STATE_SETUP;
 
@@ -516,3 +522,21 @@ void qmp_migrate_set_downtime(double value, Error **errp)
     value = MAX(0, MIN(UINT64_MAX, value));
     max_downtime = (uint64_t)value;
 }
+
+int migrate_use_xbzrle(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
+}
+
+int64_t migrate_xbzrle_cache_size(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->xbzrle_cache_size;
+}
diff --git a/migration.h b/migration.h
index 00d1992..eb0b822 100644
--- a/migration.h
+++ b/migration.h
@@ -39,6 +39,7 @@ struct MigrationState
     void *opaque;
     MigrationParams params;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+    int64_t xbzrle_cache_size;
 };
 
 void process_incoming_migration(QEMUFile *f);
@@ -99,4 +100,11 @@ void migrate_add_blocker(Error *reason);
  */
 void migrate_del_blocker(Error *reason);
 
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                         uint8_t *dst, int dlen);
+int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+
+int migrate_use_xbzrle(void);
+int64_t migrate_xbzrle_cache_size(void);
+
 #endif
diff --git a/savevm.c b/savevm.c
index 42937a0..31db838 100644
--- a/savevm.c
+++ b/savevm.c
@@ -2374,3 +2374,94 @@ void vmstate_register_ram_global(MemoryRegion *mr)
 {
     vmstate_register_ram(mr, NULL);
 }
+
+/*
+  page = zrun nzrun
+         | zrun nzrun page
+
+  zrun = length
+
+  nzrun = length byte...
+
+  length = uleb128 encoded integer
+ */
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                         uint8_t *dst, int dlen)
+{
+    uint32_t zrun_len = 0, nzrun_len = 0;
+    int d = 0 , i = 0;
+    uint8_t *nzrun_start = NULL;
+
+    while (i < slen) {
+        /* overflow */
+        if (d + 2 > dlen) {
+            return -1;
+        }
+
+        while (!(old_buf[i] ^ new_buf[i]) && ++i <= slen) {
+            zrun_len++;
+        }
+
+        /* buffer unchanged */
+        if (zrun_len == slen) {
+            return 0;
+        }
+
+        /* skip last zero run */
+        if (i == slen + 1) {
+            return d;
+        }
+
+        d += uleb128_encode_small(dst + d, zrun_len);
+
+        zrun_len = 0;
+        nzrun_start = new_buf + i;
+        while ((old_buf[i] ^ new_buf[i]) != 0 && ++i <= slen) {
+            nzrun_len++;
+        }
+
+        /* overflow */
+        if (d + nzrun_len + 2 > dlen) {
+            return -1;
+        }
+
+        d += uleb128_encode_small(dst + d, nzrun_len);
+        memcpy(dst + d, nzrun_start, nzrun_len);
+        d += nzrun_len;
+        nzrun_len = 0;
+    }
+
+    return d;
+}
+
+int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int i = 0, d = 0;
+    uint32_t count = 0;
+
+    while (i < slen) {
+
+        /* zrun */
+        i += uleb128_decode_small(src + i, &count);
+        d += count;
+
+        /* overflow */
+        g_assert(d <= dlen);
+
+        /* completed decoding */
+        if (i == slen - 1) {
+            return d;
+        }
+
+        /* nzrun */
+        i += uleb128_decode_small(src + i, &count);
+
+        g_assert(d + count <= dlen);
+
+        memcpy(dst + d , src + i, count);
+        d += count;
+        i += count;
+    }
+
+    return d;
+}
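
A note on the encoding added at the end of savevm.c, for readers new to it:
a page is transmitted as alternating zrun/nzrun pairs, where a zrun is a
ULEB128 count of bytes identical to the cached copy of the page and an nzrun
is a ULEB128 count followed by that many literal new bytes; a trailing run of
unchanged bytes is simply omitted. The small standalone program below walks
through one example by hand and applies it with a simplified copy of the
decoder. It is illustrative only and not part of the patch: uleb_decode() and
xbzrle_apply() are local names, and uleb_decode() handles just the one- and
two-byte encodings that, to my reading, uleb128_decode_small() supports.

/* Standalone walk-through of the XBZRLE wire grammar:
 *   page  = zrun nzrun | zrun nzrun page
 *   zrun  = uleb128 count of bytes unchanged vs. the cached page
 *   nzrun = uleb128 count followed by that many literal new bytes
 * Illustrative only; not the code added by the patch. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* decode a 1- or 2-byte ULEB128 value, return bytes consumed */
static int uleb_decode(const uint8_t *src, uint32_t *val)
{
    if (src[0] < 0x80) {
        *val = src[0];
        return 1;
    }
    *val = (src[0] & 0x7f) | ((uint32_t)src[1] << 7);
    return 2;
}

/* simplified stand-in for xbzrle_decode_buffer(): apply zrun/nzrun pairs
 * on top of dst, which already holds the old (cached) page contents */
static int xbzrle_apply(const uint8_t *src, int slen, uint8_t *dst, int dlen)
{
    int i = 0, d = 0;
    uint32_t count;

    while (i < slen) {
        i += uleb_decode(src + i, &count);   /* zrun: skip unchanged bytes */
        d += count;
        assert(d <= dlen);

        if (i >= slen) {
            break;
        }
        i += uleb_decode(src + i, &count);   /* nzrun: copy literal bytes */
        assert(d + (int)count <= dlen);
        memcpy(dst + d, src + i, count);
        d += count;
        i += count;
    }
    return d;
}

int main(void)
{
    uint8_t old_page[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
    uint8_t new_page[8] = { 1, 1, 1, 9, 9, 1, 1, 1 };

    /* hand-encoded: zrun of 3 unchanged bytes, nzrun of 2 bytes (9, 9);
     * the trailing 3 unchanged bytes need no zrun, mirroring the
     * "skip last zero run" case in xbzrle_encode_buffer() */
    const uint8_t encoded[] = { 3, 2, 9, 9 };

    uint8_t page[8];
    memcpy(page, old_page, sizeof(page));    /* start from the cached copy */
    xbzrle_apply(encoded, sizeof(encoded), page, sizeof(page));
    assert(memcmp(page, new_page, sizeof(page)) == 0);
    printf("decoded page matches the new page\n");
    return 0;
}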