Message ID | 1337691425-6022-8-git-send-email-owasserm@redhat.com |
---|---|
State | New |
Headers | show |
Orit Wasserman <owasserm@redhat.com> wrote: > In the outgoing migration check to see if the page is cached and > changed than send compressed page by using save_xbrle_page function. > In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set > and decompress the page (by using load_xbrle function). This patch can be split to easy review. > --- a/arch_init.c > +++ b/arch_init.c > @@ -43,6 +43,15 @@ > #include "hw/smbios.h" > #include "exec-memory.h" > #include "hw/pcspk.h" > +#include "qemu/cache.h" > + > +#ifdef DEBUG_ARCH_INIT > +#define DPRINTF(fmt, ...) \ > + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) > +#else > +#define DPRINTF(fmt, ...) \ > + do { } while (0) > +#endif Independent of xbzrle. > > #ifdef TARGET_SPARC > int graphic_width = 1024; > @@ -94,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH; > #define RAM_SAVE_FLAG_PAGE 0x08 > #define RAM_SAVE_FLAG_EOS 0x10 > #define RAM_SAVE_FLAG_CONTINUE 0x20 > +#define RAM_SAVE_FLAG_XBZRLE 0x40 > > #ifdef __ALTIVEC__ > #include <altivec.h> > @@ -157,6 +167,22 @@ static int is_dup_page(uint8_t *page) > return 1; > } > > +/* XBZRLE (Xor Based Zero Length Encoding */ > +typedef struct XBZRLEHeader { > + uint32_t xh_cksum; We are still not using this value, and we are sending it anyway (with a value of zero). What happens when we start using if for a checksum, and we migration to a new version that "expects" it to be valid? I would preffer not to sent it, or sent the correct value. > + uint16_t xh_len; > + uint8_t xh_flags; > +} XBZRLEHeader; > + > +/* struct contains XBZRLE cache and a static page > + used by the compression */ > +static struct { > + /* buffer used for XBZRLE encoding */ > + uint8_t *encoded_buf; > + /* Cache for XBZRLE */ > + Cache *cache; > +} XBZRLE = {0}; Use c99 initializers, please. { .encoded_buf = NULL, .cache = NULL, } More instances in other parts. > + > static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, > int cont, int flag) > { > @@ -169,19 +195,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, > > } > > +#define ENCODING_FLAG_XBZRLE 0x1 > + > +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, > + ram_addr_t current_addr, RAMBlock *block, > + ram_addr_t offset, int cont) > +{ > + int encoded_len = 0, bytes_sent = -1, ret = -1; > + XBZRLEHeader hdr = {0}; > + uint8_t *prev_cached_page; > + > + /* check to see if page is cached , if not cache and return */ > + if (!cache_is_cached(XBZRLE.cache, current_addr)) { > + cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data, > + TARGET_PAGE_SIZE)); > + goto done; > + } > + > + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); > + > + /* XBZRLE encoding (if there is no overflow) */ > + encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data, > + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, > + TARGET_PAGE_SIZE); > + if (encoded_len == 0) { > + bytes_sent = 0; > + DPRINTF("Unmodifed page or overflow skipping\n"); > + goto done; > + } else if (encoded_len == -1) { > + bytes_sent = -1; > + DPRINTF("Overflow\n"); > + /* update data in the cache */ > + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); > + goto done; > + } > + > + /* we need to update the data in the cache, in order to get the same data > + we cached we decode the encoded page on the cached data */ > + ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len, > + prev_cached_page, TARGET_PAGE_SIZE); > + g_assert(ret != -1); > + > + hdr.xh_len = encoded_len; > + hdr.xh_flags |= ENCODING_FLAG_XBZRLE; > + > + /* Send XBZRLE based compressed page */ > + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); > + qemu_put_byte(f, hdr.xh_flags); > + qemu_put_be16(f, hdr.xh_len); > + qemu_put_be32(f, hdr.xh_cksum); > + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); > + bytes_sent = encoded_len + sizeof(hdr); > + > +done: > + return bytes_sent; > +} > + > static RAMBlock *last_block; > static ram_addr_t last_offset; > > -static int ram_save_block(QEMUFile *f) > +static int ram_save_block(QEMUFile *f, int stage) > { > RAMBlock *block = last_block; > ram_addr_t offset = last_offset; > - int bytes_sent = 0; > + int bytes_sent = -1; > MemoryRegion *mr; > + ram_addr_t current_addr; > > if (!block) > block = QLIST_FIRST(&ram_list.blocks); > > + current_addr = block->offset + offset; > + > do { > mr = block->mr; > if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE, > @@ -198,7 +283,24 @@ static int ram_save_block(QEMUFile *f) > save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS); > qemu_put_byte(f, *p); > bytes_sent = 1; > - } else { > + } else if (migrate_use_xbzrle()) { > + /* in stage 1 none of the pages are cached so we just want to > + cache them for next stages, and send the cached copy */ > + if (stage == 1) { > + cache_insert(XBZRLE.cache, current_addr, > + g_memdup(p, TARGET_PAGE_SIZE)); > + } else { > + bytes_sent = save_xbzrle_page(f, p, current_addr, block, > + offset, cont); > + } > + /* send the cached page copy for stage 1 and 2*/ > + if (stage != 3) { > + p = get_cached_data(XBZRLE.cache, current_addr); > + } > + } > + > + /* either we didn't send yet (we may got XBZRLE overflow) */ > + if (bytes_sent == -1) { > save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); > qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > bytes_sent = TARGET_PAGE_SIZE; I think that code is not right when save_xbzrle_page() returns 0. That means that page hasn't changed since last time we sent that page. We shouldn't break in that case. Just continue with next page, right? On the other hand ... Why are we doing the stage == 1 test? stage 1 normally only sent part of the pages, so we could use the generic code there? It would just return -1 as bytes_sent, and do the same code that we have now? The optimization for stage 3 is not done backwards? We are inserting the page in the cache even if we are on stage 3. In stage three we should: - look if page is on the cache: do usual xbrlze trick - if it is not, just sent the whole page without inserting it into the cache? We are never going to reuse it, so putting it into the cache would not help us at all. We are just making an extra copy? > > qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; > > + DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time, > + migrate_max_downtime()); > + This belongs to debugging patch. > + /* load data and decode */ > + xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE); can't we have a static buffer of that size, and avoid all the malloc/free business? If space is tight, we can allways put it on the xbrle structure and assign it only for migration. > @@ -481,16 +657,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > void *host; > > host = host_from_stream_offset(f, addr, flags); > + if (!host) { > + return -EINVAL; > + } Why is this check only needed now? Later, Juan.
On 06/01/2012 02:42 PM, Juan Quintela wrote: > Orit Wasserman <owasserm@redhat.com> wrote: >> In the outgoing migration check to see if the page is cached and >> changed than send compressed page by using save_xbrle_page function. >> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set >> and decompress the page (by using load_xbrle function). > > > This patch can be split to easy review. Sure. > >> --- a/arch_init.c >> +++ b/arch_init.c >> @@ -43,6 +43,15 @@ >> #include "hw/smbios.h" >> #include "exec-memory.h" >> #include "hw/pcspk.h" >> +#include "qemu/cache.h" >> + >> +#ifdef DEBUG_ARCH_INIT >> +#define DPRINTF(fmt, ...) \ >> + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) >> +#else >> +#define DPRINTF(fmt, ...) \ >> + do { } while (0) >> +#endif > > Independent of xbzrle. > >> >> #ifdef TARGET_SPARC >> int graphic_width = 1024; >> @@ -94,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH; >> #define RAM_SAVE_FLAG_PAGE 0x08 >> #define RAM_SAVE_FLAG_EOS 0x10 >> #define RAM_SAVE_FLAG_CONTINUE 0x20 >> +#define RAM_SAVE_FLAG_XBZRLE 0x40 >> >> #ifdef __ALTIVEC__ >> #include <altivec.h> >> @@ -157,6 +167,22 @@ static int is_dup_page(uint8_t *page) >> return 1; >> } >> >> +/* XBZRLE (Xor Based Zero Length Encoding */ >> +typedef struct XBZRLEHeader { >> + uint32_t xh_cksum; > > We are still not using this value, and we are sending it anyway (with a > value of zero). What happens when we start using if for a checksum, and > we migration to a new version that "expects" it to be valid? I would > preffer not to sent it, or sent the correct value. I think I will remove it, checksum should be used for all migration not just XBZRLE. I guess we can add it to the protocol in the future. > >> + uint16_t xh_len; >> + uint8_t xh_flags; >> +} XBZRLEHeader; >> + >> +/* struct contains XBZRLE cache and a static page >> + used by the compression */ >> +static struct { >> + /* buffer used for XBZRLE encoding */ >> + uint8_t *encoded_buf; >> + /* Cache for XBZRLE */ >> + Cache *cache; >> +} XBZRLE = {0}; > > Use c99 initializers, please. > > { .encoded_buf = NULL, > .cache = NULL, > } > > More instances in other parts. > >> + >> static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, >> int cont, int flag) > > { >> @@ -169,19 +195,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, >> >> } >> >> +#define ENCODING_FLAG_XBZRLE 0x1 >> + >> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, >> + ram_addr_t current_addr, RAMBlock *block, >> + ram_addr_t offset, int cont) >> +{ >> + int encoded_len = 0, bytes_sent = -1, ret = -1; >> + XBZRLEHeader hdr = {0}; >> + uint8_t *prev_cached_page; >> + >> + /* check to see if page is cached , if not cache and return */ >> + if (!cache_is_cached(XBZRLE.cache, current_addr)) { >> + cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data, >> + TARGET_PAGE_SIZE)); >> + goto done; >> + } >> + >> + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); >> + >> + /* XBZRLE encoding (if there is no overflow) */ >> + encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data, >> + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, >> + TARGET_PAGE_SIZE); >> + if (encoded_len == 0) { >> + bytes_sent = 0; >> + DPRINTF("Unmodifed page or overflow skipping\n"); >> + goto done; >> + } else if (encoded_len == -1) { >> + bytes_sent = -1; >> + DPRINTF("Overflow\n"); >> + /* update data in the cache */ >> + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); >> + goto done; >> + } >> + >> + /* we need to update the data in the cache, in order to get the same data >> + we cached we decode the encoded page on the cached data */ >> + ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len, >> + prev_cached_page, TARGET_PAGE_SIZE); >> + g_assert(ret != -1); >> + >> + hdr.xh_len = encoded_len; >> + hdr.xh_flags |= ENCODING_FLAG_XBZRLE; >> + >> + /* Send XBZRLE based compressed page */ >> + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); >> + qemu_put_byte(f, hdr.xh_flags); >> + qemu_put_be16(f, hdr.xh_len); >> + qemu_put_be32(f, hdr.xh_cksum); >> + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); >> + bytes_sent = encoded_len + sizeof(hdr); >> + >> +done: >> + return bytes_sent; >> +} >> + >> static RAMBlock *last_block; >> static ram_addr_t last_offset; >> >> -static int ram_save_block(QEMUFile *f) >> +static int ram_save_block(QEMUFile *f, int stage) >> { >> RAMBlock *block = last_block; >> ram_addr_t offset = last_offset; >> - int bytes_sent = 0; >> + int bytes_sent = -1; >> MemoryRegion *mr; >> + ram_addr_t current_addr; >> >> if (!block) >> block = QLIST_FIRST(&ram_list.blocks); >> >> + current_addr = block->offset + offset; >> + >> do { >> mr = block->mr; >> if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE, >> @@ -198,7 +283,24 @@ static int ram_save_block(QEMUFile *f) >> save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS); >> qemu_put_byte(f, *p); >> bytes_sent = 1; >> - } else { >> + } else if (migrate_use_xbzrle()) { >> + /* in stage 1 none of the pages are cached so we just want to >> + cache them for next stages, and send the cached copy */ >> + if (stage == 1) { >> + cache_insert(XBZRLE.cache, current_addr, >> + g_memdup(p, TARGET_PAGE_SIZE)); >> + } else { >> + bytes_sent = save_xbzrle_page(f, p, current_addr, block, >> + offset, cont); >> + } >> + /* send the cached page copy for stage 1 and 2*/ >> + if (stage != 3) { >> + p = get_cached_data(XBZRLE.cache, current_addr); >> + } >> + } >> + >> + /* either we didn't send yet (we may got XBZRLE overflow) */ >> + if (bytes_sent == -1) { >> save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); >> qemu_put_buffer(f, p, TARGET_PAGE_SIZE); >> bytes_sent = TARGET_PAGE_SIZE; > > > I think that code is not right when save_xbzrle_page() returns 0. That > means that page hasn't changed since last time we sent that page. We > shouldn't break in that case. Just continue with next page, right? > You are right I missed that , will be fixed > On the other hand ... Why are we doing the stage == 1 test? stage 1 > normally only sent part of the pages, so we could use the generic code > there? It would just return -1 as bytes_sent, and do the same code that > we have now? we need to add the pages to the cache in stage 1 (for the next stage), and there is no need for checking if the page is cached. and send the pages from the cache for consistency > > The optimization for stage 3 is not done backwards? We are inserting > the page in the cache even if we are on stage 3. In stage three we > should: > - look if page is on the cache: do usual xbrlze trick > - if it is not, just sent the whole page without inserting it into the > cache? We are never going to reuse it, so putting it into the cache > would not help us at all. We are just making an extra copy? right no need to insert the page into the cache in stage 3, I will remove it > > >> >> qemu_put_be64(f, RAM_SAVE_FLAG_EOS); >> >> expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; >> >> + DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time, >> + migrate_max_downtime()); >> + > > This belongs to debugging patch. > >> + /* load data and decode */ >> + xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE); > > can't we have a static buffer of that size, and avoid all the > malloc/free business? If space is tight, we can allways put it on the > xbrle structure and assign it only for migration. good idea > >> @@ -481,16 +657,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) >> void *host; >> >> host = host_from_stream_offset(f, addr, flags); >> + if (!host) { >> + return -EINVAL; >> + } > > Why is this check only needed now? I wish I knew, looks like it is missing in upstream. Do you think I should fix it separately ? Thanks, Orit > > Later, Juan.
Orit Wasserman <owasserm@redhat.com> wrote: > On 06/01/2012 02:42 PM, Juan Quintela wrote: >> We are still not using this value, and we are sending it anyway (with a >> value of zero). What happens when we start using if for a checksum, and >> we migration to a new version that "expects" it to be valid? I would >> preffer not to sent it, or sent the correct value. > I think I will remove it, checksum should be used for all migration > not just XBZRLE. > I guess we can add it to the protocol in the future. Agreed. >> On the other hand ... Why are we doing the stage == 1 test? stage 1 >> normally only sent part of the pages, so we could use the generic code >> there? It would just return -1 as bytes_sent, and do the same code that >> we have now? > > we need to add the pages to the cache in stage 1 (for the next stage), > and there is no need for checking if the page is cached. > and send the pages from the cache for consistency My question is: If we remove the check and just call the other function, everything works, no? So , why add the special case? If it dont' work, we need to change it, because nothing warantees that we fill the cache during stage . >>> void *host; >>> >>> host = host_from_stream_offset(f, addr, flags); >>> + if (!host) { >>> + return -EINVAL; >>> + } >> >> Why is this check only needed now? > I wish I knew, looks like it is missing in upstream. > Do you think I should fix it separately ? Yeap. Thanks, Juan.
diff --git a/arch_init.c b/arch_init.c index 071dc8d..536d34c 100644 --- a/arch_init.c +++ b/arch_init.c @@ -43,6 +43,15 @@ #include "hw/smbios.h" #include "exec-memory.h" #include "hw/pcspk.h" +#include "qemu/cache.h" + +#ifdef DEBUG_ARCH_INIT +#define DPRINTF(fmt, ...) \ + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif #ifdef TARGET_SPARC int graphic_width = 1024; @@ -94,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH; #define RAM_SAVE_FLAG_PAGE 0x08 #define RAM_SAVE_FLAG_EOS 0x10 #define RAM_SAVE_FLAG_CONTINUE 0x20 +#define RAM_SAVE_FLAG_XBZRLE 0x40 #ifdef __ALTIVEC__ #include <altivec.h> @@ -157,6 +167,22 @@ static int is_dup_page(uint8_t *page) return 1; } +/* XBZRLE (Xor Based Zero Length Encoding */ +typedef struct XBZRLEHeader { + uint32_t xh_cksum; + uint16_t xh_len; + uint8_t xh_flags; +} XBZRLEHeader; + +/* struct contains XBZRLE cache and a static page + used by the compression */ +static struct { + /* buffer used for XBZRLE encoding */ + uint8_t *encoded_buf; + /* Cache for XBZRLE */ + Cache *cache; +} XBZRLE = {0}; + static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, int cont, int flag) { @@ -169,19 +195,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, } +#define ENCODING_FLAG_XBZRLE 0x1 + +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, + ram_addr_t current_addr, RAMBlock *block, + ram_addr_t offset, int cont) +{ + int encoded_len = 0, bytes_sent = -1, ret = -1; + XBZRLEHeader hdr = {0}; + uint8_t *prev_cached_page; + + /* check to see if page is cached , if not cache and return */ + if (!cache_is_cached(XBZRLE.cache, current_addr)) { + cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data, + TARGET_PAGE_SIZE)); + goto done; + } + + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); + + /* XBZRLE encoding (if there is no overflow) */ + encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data, + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); + if (encoded_len == 0) { + bytes_sent = 0; + DPRINTF("Unmodifed page or overflow skipping\n"); + goto done; + } else if (encoded_len == -1) { + bytes_sent = -1; + DPRINTF("Overflow\n"); + /* update data in the cache */ + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); + goto done; + } + + /* we need to update the data in the cache, in order to get the same data + we cached we decode the encoded page on the cached data */ + ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len, + prev_cached_page, TARGET_PAGE_SIZE); + g_assert(ret != -1); + + hdr.xh_len = encoded_len; + hdr.xh_flags |= ENCODING_FLAG_XBZRLE; + + /* Send XBZRLE based compressed page */ + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); + qemu_put_byte(f, hdr.xh_flags); + qemu_put_be16(f, hdr.xh_len); + qemu_put_be32(f, hdr.xh_cksum); + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); + bytes_sent = encoded_len + sizeof(hdr); + +done: + return bytes_sent; +} + static RAMBlock *last_block; static ram_addr_t last_offset; -static int ram_save_block(QEMUFile *f) +static int ram_save_block(QEMUFile *f, int stage) { RAMBlock *block = last_block; ram_addr_t offset = last_offset; - int bytes_sent = 0; + int bytes_sent = -1; MemoryRegion *mr; + ram_addr_t current_addr; if (!block) block = QLIST_FIRST(&ram_list.blocks); + current_addr = block->offset + offset; + do { mr = block->mr; if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE, @@ -198,7 +283,24 @@ static int ram_save_block(QEMUFile *f) save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS); qemu_put_byte(f, *p); bytes_sent = 1; - } else { + } else if (migrate_use_xbzrle()) { + /* in stage 1 none of the pages are cached so we just want to + cache them for next stages, and send the cached copy */ + if (stage == 1) { + cache_insert(XBZRLE.cache, current_addr, + g_memdup(p, TARGET_PAGE_SIZE)); + } else { + bytes_sent = save_xbzrle_page(f, p, current_addr, block, + offset, cont); + } + /* send the cached page copy for stage 1 and 2*/ + if (stage != 3) { + p = get_cached_data(XBZRLE.cache, current_addr); + } + } + + /* either we didn't send yet (we may got XBZRLE overflow) */ + if (bytes_sent == -1) { save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); qemu_put_buffer(f, p, TARGET_PAGE_SIZE); bytes_sent = TARGET_PAGE_SIZE; @@ -292,6 +394,17 @@ static void sort_ram_list(void) g_free(blocks); } +static void migration_end(void) +{ + memory_global_dirty_log_stop(); + + if (migrate_use_xbzrle()) { + cache_fini(XBZRLE.cache); + g_free(XBZRLE.cache); + XBZRLE.cache = NULL; + } +} + int ram_save_live(QEMUFile *f, int stage, void *opaque) { ram_addr_t addr; @@ -301,7 +414,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) int ret; if (stage < 0) { - memory_global_dirty_log_stop(); + migration_end(); return 0; } @@ -314,6 +427,17 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) last_offset = 0; sort_ram_list(); + if (migrate_use_xbzrle()) { + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / + TARGET_PAGE_SIZE, + TARGET_PAGE_SIZE); + if (!XBZRLE.cache) { + DPRINTF("Error creating cache\n"); + return -1; + } + XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); + } + /* Make sure all dirty bits are set */ QLIST_FOREACH(block, &ram_list.blocks, next) { for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { @@ -341,9 +465,12 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) while ((ret = qemu_file_rate_limit(f)) == 0) { int bytes_sent; - bytes_sent = ram_save_block(f); - bytes_transferred += bytes_sent; - if (bytes_sent == 0) { /* no more blocks */ + bytes_sent = ram_save_block(f, stage); + /* bytes_sent 0 represent unchanged page, + bytes_sent -1 represent no more blocks*/ + if (bytes_sent > 0) { + bytes_transferred += bytes_sent; + } else if (bytes_sent == -1) { /* no more blocks */ break; } } @@ -366,19 +493,62 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) int bytes_sent; /* flush all remaining blocks regardless of rate limiting */ - while ((bytes_sent = ram_save_block(f)) != 0) { + while ((bytes_sent = ram_save_block(f, stage)) != -1) { bytes_transferred += bytes_sent; } - memory_global_dirty_log_stop(); + migration_end(); } qemu_put_be64(f, RAM_SAVE_FLAG_EOS); expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; + DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time, + migrate_max_downtime()); + return (stage == 2) && (expected_time <= migrate_max_downtime()); } +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) +{ + int ret, rc = 0; + uint8_t *xbzrle_buf = NULL; + XBZRLEHeader hdr = {0}; + + /* extract RLE header */ + hdr.xh_flags = qemu_get_byte(f); + hdr.xh_len = qemu_get_be16(f); + hdr.xh_cksum = qemu_get_be32(f); + + if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) { + fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); + return -1; + } + + if (hdr.xh_len > TARGET_PAGE_SIZE) { + fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); + return -1; + } + + /* load data and decode */ + xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE); + qemu_get_buffer(f, xbzrle_buf, hdr.xh_len); + + /* decode RLE */ + ret = xbzrle_decode_buffer(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE); + if (ret == -1) { + fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); + rc = -1; + } else if (ret > TARGET_PAGE_SIZE) { + fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", + ret, TARGET_PAGE_SIZE); + rc = -1; + } + + g_free(xbzrle_buf); + return rc; +} + static inline void *host_from_stream_offset(QEMUFile *f, ram_addr_t offset, int flags) @@ -412,8 +582,11 @@ static inline void *host_from_stream_offset(QEMUFile *f, int ram_load(QEMUFile *f, void *opaque, int version_id) { ram_addr_t addr; - int flags; + int flags, ret = 0; int error; + static uint64_t seq_iter; + + seq_iter++; if (version_id < 4 || version_id > 4) { return -EINVAL; @@ -443,8 +616,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) QLIST_FOREACH(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id))) { - if (block->length != length) - return -EINVAL; + if (block->length != length) { + ret = -EINVAL; + goto done; + } break; } } @@ -452,7 +627,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) if (!block) { fprintf(stderr, "Unknown ramblock \"%s\", cannot " "accept migration\n", id); - return -EINVAL; + ret = -EINVAL; + goto done; } total_ram_bytes -= length; @@ -481,16 +657,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) void *host; host = host_from_stream_offset(f, addr, flags); + if (!host) { + return -EINVAL; + } qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + } else if (flags & RAM_SAVE_FLAG_XBZRLE) { + void *host = host_from_stream_offset(f, addr, flags); + if (!host) { + return -EINVAL; + } + + if (load_xbzrle(f, addr, host) < 0) { + ret = -EINVAL; + goto done; + } } error = qemu_file_get_error(f); if (error) { - return error; + ret = error; + goto done; } } while (!(flags & RAM_SAVE_FLAG_EOS)); - return 0; +done: + DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n", + ret, seq_iter); + return ret; } #ifdef HAS_AUDIO diff --git a/migration.c b/migration.c index 952f542..92c39e8 100644 --- a/migration.c +++ b/migration.c @@ -43,6 +43,9 @@ enum { #define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ +/* Migration XBZRLE cache size */ +#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024) + static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -55,7 +58,8 @@ static MigrationState *migrate_get_current(void) static MigrationState current_migration = { .state = MIG_STATE_SETUP, .bandwidth_limit = MAX_THROTTLE, - }; + .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE, +}; return ¤t_migration; } @@ -410,6 +414,7 @@ static MigrationState *migrate_init(const MigrationParams *params) MigrationState *s = migrate_get_current(); int64_t bandwidth_limit = s->bandwidth_limit; bool enabled_capabilities[MIGRATION_CAPABILITY_MAX]; + int64_t xbzrle_cache_size = s->xbzrle_cache_size; memcpy(enabled_capabilities, s->enabled_capabilities, sizeof(enabled_capabilities)); @@ -419,6 +424,7 @@ static MigrationState *migrate_init(const MigrationParams *params) s->params = *params; memcpy(s->enabled_capabilities, enabled_capabilities, sizeof(enabled_capabilities)); + s->xbzrle_cache_size = xbzrle_cache_size; s->state = MIG_STATE_SETUP; @@ -516,3 +522,21 @@ void qmp_migrate_set_downtime(double value, Error **errp) value = MAX(0, MIN(UINT64_MAX, value)); max_downtime = (uint64_t)value; } + +int migrate_use_xbzrle(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; +} + +int64_t migrate_xbzrle_cache_size(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->xbzrle_cache_size; +} diff --git a/migration.h b/migration.h index 00d1992..eb0b822 100644 --- a/migration.h +++ b/migration.h @@ -39,6 +39,7 @@ struct MigrationState void *opaque; MigrationParams params; bool enabled_capabilities[MIGRATION_CAPABILITY_MAX]; + int64_t xbzrle_cache_size; }; void process_incoming_migration(QEMUFile *f); @@ -99,4 +100,11 @@ void migrate_add_blocker(Error *reason); */ void migrate_del_blocker(Error *reason); +int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen); +int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); + +int migrate_use_xbzrle(void); +int64_t migrate_xbzrle_cache_size(void); + #endif diff --git a/savevm.c b/savevm.c index 42937a0..31db838 100644 --- a/savevm.c +++ b/savevm.c @@ -2374,3 +2374,94 @@ void vmstate_register_ram_global(MemoryRegion *mr) { vmstate_register_ram(mr, NULL); } + +/* + page = zrun nzrun + | zrun nzrun page + + zrun = length + + nzrun = length byte... + + length = uleb128 encoded integer + */ +int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, + uint8_t *dst, int dlen) +{ + uint32_t zrun_len = 0, nzrun_len = 0; + int d = 0 , i = 0; + uint8_t *nzrun_start = NULL; + + while (i < slen) { + /* overflow */ + if (d + 2 > dlen) { + return -1; + } + + while (!(old_buf[i] ^ new_buf[i]) && ++i <= slen) { + zrun_len++; + } + + /* buffer unchanged */ + if (zrun_len == slen) { + return 0; + } + + /* skip last zero run */ + if (i == slen + 1) { + return d; + } + + d += uleb128_encode_small(dst + d, zrun_len); + + zrun_len = 0; + nzrun_start = new_buf + i; + while ((old_buf[i] ^ new_buf[i]) != 0 && ++i <= slen) { + nzrun_len++; + } + + /* overflow */ + if (d + nzrun_len + 2 > dlen) { + return -1; + } + + d += uleb128_encode_small(dst + d, nzrun_len); + memcpy(dst + d, nzrun_start, nzrun_len); + d += nzrun_len; + nzrun_len = 0; + } + + return d; +} + +int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) +{ + int i = 0, d = 0; + uint32_t count = 0; + + while (i < slen) { + + /* zrun */ + i += uleb128_decode_small(src + i, &count); + d += count; + + /* overflow */ + g_assert(d <= dlen); + + /* completed decoding */ + if (i == slen - 1) { + return d; + } + + /* nzrun */ + i += uleb128_decode_small(src + i, &count); + + g_assert(d + count <= dlen); + + memcpy(dst + d , src + i, count); + d += count; + i += count; + } + + return d; +}