[v11,7/9] Add XBZRLE to ram_save_block and ram_save_live

Message ID	1337691425-6022-8-git-send-email-owasserm@redhat.com
State	New
Headers	show Return-Path: <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org> From: Orit Wasserman <owasserm@redhat.com> To: qemu-devel@nongnu.org Date: Tue, 22 May 2012 15:57:03 +0300 Message-Id: <1337691425-6022-8-git-send-email-owasserm@redhat.com> In-Reply-To: <1337691425-6022-1-git-send-email-owasserm@redhat.com> References: <1337691425-6022-1-git-send-email-owasserm@redhat.com> Cc: peter.maydell@linaro.org, aliguori@us.ibm.com, quintela@redhat.com, Petter Svard <petters@cs.umu.se>, stefanha@gmail.com, mdroth@linux.vnet.ibm.com, Benoit Hudzia <benoit.hudzia@sap.com>, blauwirbel@gmail.com, Orit Wasserman <owasserm@redhat.com>, chegu_vinod@hp.com, avi@redhat.com, Aidan Shribman <aidan.shribman@sap.com>, pbonzini@redhat.com, eblake@redhat.com Subject: [Qemu-devel] [PATCH v11 7/9] Add XBZRLE to ram_save_block and ram_save_live Precedence: list Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org

Message ID

1337691425-6022-8-git-send-email-owasserm@redhat.com

State

New

Headers

From: Orit Wasserman <owasserm@redhat.com>
To: qemu-devel@nongnu.org
Date: Tue, 22 May 2012 15:57:03 +0300
Message-Id: <1337691425-6022-8-git-send-email-owasserm@redhat.com>
In-Reply-To: <1337691425-6022-1-git-send-email-owasserm@redhat.com>
References: <1337691425-6022-1-git-send-email-owasserm@redhat.com>
Cc: peter.maydell@linaro.org, aliguori@us.ibm.com, quintela@redhat.com,
	Petter Svard <petters@cs.umu.se>, stefanha@gmail.com,
	mdroth@linux.vnet.ibm.com, Benoit Hudzia <benoit.hudzia@sap.com>,
	blauwirbel@gmail.com, Orit Wasserman <owasserm@redhat.com>,
	chegu_vinod@hp.com, avi@redhat.com,
	Aidan Shribman <aidan.shribman@sap.com>, pbonzini@redhat.com,
	eblake@redhat.com
Subject: [Qemu-devel] [PATCH v11 7/9] Add XBZRLE to ram_save_block and
	ram_save_live
Precedence: list
Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org
Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org

Commit Message

Orit Wasserman May 22, 2012, 12:57 p.m. UTC

In the outgoing migration check to see if the page is cached and
changed than send compressed page by using save_xbrle_page function.
In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
and decompress the page (by using load_xbrle function).

Signed-off-by: Benoit Hudzia <benoit.hudzia@sap.com>
Signed-off-by: Petter Svard <petters@cs.umu.se>
Signed-off-by: Aidan Shribman <aidan.shribman@sap.com>
Signed-off-by: Orit Wasserman <owasserm@redhat.com>
---
 arch_init.c |  223 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 migration.c |   26 +++++++-
 migration.h |    8 ++
 savevm.c    |   91 ++++++++++++++++++++++++
 4 files changed, 332 insertions(+), 16 deletions(-)

Comments

Juan Quintela June 1, 2012, 11:42 a.m. UTC | #1

Orit Wasserman <owasserm@redhat.com> wrote:
> In the outgoing migration check to see if the page is cached and
> changed than send compressed page by using save_xbrle_page function.
> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
> and decompress the page (by using load_xbrle function).


This patch can be split to easy review.

> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -43,6 +43,15 @@
>  #include "hw/smbios.h"
>  #include "exec-memory.h"
>  #include "hw/pcspk.h"
> +#include "qemu/cache.h"
> +
> +#ifdef DEBUG_ARCH_INIT
> +#define DPRINTF(fmt, ...) \
> +    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) \
> +    do { } while (0)
> +#endif

Independent of xbzrle.

>  
>  #ifdef TARGET_SPARC
>  int graphic_width = 1024;
> @@ -94,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH;
>  #define RAM_SAVE_FLAG_PAGE     0x08
>  #define RAM_SAVE_FLAG_EOS      0x10
>  #define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBZRLE   0x40
>  
>  #ifdef __ALTIVEC__
>  #include <altivec.h>
> @@ -157,6 +167,22 @@ static int is_dup_page(uint8_t *page)
>      return 1;
>  }
>  
> +/* XBZRLE (Xor Based Zero Length Encoding */
> +typedef struct XBZRLEHeader {
> +    uint32_t xh_cksum;

We are still not using this value, and we are sending it anyway (with a
value of zero).  What happens when we start using if for a checksum, and
we migration to a new version that "expects" it to be valid?  I would
preffer not to sent it, or sent the correct value.

> +    uint16_t xh_len;
> +    uint8_t xh_flags;
> +} XBZRLEHeader;
> +
> +/* struct contains XBZRLE cache and a static page
> +   used by the compression */
> +static struct {
> +    /* buffer used for XBZRLE encoding */
> +    uint8_t *encoded_buf;
> +    /* Cache for XBZRLE */
> +    Cache *cache;
> +} XBZRLE = {0};

Use c99 initializers, please.

{ .encoded_buf = NULL,
  .cache = NULL,
}

More instances in other parts.

> +
>  static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>          int cont, int flag)
 >  {
> @@ -169,19 +195,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>  
>  }
>  
> +#define ENCODING_FLAG_XBZRLE 0x1
> +
> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
> +                            ram_addr_t current_addr, RAMBlock *block,
> +                            ram_addr_t offset, int cont)
> +{
> +    int encoded_len = 0, bytes_sent = -1, ret = -1;
> +    XBZRLEHeader hdr = {0};
> +    uint8_t *prev_cached_page;
> +
> +    /* check to see if page is cached , if not cache and return */
> +    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
> +        cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data,
> +                                                          TARGET_PAGE_SIZE));
> +        goto done;
> +    }
> +
> +    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
> +
> +    /* XBZRLE encoding (if there is no overflow) */
> +    encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data,
> +                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
> +                                       TARGET_PAGE_SIZE);
> +    if (encoded_len == 0) {
> +        bytes_sent = 0;
> +        DPRINTF("Unmodifed page or overflow skipping\n");
> +        goto done;
> +    } else if (encoded_len == -1) {
> +        bytes_sent = -1;
> +        DPRINTF("Overflow\n");
> +        /* update data in the cache */
> +        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
> +        goto done;
> +    }
> +
> +    /* we need to update the data in the cache, in order to get the same data
> +       we cached we decode the encoded page on the cached data */
> +    ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len,
> +                               prev_cached_page, TARGET_PAGE_SIZE);
> +    g_assert(ret != -1);
> +
> +    hdr.xh_len = encoded_len;
> +    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
> +
> +    /* Send XBZRLE based compressed page */
> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
> +    qemu_put_byte(f, hdr.xh_flags);
> +    qemu_put_be16(f, hdr.xh_len);
> +    qemu_put_be32(f, hdr.xh_cksum);
> +    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
> +    bytes_sent = encoded_len + sizeof(hdr);
> +
> +done:
> +    return bytes_sent;
> +}
> +
>  static RAMBlock *last_block;
>  static ram_addr_t last_offset;
>  
> -static int ram_save_block(QEMUFile *f)
> +static int ram_save_block(QEMUFile *f, int stage)
>  {
>      RAMBlock *block = last_block;
>      ram_addr_t offset = last_offset;
> -    int bytes_sent = 0;
> +    int bytes_sent = -1;
>      MemoryRegion *mr;
> +    ram_addr_t current_addr;
>  
>      if (!block)
>          block = QLIST_FIRST(&ram_list.blocks);
>  
> +    current_addr = block->offset + offset;
> +
>      do {
>          mr = block->mr;
>          if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
> @@ -198,7 +283,24 @@ static int ram_save_block(QEMUFile *f)
>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>                  qemu_put_byte(f, *p);
>                  bytes_sent = 1;
> -            } else {
> +            } else if (migrate_use_xbzrle()) {
> +                /* in stage 1 none of the pages are cached so we just want to
> +                   cache them for next stages, and send the cached copy */
> +                if (stage == 1) {
> +                    cache_insert(XBZRLE.cache, current_addr,
> +                                 g_memdup(p, TARGET_PAGE_SIZE));
> +                } else {
> +                    bytes_sent = save_xbzrle_page(f, p, current_addr, block,
> +                                                  offset, cont);
> +                }
> +                /* send the cached page copy for stage 1 and 2*/
> +                if (stage != 3) {
> +                    p = get_cached_data(XBZRLE.cache, current_addr);
> +                }
> +            }
> +
> +            /* either we didn't send yet (we may got XBZRLE overflow) */
> +            if (bytes_sent == -1) {
>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>                  qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>                  bytes_sent = TARGET_PAGE_SIZE;


I think that code is not right when save_xbzrle_page() returns 0.  That
means that page hasn't changed since last time we sent that page.  We
shouldn't break in that case.  Just continue with next page, right?

On the other hand ... Why are we doing the stage == 1 test?  stage 1
normally only sent part of the pages, so we could use the generic code
there?  It would just return -1 as bytes_sent, and do the same code that
we have now?

The optimization for stage 3 is not done backwards?  We are inserting
the page in the cache even if we are on stage 3.  In stage three we
should:
- look if page is on the cache: do usual xbrlze trick
- if it is not, just sent the whole page without inserting it into the
cache?  We are never going to reuse it, so putting it into the cache
would not help us at all.  We are just making an extra copy?


>  
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>  
>      expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>  
> +    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
> +        migrate_max_downtime());
> +

This belongs to debugging patch.

> +    /* load data and decode */
> +    xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE);

can't we have a static buffer of that size, and avoid all the
malloc/free business?  If space is tight, we can allways put it on the
xbrle structure and assign it only for migration.

> @@ -481,16 +657,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>              void *host;
>  
>              host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }

Why is this check only needed now?

Later, Juan.

Orit Wasserman June 6, 2012, 2:13 a.m. UTC | #2

On 06/01/2012 02:42 PM, Juan Quintela wrote:
> Orit Wasserman <owasserm@redhat.com> wrote:
>> In the outgoing migration check to see if the page is cached and
>> changed than send compressed page by using save_xbrle_page function.
>> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
>> and decompress the page (by using load_xbrle function).
> 
> 
> This patch can be split to easy review.
Sure.
> 
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -43,6 +43,15 @@
>>  #include "hw/smbios.h"
>>  #include "exec-memory.h"
>>  #include "hw/pcspk.h"
>> +#include "qemu/cache.h"
>> +
>> +#ifdef DEBUG_ARCH_INIT
>> +#define DPRINTF(fmt, ...) \
>> +    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
>> +#else
>> +#define DPRINTF(fmt, ...) \
>> +    do { } while (0)
>> +#endif
> 
> Independent of xbzrle.
> 
>>  
>>  #ifdef TARGET_SPARC
>>  int graphic_width = 1024;
>> @@ -94,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH;
>>  #define RAM_SAVE_FLAG_PAGE     0x08
>>  #define RAM_SAVE_FLAG_EOS      0x10
>>  #define RAM_SAVE_FLAG_CONTINUE 0x20
>> +#define RAM_SAVE_FLAG_XBZRLE   0x40
>>  
>>  #ifdef __ALTIVEC__
>>  #include <altivec.h>
>> @@ -157,6 +167,22 @@ static int is_dup_page(uint8_t *page)
>>      return 1;
>>  }
>>  
>> +/* XBZRLE (Xor Based Zero Length Encoding */
>> +typedef struct XBZRLEHeader {
>> +    uint32_t xh_cksum;
> 
> We are still not using this value, and we are sending it anyway (with a
> value of zero).  What happens when we start using if for a checksum, and
> we migration to a new version that "expects" it to be valid?  I would
> preffer not to sent it, or sent the correct value.
I think I will remove it, checksum should be used for all migration not just XBZRLE.
I guess we can add it to the protocol in the future.
> 
>> +    uint16_t xh_len;
>> +    uint8_t xh_flags;
>> +} XBZRLEHeader;
>> +
>> +/* struct contains XBZRLE cache and a static page
>> +   used by the compression */
>> +static struct {
>> +    /* buffer used for XBZRLE encoding */
>> +    uint8_t *encoded_buf;
>> +    /* Cache for XBZRLE */
>> +    Cache *cache;
>> +} XBZRLE = {0};
> 
> Use c99 initializers, please.
> 
> { .encoded_buf = NULL,
>   .cache = NULL,
> }
> 
> More instances in other parts.
> 
>> +
>>  static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>>          int cont, int flag)
>  >  {
>> @@ -169,19 +195,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>>  
>>  }
>>  
>> +#define ENCODING_FLAG_XBZRLE 0x1
>> +
>> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
>> +                            ram_addr_t current_addr, RAMBlock *block,
>> +                            ram_addr_t offset, int cont)
>> +{
>> +    int encoded_len = 0, bytes_sent = -1, ret = -1;
>> +    XBZRLEHeader hdr = {0};
>> +    uint8_t *prev_cached_page;
>> +
>> +    /* check to see if page is cached , if not cache and return */
>> +    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
>> +        cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data,
>> +                                                          TARGET_PAGE_SIZE));
>> +        goto done;
>> +    }
>> +
>> +    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
>> +
>> +    /* XBZRLE encoding (if there is no overflow) */
>> +    encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data,
>> +                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
>> +                                       TARGET_PAGE_SIZE);
>> +    if (encoded_len == 0) {
>> +        bytes_sent = 0;
>> +        DPRINTF("Unmodifed page or overflow skipping\n");
>> +        goto done;
>> +    } else if (encoded_len == -1) {
>> +        bytes_sent = -1;
>> +        DPRINTF("Overflow\n");
>> +        /* update data in the cache */
>> +        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
>> +        goto done;
>> +    }
>> +
>> +    /* we need to update the data in the cache, in order to get the same data
>> +       we cached we decode the encoded page on the cached data */
>> +    ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len,
>> +                               prev_cached_page, TARGET_PAGE_SIZE);
>> +    g_assert(ret != -1);
>> +
>> +    hdr.xh_len = encoded_len;
>> +    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
>> +
>> +    /* Send XBZRLE based compressed page */
>> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
>> +    qemu_put_byte(f, hdr.xh_flags);
>> +    qemu_put_be16(f, hdr.xh_len);
>> +    qemu_put_be32(f, hdr.xh_cksum);
>> +    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
>> +    bytes_sent = encoded_len + sizeof(hdr);
>> +
>> +done:
>> +    return bytes_sent;
>> +}
>> +
>>  static RAMBlock *last_block;
>>  static ram_addr_t last_offset;
>>  
>> -static int ram_save_block(QEMUFile *f)
>> +static int ram_save_block(QEMUFile *f, int stage)
>>  {
>>      RAMBlock *block = last_block;
>>      ram_addr_t offset = last_offset;
>> -    int bytes_sent = 0;
>> +    int bytes_sent = -1;
>>      MemoryRegion *mr;
>> +    ram_addr_t current_addr;
>>  
>>      if (!block)
>>          block = QLIST_FIRST(&ram_list.blocks);
>>  
>> +    current_addr = block->offset + offset;
>> +
>>      do {
>>          mr = block->mr;
>>          if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
>> @@ -198,7 +283,24 @@ static int ram_save_block(QEMUFile *f)
>>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>>                  qemu_put_byte(f, *p);
>>                  bytes_sent = 1;
>> -            } else {
>> +            } else if (migrate_use_xbzrle()) {
>> +                /* in stage 1 none of the pages are cached so we just want to
>> +                   cache them for next stages, and send the cached copy */
>> +                if (stage == 1) {
>> +                    cache_insert(XBZRLE.cache, current_addr,
>> +                                 g_memdup(p, TARGET_PAGE_SIZE));
>> +                } else {
>> +                    bytes_sent = save_xbzrle_page(f, p, current_addr, block,
>> +                                                  offset, cont);
>> +                }
>> +                /* send the cached page copy for stage 1 and 2*/
>> +                if (stage != 3) {
>> +                    p = get_cached_data(XBZRLE.cache, current_addr);
>> +                }
>> +            }
>> +
>> +            /* either we didn't send yet (we may got XBZRLE overflow) */
>> +            if (bytes_sent == -1) {
>>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>>                  qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>>                  bytes_sent = TARGET_PAGE_SIZE;
> 
> 
> I think that code is not right when save_xbzrle_page() returns 0.  That
> means that page hasn't changed since last time we sent that page.  We
> shouldn't break in that case.  Just continue with next page, right?
> 
You are right I missed that , will be fixed

> On the other hand ... Why are we doing the stage == 1 test?  stage 1
> normally only sent part of the pages, so we could use the generic code
> there?  It would just return -1 as bytes_sent, and do the same code that
> we have now?

we need to add the pages to the cache in stage 1 (for the next stage),
and there is no need for checking if the page is cached.
and send the pages from the cache for consistency

> 
> The optimization for stage 3 is not done backwards?  We are inserting
> the page in the cache even if we are on stage 3.  In stage three we
> should:
> - look if page is on the cache: do usual xbrlze trick
> - if it is not, just sent the whole page without inserting it into the
> cache?  We are never going to reuse it, so putting it into the cache
> would not help us at all.  We are just making an extra copy?
right no need to insert the page into the cache in stage 3, I will remove it
> 
> 
>>  
>>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>  
>>      expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>>  
>> +    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
>> +        migrate_max_downtime());
>> +
> 
> This belongs to debugging patch.
> 
>> +    /* load data and decode */
>> +    xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE);
> 
> can't we have a static buffer of that size, and avoid all the
> malloc/free business?  If space is tight, we can allways put it on the
> xbrle structure and assign it only for migration.
good idea
> 
>> @@ -481,16 +657,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>>              void *host;
>>  
>>              host = host_from_stream_offset(f, addr, flags);
>> +            if (!host) {
>> +                return -EINVAL;
>> +            }
> 
> Why is this check only needed now?
I wish I knew, looks like it is missing in upstream.
Do you think I should fix it separately ?

Thanks,
Orit
> 
> Later, Juan.

Juan Quintela June 7, 2012, 10:38 a.m. UTC | #3

Orit Wasserman <owasserm@redhat.com> wrote:
> On 06/01/2012 02:42 PM, Juan Quintela wrote:
>> We are still not using this value, and we are sending it anyway (with a
>> value of zero).  What happens when we start using if for a checksum, and
>> we migration to a new version that "expects" it to be valid?  I would
>> preffer not to sent it, or sent the correct value.
> I think I will remove it, checksum should be used for all migration
> not just XBZRLE.
> I guess we can add it to the protocol in the future.

Agreed.
>> On the other hand ... Why are we doing the stage == 1 test?  stage 1
>> normally only sent part of the pages, so we could use the generic code
>> there?  It would just return -1 as bytes_sent, and do the same code that
>> we have now?
>
> we need to add the pages to the cache in stage 1 (for the next stage),
> and there is no need for checking if the page is cached.
> and send the pages from the cache for consistency

My question is: If we remove the check and just call the other function,
everything works, no?  So , why add the special case?

If it dont' work, we need to change it, because nothing warantees that
we fill the cache during stage .

>>>              void *host;
>>>  
>>>              host = host_from_stream_offset(f, addr, flags);
>>> +            if (!host) {
>>> +                return -EINVAL;
>>> +            }
>> 
>> Why is this check only needed now?
> I wish I knew, looks like it is missing in upstream.
> Do you think I should fix it separately ?

Yeap.

Thanks, Juan.

diff --git a/arch_init.c b/arch_init.c
index 071dc8d..536d34c 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -43,6 +43,15 @@ 
 #include "hw/smbios.h"
 #include "exec-memory.h"
 #include "hw/pcspk.h"
+#include "qemu/cache.h"
+
+#ifdef DEBUG_ARCH_INIT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
 
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
@@ -94,6 +103,7 @@  const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE   0x40
 
 #ifdef __ALTIVEC__
 #include <altivec.h>
@@ -157,6 +167,22 @@  static int is_dup_page(uint8_t *page)
     return 1;
 }
 
+/* XBZRLE (Xor Based Zero Length Encoding */
+typedef struct XBZRLEHeader {
+    uint32_t xh_cksum;
+    uint16_t xh_len;
+    uint8_t xh_flags;
+} XBZRLEHeader;
+
+/* struct contains XBZRLE cache and a static page
+   used by the compression */
+static struct {
+    /* buffer used for XBZRLE encoding */
+    uint8_t *encoded_buf;
+    /* Cache for XBZRLE */
+    Cache *cache;
+} XBZRLE = {0};
+
 static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
         int cont, int flag)
 {
@@ -169,19 +195,78 @@  static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
 
 }
 
+#define ENCODING_FLAG_XBZRLE 0x1
+
+static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
+                            ram_addr_t current_addr, RAMBlock *block,
+                            ram_addr_t offset, int cont)
+{
+    int encoded_len = 0, bytes_sent = -1, ret = -1;
+    XBZRLEHeader hdr = {0};
+    uint8_t *prev_cached_page;
+
+    /* check to see if page is cached , if not cache and return */
+    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
+        cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data,
+                                                          TARGET_PAGE_SIZE));
+        goto done;
+    }
+
+    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
+
+    /* XBZRLE encoding (if there is no overflow) */
+    encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data,
+                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                       TARGET_PAGE_SIZE);
+    if (encoded_len == 0) {
+        bytes_sent = 0;
+        DPRINTF("Unmodifed page or overflow skipping\n");
+        goto done;
+    } else if (encoded_len == -1) {
+        bytes_sent = -1;
+        DPRINTF("Overflow\n");
+        /* update data in the cache */
+        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    /* we need to update the data in the cache, in order to get the same data
+       we cached we decode the encoded page on the cached data */
+    ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len,
+                               prev_cached_page, TARGET_PAGE_SIZE);
+    g_assert(ret != -1);
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
+
+    /* Send XBZRLE based compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_byte(f, hdr.xh_flags);
+    qemu_put_be16(f, hdr.xh_len);
+    qemu_put_be32(f, hdr.xh_cksum);
+    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
+    bytes_sent = encoded_len + sizeof(hdr);
+
+done:
+    return bytes_sent;
+}
+
 static RAMBlock *last_block;
 static ram_addr_t last_offset;
 
-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
 {
     RAMBlock *block = last_block;
     ram_addr_t offset = last_offset;
-    int bytes_sent = 0;
+    int bytes_sent = -1;
     MemoryRegion *mr;
+    ram_addr_t current_addr;
 
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);
 
+    current_addr = block->offset + offset;
+
     do {
         mr = block->mr;
         if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
@@ -198,7 +283,24 @@  static int ram_save_block(QEMUFile *f)
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
-            } else {
+            } else if (migrate_use_xbzrle()) {
+                /* in stage 1 none of the pages are cached so we just want to
+                   cache them for next stages, and send the cached copy */
+                if (stage == 1) {
+                    cache_insert(XBZRLE.cache, current_addr,
+                                 g_memdup(p, TARGET_PAGE_SIZE));
+                } else {
+                    bytes_sent = save_xbzrle_page(f, p, current_addr, block,
+                                                  offset, cont);
+                }
+                /* send the cached page copy for stage 1 and 2*/
+                if (stage != 3) {
+                    p = get_cached_data(XBZRLE.cache, current_addr);
+                }
+            }
+
+            /* either we didn't send yet (we may got XBZRLE overflow) */
+            if (bytes_sent == -1) {
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
@@ -292,6 +394,17 @@  static void sort_ram_list(void)
     g_free(blocks);
 }
 
+static void migration_end(void)
+{
+    memory_global_dirty_log_stop();
+
+    if (migrate_use_xbzrle()) {
+        cache_fini(XBZRLE.cache);
+        g_free(XBZRLE.cache);
+        XBZRLE.cache = NULL;
+    }
+}
+
 int ram_save_live(QEMUFile *f, int stage, void *opaque)
 {
     ram_addr_t addr;
@@ -301,7 +414,7 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
     int ret;
 
     if (stage < 0) {
-        memory_global_dirty_log_stop();
+        migration_end();
         return 0;
     }
 
@@ -314,6 +427,17 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
         last_offset = 0;
         sort_ram_list();
 
+        if (migrate_use_xbzrle()) {
+            XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
+                                      TARGET_PAGE_SIZE,
+                                      TARGET_PAGE_SIZE);
+            if (!XBZRLE.cache) {
+                DPRINTF("Error creating cache\n");
+                return -1;
+            }
+            XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
+        }
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
@@ -341,9 +465,12 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int bytes_sent;
 
-        bytes_sent = ram_save_block(f);
-        bytes_transferred += bytes_sent;
-        if (bytes_sent == 0) { /* no more blocks */
+        bytes_sent = ram_save_block(f, stage);
+        /* bytes_sent 0 represent unchanged page,
+           bytes_sent -1 represent no more blocks*/
+        if (bytes_sent > 0) {
+            bytes_transferred += bytes_sent;
+        } else if (bytes_sent == -1) { /* no more blocks */
             break;
         }
     }
@@ -366,19 +493,62 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
         int bytes_sent;
 
         /* flush all remaining blocks regardless of rate limiting */
-        while ((bytes_sent = ram_save_block(f)) != 0) {
+        while ((bytes_sent = ram_save_block(f, stage)) != -1) {
             bytes_transferred += bytes_sent;
         }
-        memory_global_dirty_log_stop();
+        migration_end();
     }
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
 
+    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
+        migrate_max_downtime());
+
     return (stage == 2) && (expected_time <= migrate_max_downtime());
 }
 
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = 0;
+    uint8_t *xbzrle_buf = NULL;
+    XBZRLEHeader hdr = {0};
+
+    /* extract RLE header */
+    hdr.xh_flags = qemu_get_byte(f);
+    hdr.xh_len = qemu_get_be16(f);
+    hdr.xh_cksum = qemu_get_be32(f);
+
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
+        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
+        return -1;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
+        return -1;
+    }
+
+    /* load data and decode */
+    xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE);
+    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
+
+    /* decode RLE */
+    ret = xbzrle_decode_buffer(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+        rc = -1;
+    } else  if (ret > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
+                ret, TARGET_PAGE_SIZE);
+        rc = -1;
+    }
+
+    g_free(xbzrle_buf);
+    return rc;
+}
+
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -412,8 +582,11 @@  static inline void *host_from_stream_offset(QEMUFile *f,
 int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
     int error;
+    static uint64_t seq_iter;
+
+    seq_iter++;
 
     if (version_id < 4 || version_id > 4) {
         return -EINVAL;
@@ -443,8 +616,10 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
 
                     QLIST_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length)
-                                return -EINVAL;
+                            if (block->length != length) {
+                                ret =  -EINVAL;
+                                goto done;
+                            }
                             break;
                         }
                     }
@@ -452,7 +627,8 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
                     if (!block) {
                         fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                 "accept migration\n", id);
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto done;
                     }
 
                     total_ram_bytes -= length;
@@ -481,16 +657,33 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
             void *host;
 
             host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
 
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
+            void *host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
+
+            if (load_xbzrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
         }
         error = qemu_file_get_error(f);
         if (error) {
-            return error;
+            ret = error;
+            goto done;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));
 
-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+            ret, seq_iter);
+    return ret;
 }
 
 #ifdef HAS_AUDIO
diff --git a/migration.c b/migration.c
index 952f542..92c39e8 100644
--- a/migration.c
+++ b/migration.c
@@ -43,6 +43,9 @@  enum {
 
 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
 
+/* Migration XBZRLE cache size */
+#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
+
 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -55,7 +58,8 @@  static MigrationState *migrate_get_current(void)
     static MigrationState current_migration = {
         .state = MIG_STATE_SETUP,
         .bandwidth_limit = MAX_THROTTLE,
-    };
+        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
+};
 
     return &current_migration;
 }
@@ -410,6 +414,7 @@  static MigrationState *migrate_init(const MigrationParams *params)
     MigrationState *s = migrate_get_current();
     int64_t bandwidth_limit = s->bandwidth_limit;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+    int64_t xbzrle_cache_size = s->xbzrle_cache_size;
 
     memcpy(enabled_capabilities, s->enabled_capabilities,
            sizeof(enabled_capabilities));
@@ -419,6 +424,7 @@  static MigrationState *migrate_init(const MigrationParams *params)
     s->params = *params;
     memcpy(s->enabled_capabilities, enabled_capabilities,
            sizeof(enabled_capabilities));
+    s->xbzrle_cache_size = xbzrle_cache_size;
 
     s->state = MIG_STATE_SETUP;
 
@@ -516,3 +522,21 @@  void qmp_migrate_set_downtime(double value, Error **errp)
     value = MAX(0, MIN(UINT64_MAX, value));
     max_downtime = (uint64_t)value;
 }
+
+int migrate_use_xbzrle(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
+}
+
+int64_t migrate_xbzrle_cache_size(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->xbzrle_cache_size;
+}
diff --git a/migration.h b/migration.h
index 00d1992..eb0b822 100644
--- a/migration.h
+++ b/migration.h
@@ -39,6 +39,7 @@  struct MigrationState
     void *opaque;
     MigrationParams params;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+    int64_t xbzrle_cache_size;
 };
 
 void process_incoming_migration(QEMUFile *f);
@@ -99,4 +100,11 @@  void migrate_add_blocker(Error *reason);
  */
 void migrate_del_blocker(Error *reason);
 
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                         uint8_t *dst, int dlen);
+int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+
+int migrate_use_xbzrle(void);
+int64_t migrate_xbzrle_cache_size(void);
+
 #endif
diff --git a/savevm.c b/savevm.c
index 42937a0..31db838 100644
--- a/savevm.c
+++ b/savevm.c
@@ -2374,3 +2374,94 @@  void vmstate_register_ram_global(MemoryRegion *mr)
 {
     vmstate_register_ram(mr, NULL);
 }
+
+/*
+  page = zrun nzrun
+       | zrun nzrun page
+
+  zrun = length
+
+  nzrun = length byte...
+
+  length = uleb128 encoded integer
+ */
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                         uint8_t *dst, int dlen)
+{
+    uint32_t zrun_len = 0, nzrun_len = 0;
+    int d = 0 , i = 0;
+    uint8_t *nzrun_start = NULL;
+
+    while (i < slen) {
+        /* overflow */
+        if (d + 2 > dlen) {
+            return -1;
+        }
+
+        while (!(old_buf[i] ^ new_buf[i]) && ++i <= slen) {
+            zrun_len++;
+        }
+
+        /* buffer unchanged */
+        if (zrun_len == slen) {
+            return 0;
+        }
+
+        /* skip last zero run */
+        if (i == slen + 1) {
+            return d;
+        }
+
+        d += uleb128_encode_small(dst + d, zrun_len);
+
+        zrun_len = 0;
+        nzrun_start = new_buf + i;
+        while ((old_buf[i] ^ new_buf[i]) != 0 && ++i <= slen) {
+            nzrun_len++;
+        }
+
+        /* overflow */
+        if (d + nzrun_len + 2 > dlen) {
+            return -1;
+        }
+
+        d += uleb128_encode_small(dst + d, nzrun_len);
+        memcpy(dst + d, nzrun_start, nzrun_len);
+        d += nzrun_len;
+        nzrun_len = 0;
+    }
+
+    return d;
+}
+
+int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int i = 0, d = 0;
+    uint32_t count = 0;
+
+    while (i < slen) {
+
+        /* zrun */
+        i += uleb128_decode_small(src + i, &count);
+        d += count;
+
+        /* overflow */
+        g_assert(d <= dlen);
+
+        /* completed decoding */
+        if (i == slen - 1) {
+            return d;
+        }
+
+        /* nzrun */
+        i += uleb128_decode_small(src + i, &count);
+
+        g_assert(d + count <= dlen);
+
+        memcpy(dst + d , src + i, count);
+        d += count;
+        i += count;
+    }
+
+    return d;
+}

[v11,7/9] Add XBZRLE to ram_save_block and ram_save_live

Commit Message

Comments

Patch