[RFC,5/6] savevm: Migrate RAM based on name/offset

Message ID	20100608191633.4451.59848.stgit@localhost.localdomain
State	New
Headers	show Return-Path: <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org> From: Alex Williamson <alex.williamson@redhat.com> To: qemu-devel@nongnu.org, anthony@codemonkey.ws Date: Tue, 08 Jun 2010 13:16:33 -0600 Message-ID: <20100608191633.4451.59848.stgit@localhost.localdomain> In-Reply-To: <20100608191447.4451.47795.stgit@localhost.localdomain> References: <20100608191447.4451.47795.stgit@localhost.localdomain> User-Agent: StGIT/0.14.3 MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Cc: chrisw@redhat.com, alex.williamson@redhat.com, kvm@vger.kernel.org, quintela@redhat.com Subject: [Qemu-devel] [RFC PATCH 5/6] savevm: Migrate RAM based on name/offset Precedence: list Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org

Message ID

20100608191633.4451.59848.stgit@localhost.localdomain

State

New

Headers

From: Alex Williamson <alex.williamson@redhat.com>
To: qemu-devel@nongnu.org, anthony@codemonkey.ws
Date: Tue, 08 Jun 2010 13:16:33 -0600
Message-ID: <20100608191633.4451.59848.stgit@localhost.localdomain>
In-Reply-To: <20100608191447.4451.47795.stgit@localhost.localdomain>
References: <20100608191447.4451.47795.stgit@localhost.localdomain>
User-Agent: StGIT/0.14.3
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
Cc: chrisw@redhat.com, alex.williamson@redhat.com, kvm@vger.kernel.org,
	quintela@redhat.com
Subject: [Qemu-devel] [RFC PATCH 5/6] savevm: Migrate RAM based on
	name/offset
Precedence: list
Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org
Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org

Commit Message

Alex Williamson June 8, 2010, 7:16 p.m. UTC

Synchronize RAM blocks with the target and migrate using name/offset
pairs.  This ensures both source and target have the same view of
RAM and that we get the right bits into the right slot.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 arch_init.c |  103 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
 vl.c        |    2 +
 2 files changed, 93 insertions(+), 12 deletions(-)

Comments

Anthony Liguori June 8, 2010, 8:12 p.m. UTC | #1

On 06/08/2010 02:16 PM, Alex Williamson wrote:
> Synchronize RAM blocks with the target and migrate using name/offset
> pairs.  This ensures both source and target have the same view of
> RAM and that we get the right bits into the right slot.
>
> Signed-off-by: Alex Williamson<alex.williamson@redhat.com>
> ---
>
>   arch_init.c |  103 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
>   vl.c        |    2 +
>   2 files changed, 93 insertions(+), 12 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 6103461..8ba92ec 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -113,20 +113,29 @@ static int ram_save_block(QEMUFile *f)
>
>       while (addr<  total_ram) {
>           if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
> +            RAMBlock *block;
> +            ram_addr_t offset;
>               uint8_t *p;
>
>               cpu_physical_memory_reset_dirty(current_addr,
>                                               current_addr + TARGET_PAGE_SIZE,
>                                               MIGRATION_DIRTY_FLAG);
>
> -            p = qemu_get_ram_ptr(current_addr);
> +            QLIST_FOREACH(block,&ram.blocks, next) {
> +                if (current_addr - block->offset<  block->length)
> +                    break;
> +            }
> +            offset = current_addr - block->offset;
> +            p = block->host + offset;
>
>               if (is_dup_page(p, *p)) {
> -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
> +                qemu_put_be64(f, offset | RAM_SAVE_FLAG_COMPRESS);
> +                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
>                   qemu_put_byte(f, *p);
>    

I think we could use some trickery like use another flag in 
current_address to determine whether this was a different section from 
the previous section and then only encode the section name if that's true.

Regards,

Anthony Liguori

>                   bytes_sent = 1;
>               } else {
> -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
> +                qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE);
> +                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
>                   qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>                   bytes_sent = TARGET_PAGE_SIZE;
>               }
> @@ -196,6 +205,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>       }
>
>       if (stage == 1) {
> +        RAMBlock *block;
>           uint64_t total_ram = ram_bytes_total();
>           bytes_transferred = 0;
>
> @@ -210,6 +220,11 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>           cpu_physical_memory_set_dirty_tracking(1);
>
>           qemu_put_be64(f, total_ram | RAM_SAVE_FLAG_MEM_SIZE);
> +
> +        QLIST_FOREACH(block,&ram.blocks, next) {
> +            qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
> +            qemu_put_be64(f, block->length);
> +        }
>       }
>
>       bytes_transferred_last = bytes_transferred;
> @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>       ram_addr_t addr;
>       int flags;
>
> -    if (version_id != 3) {
> +    if (version_id<  3) {
>           return -EINVAL;
>       }
>
> @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>           addr&= TARGET_PAGE_MASK;
>
>           if (flags&  RAM_SAVE_FLAG_MEM_SIZE) {
> -            if (addr != ram_bytes_total()) {
> -                return -EINVAL;
> +            if (version_id == 3) {
> +                if (addr != ram_bytes_total()) {
> +                    return -EINVAL;
> +                }
> +            } else {
> +                /* Synchronize RAM block list */
> +                char name[64];
> +                ram_addr_t length;
> +                ram_addr_t total_ram_bytes = addr;
> +
> +                while (total_ram_bytes) {
> +                    RAMBlock *block;
> +                    qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
> +                    length = qemu_get_be64(f);
> +
> +                    QLIST_FOREACH(block,&ram.blocks, next) {
> +                        if (!strncmp(name, block->name, sizeof(name))) {
> +                            if (block->length != length)
> +                                return -EINVAL;
> +                            break;
> +                        }
> +                    }
> +
> +                    if (!block) {
> +                        if (!qemu_ram_alloc(name, length))
> +                            return -ENOMEM;
> +                    }
> +
> +                    total_ram_bytes -= length;
> +                }
>               }
>           }
>
>           if (flags&  RAM_SAVE_FLAG_COMPRESS) {
> -            uint8_t ch = qemu_get_byte(f);
> -            memset(qemu_get_ram_ptr(addr), ch, TARGET_PAGE_SIZE);
> +            void *host;
> +            uint8_t ch;
> +
> +            if (version_id == 3) {
> +                host = qemu_get_ram_ptr(addr);
> +            } else {
> +                RAMBlock *block;
> +                char name[64];
> +
> +                qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
> +
> +                QLIST_FOREACH(block,&ram.blocks, next) {
> +                    if (!strncmp(name, block->name, sizeof(name)))
> +                        break;
> +                }
> +                if (!block)
> +                    return -EINVAL;
> +
> +                host = block->host + addr;
> +            }
> +            ch = qemu_get_byte(f);
> +            memset(host, ch, TARGET_PAGE_SIZE);
>   #ifndef _WIN32
>               if (ch == 0&&
>                   (!kvm_enabled() || kvm_has_sync_mmu())) {
> -                madvise(qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE,
> -                        MADV_DONTNEED);
> +                madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED);
>               }
>   #endif
>           } else if (flags&  RAM_SAVE_FLAG_PAGE) {
> -            qemu_get_buffer(f, qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE);
> +            void *host;
> +
> +            if (version_id == 3) {
> +                host = qemu_get_ram_ptr(addr);
> +            } else {
> +                RAMBlock *block;
> +                char name[64];
> +
> +                qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
> +
> +                QLIST_FOREACH(block,&ram.blocks, next) {
> +                    if (!strncmp(name, block->name, sizeof(name)))
> +                        break;
> +                }
> +                if (!block)
> +                    return -EINVAL;
> +
> +                host = block->host + addr;
> +            }
> +            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
>           }
>           if (qemu_file_has_error(f)) {
>               return -EIO;
> diff --git a/vl.c b/vl.c
> index 9e9c176..a871895 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -3731,7 +3731,7 @@ int main(int argc, char **argv, char **envp)
>       if (qemu_opts_foreach(&qemu_drive_opts, drive_init_func, machine, 1) != 0)
>           exit(1);
>
> -    register_savevm_live("ram", 0, 3, NULL, ram_save_live, NULL,
> +    register_savevm_live("ram", 0, 4, NULL, ram_save_live, NULL,
>                            ram_load, NULL);
>
>       if (nb_numa_nodes>  0) {
>
>

Chris Wright June 8, 2010, 8:54 p.m. UTC | #2

* Alex Williamson (alex.williamson@redhat.com) wrote:
> @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>      ram_addr_t addr;
>      int flags;
>  
> -    if (version_id != 3) {
> +    if (version_id < 3) {
>          return -EINVAL;

Should we clamp to 3 and 4?

>      }
>  
> @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>          addr &= TARGET_PAGE_MASK;
>  
>          if (flags & RAM_SAVE_FLAG_MEM_SIZE) {

Does it simplify anything to simply add a new flag?

> -            if (addr != ram_bytes_total()) {
> -                return -EINVAL;
> +            if (version_id == 3) {
> +                if (addr != ram_bytes_total()) {
> +                    return -EINVAL;
> +                }
> +            } else {
> +                /* Synchronize RAM block list */
> +                char name[64];
> +                ram_addr_t length;
> +                ram_addr_t total_ram_bytes = addr;
> +
> +                while (total_ram_bytes) {
> +                    RAMBlock *block;
> +                    qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
> +                    length = qemu_get_be64(f);
> +
> +                    QLIST_FOREACH(block, &ram.blocks, next) {
> +                        if (!strncmp(name, block->name, sizeof(name))) {
> +                            if (block->length != length)
> +                                return -EINVAL;
> +                            break;
> +                        }
> +                    }
> +
> +                    if (!block) {
> +                        if (!qemu_ram_alloc(name, length))
> +                            return -ENOMEM;

Is there any usee to finding blocks in stream such that we simply allocate
them all dynamically?

thanks,
-chris

Alex Williamson June 8, 2010, 9:12 p.m. UTC | #3

On Tue, 2010-06-08 at 15:12 -0500, Anthony Liguori wrote:
> On 06/08/2010 02:16 PM, Alex Williamson wrote:
> >               if (is_dup_page(p, *p)) {
> > -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
> > +                qemu_put_be64(f, offset | RAM_SAVE_FLAG_COMPRESS);
> > +                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
> >                   qemu_put_byte(f, *p);
> >    
> 
> I think we could use some trickery like use another flag in 
> current_address to determine whether this was a different section from 
> the previous section and then only encode the section name if that's true.

Good suggestion, see patch 7/6 ;)

Alex

Alex Williamson June 8, 2010, 9:22 p.m. UTC | #4

On Tue, 2010-06-08 at 13:54 -0700, Chris Wright wrote:
> * Alex Williamson (alex.williamson@redhat.com) wrote:
> > @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
> >      ram_addr_t addr;
> >      int flags;
> >  
> > -    if (version_id != 3) {
> > +    if (version_id < 3) {
> >          return -EINVAL;
> 
> Should we clamp to 3 and 4?

Yep, definitely a good idea.

> >      }
> >  
> > @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
> >          addr &= TARGET_PAGE_MASK;
> >  
> >          if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
> 
> Does it simplify anything to simply add a new flag?

Not that I can see.  Appending it to this existing flag conveniently
lets the receiving side know when it's done since SUM(block->length)
equals MEM_SIZE.  Let me know if I'm missing an optimization or use case
you're thinking of.

> > +                    QLIST_FOREACH(block, &ram.blocks, next) {
> > +                        if (!strncmp(name, block->name, sizeof(name))) {
> > +                            if (block->length != length)
> > +                                return -EINVAL;
> > +                            break;
> > +                        }
> > +                    }
> > +
> > +                    if (!block) {
> > +                        if (!qemu_ram_alloc(name, length))
> > +                            return -ENOMEM;
> 
> Is there any use to finding blocks in stream such that we simply allocate
> them all dynamically?

Maybe.  It seems like we'd be doing a lot of reallocs as we go though.
I think we're pretty locked down once the migration starts, so nothing
should be changing on the source once we get started.  If that's the
case (someone correct me if it's not), sending the block list layout
first seems more efficient.  Thanks,

Alex

diff --git a/arch_init.c b/arch_init.c
index 6103461..8ba92ec 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -113,20 +113,29 @@  static int ram_save_block(QEMUFile *f)
 
     while (addr < total_ram) {
         if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
+            RAMBlock *block;
+            ram_addr_t offset;
             uint8_t *p;
 
             cpu_physical_memory_reset_dirty(current_addr,
                                             current_addr + TARGET_PAGE_SIZE,
                                             MIGRATION_DIRTY_FLAG);
 
-            p = qemu_get_ram_ptr(current_addr);
+            QLIST_FOREACH(block, &ram.blocks, next) {
+                if (current_addr - block->offset < block->length)
+                    break;
+            }
+            offset = current_addr - block->offset;
+            p = block->host + offset;
 
             if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
+                qemu_put_be64(f, offset | RAM_SAVE_FLAG_COMPRESS);
+                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
             } else {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
+                qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE);
+                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
             }
@@ -196,6 +205,7 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     }
 
     if (stage == 1) {
+        RAMBlock *block;
         uint64_t total_ram = ram_bytes_total();
         bytes_transferred = 0;
 
@@ -210,6 +220,11 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         cpu_physical_memory_set_dirty_tracking(1);
 
         qemu_put_be64(f, total_ram | RAM_SAVE_FLAG_MEM_SIZE);
+
+        QLIST_FOREACH(block, &ram.blocks, next) {
+            qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
+            qemu_put_be64(f, block->length);
+        }
     }
 
     bytes_transferred_last = bytes_transferred;
@@ -257,7 +272,7 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
     ram_addr_t addr;
     int flags;
 
-    if (version_id != 3) {
+    if (version_id < 3) {
         return -EINVAL;
     }
 
@@ -268,23 +283,89 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
         addr &= TARGET_PAGE_MASK;
 
         if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
-            if (addr != ram_bytes_total()) {
-                return -EINVAL;
+            if (version_id == 3) {
+                if (addr != ram_bytes_total()) {
+                    return -EINVAL;
+                }
+            } else {
+                /* Synchronize RAM block list */
+                char name[64];
+                ram_addr_t length;
+                ram_addr_t total_ram_bytes = addr;
+
+                while (total_ram_bytes) {
+                    RAMBlock *block;
+                    qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
+                    length = qemu_get_be64(f);
+
+                    QLIST_FOREACH(block, &ram.blocks, next) {
+                        if (!strncmp(name, block->name, sizeof(name))) {
+                            if (block->length != length)
+                                return -EINVAL;
+                            break;
+                        }
+                    }
+
+                    if (!block) {
+                        if (!qemu_ram_alloc(name, length))
+                            return -ENOMEM;
+                    }
+
+                    total_ram_bytes -= length;
+                }
             }
         }
 
         if (flags & RAM_SAVE_FLAG_COMPRESS) {
-            uint8_t ch = qemu_get_byte(f);
-            memset(qemu_get_ram_ptr(addr), ch, TARGET_PAGE_SIZE);
+            void *host;
+            uint8_t ch;
+
+            if (version_id == 3) {
+                host = qemu_get_ram_ptr(addr);
+            } else {
+                RAMBlock *block;
+                char name[64];
+
+                qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
+
+                QLIST_FOREACH(block, &ram.blocks, next) {
+                    if (!strncmp(name, block->name, sizeof(name)))
+                        break;
+                }
+                if (!block)
+                    return -EINVAL;
+
+                host = block->host + addr;
+            }
+            ch = qemu_get_byte(f);
+            memset(host, ch, TARGET_PAGE_SIZE);
 #ifndef _WIN32
             if (ch == 0 &&
                 (!kvm_enabled() || kvm_has_sync_mmu())) {
-                madvise(qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE,
-                        MADV_DONTNEED);
+                madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED);
             }
 #endif
         } else if (flags & RAM_SAVE_FLAG_PAGE) {
-            qemu_get_buffer(f, qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE);
+            void *host;
+
+            if (version_id == 3) {
+                host = qemu_get_ram_ptr(addr);
+            } else {
+                RAMBlock *block;
+                char name[64];
+
+                qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
+
+                QLIST_FOREACH(block, &ram.blocks, next) {
+                    if (!strncmp(name, block->name, sizeof(name)))
+                        break;
+                }
+                if (!block)
+                    return -EINVAL;
+
+                host = block->host + addr;
+            }
+            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
         }
         if (qemu_file_has_error(f)) {
             return -EIO;
diff --git a/vl.c b/vl.c
index 9e9c176..a871895 100644
--- a/vl.c
+++ b/vl.c
@@ -3731,7 +3731,7 @@  int main(int argc, char **argv, char **envp)
     if (qemu_opts_foreach(&qemu_drive_opts, drive_init_func, machine, 1) != 0)
         exit(1);
 
-    register_savevm_live("ram", 0, 3, NULL, ram_save_live, NULL, 
+    register_savevm_live("ram", 0, 4, NULL, ram_save_live, NULL, 
                          ram_load, NULL);
 
     if (nb_numa_nodes > 0) {

[RFC,5/6] savevm: Migrate RAM based on name/offset

Commit Message

Comments

Patch