Message ID | 20100608191633.4451.59848.stgit@localhost.localdomain |
---|---|
State | New |
Headers | show |
On 06/08/2010 02:16 PM, Alex Williamson wrote: > Synchronize RAM blocks with the target and migrate using name/offset > pairs. This ensures both source and target have the same view of > RAM and that we get the right bits into the right slot. > > Signed-off-by: Alex Williamson<alex.williamson@redhat.com> > --- > > arch_init.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ > vl.c | 2 + > 2 files changed, 93 insertions(+), 12 deletions(-) > > diff --git a/arch_init.c b/arch_init.c > index 6103461..8ba92ec 100644 > --- a/arch_init.c > +++ b/arch_init.c > @@ -113,20 +113,29 @@ static int ram_save_block(QEMUFile *f) > > while (addr< total_ram) { > if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) { > + RAMBlock *block; > + ram_addr_t offset; > uint8_t *p; > > cpu_physical_memory_reset_dirty(current_addr, > current_addr + TARGET_PAGE_SIZE, > MIGRATION_DIRTY_FLAG); > > - p = qemu_get_ram_ptr(current_addr); > + QLIST_FOREACH(block,&ram.blocks, next) { > + if (current_addr - block->offset< block->length) > + break; > + } > + offset = current_addr - block->offset; > + p = block->host + offset; > > if (is_dup_page(p, *p)) { > - qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS); > + qemu_put_be64(f, offset | RAM_SAVE_FLAG_COMPRESS); > + qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name)); > qemu_put_byte(f, *p); > I think we could use some trickery like use another flag in current_address to determine whether this was a different section from the previous section and then only encode the section name if that's true. Regards, Anthony Liguori > bytes_sent = 1; > } else { > - qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE); > + qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE); > + qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name)); > qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > bytes_sent = TARGET_PAGE_SIZE; > } > @@ -196,6 +205,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) > } > > if (stage == 1) { > + RAMBlock *block; > uint64_t total_ram = ram_bytes_total(); > bytes_transferred = 0; > > @@ -210,6 +220,11 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) > cpu_physical_memory_set_dirty_tracking(1); > > qemu_put_be64(f, total_ram | RAM_SAVE_FLAG_MEM_SIZE); > + > + QLIST_FOREACH(block,&ram.blocks, next) { > + qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name)); > + qemu_put_be64(f, block->length); > + } > } > > bytes_transferred_last = bytes_transferred; > @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > ram_addr_t addr; > int flags; > > - if (version_id != 3) { > + if (version_id< 3) { > return -EINVAL; > } > > @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > addr&= TARGET_PAGE_MASK; > > if (flags& RAM_SAVE_FLAG_MEM_SIZE) { > - if (addr != ram_bytes_total()) { > - return -EINVAL; > + if (version_id == 3) { > + if (addr != ram_bytes_total()) { > + return -EINVAL; > + } > + } else { > + /* Synchronize RAM block list */ > + char name[64]; > + ram_addr_t length; > + ram_addr_t total_ram_bytes = addr; > + > + while (total_ram_bytes) { > + RAMBlock *block; > + qemu_get_buffer(f, (uint8_t *)name, sizeof(name)); > + length = qemu_get_be64(f); > + > + QLIST_FOREACH(block,&ram.blocks, next) { > + if (!strncmp(name, block->name, sizeof(name))) { > + if (block->length != length) > + return -EINVAL; > + break; > + } > + } > + > + if (!block) { > + if (!qemu_ram_alloc(name, length)) > + return -ENOMEM; > + } > + > + total_ram_bytes -= length; > + } > } > } > > if (flags& RAM_SAVE_FLAG_COMPRESS) { > - uint8_t ch = qemu_get_byte(f); > - memset(qemu_get_ram_ptr(addr), ch, TARGET_PAGE_SIZE); > + void *host; > + uint8_t ch; > + > + if (version_id == 3) { > + host = qemu_get_ram_ptr(addr); > + } else { > + RAMBlock *block; > + char name[64]; > + > + qemu_get_buffer(f, (uint8_t *)name, sizeof(name)); > + > + QLIST_FOREACH(block,&ram.blocks, next) { > + if (!strncmp(name, block->name, sizeof(name))) > + break; > + } > + if (!block) > + return -EINVAL; > + > + host = block->host + addr; > + } > + ch = qemu_get_byte(f); > + memset(host, ch, TARGET_PAGE_SIZE); > #ifndef _WIN32 > if (ch == 0&& > (!kvm_enabled() || kvm_has_sync_mmu())) { > - madvise(qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE, > - MADV_DONTNEED); > + madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED); > } > #endif > } else if (flags& RAM_SAVE_FLAG_PAGE) { > - qemu_get_buffer(f, qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE); > + void *host; > + > + if (version_id == 3) { > + host = qemu_get_ram_ptr(addr); > + } else { > + RAMBlock *block; > + char name[64]; > + > + qemu_get_buffer(f, (uint8_t *)name, sizeof(name)); > + > + QLIST_FOREACH(block,&ram.blocks, next) { > + if (!strncmp(name, block->name, sizeof(name))) > + break; > + } > + if (!block) > + return -EINVAL; > + > + host = block->host + addr; > + } > + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); > } > if (qemu_file_has_error(f)) { > return -EIO; > diff --git a/vl.c b/vl.c > index 9e9c176..a871895 100644 > --- a/vl.c > +++ b/vl.c > @@ -3731,7 +3731,7 @@ int main(int argc, char **argv, char **envp) > if (qemu_opts_foreach(&qemu_drive_opts, drive_init_func, machine, 1) != 0) > exit(1); > > - register_savevm_live("ram", 0, 3, NULL, ram_save_live, NULL, > + register_savevm_live("ram", 0, 4, NULL, ram_save_live, NULL, > ram_load, NULL); > > if (nb_numa_nodes> 0) { > >
* Alex Williamson (alex.williamson@redhat.com) wrote: > @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > ram_addr_t addr; > int flags; > > - if (version_id != 3) { > + if (version_id < 3) { > return -EINVAL; Should we clamp to 3 and 4? > } > > @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > addr &= TARGET_PAGE_MASK; > > if (flags & RAM_SAVE_FLAG_MEM_SIZE) { Does it simplify anything to simply add a new flag? > - if (addr != ram_bytes_total()) { > - return -EINVAL; > + if (version_id == 3) { > + if (addr != ram_bytes_total()) { > + return -EINVAL; > + } > + } else { > + /* Synchronize RAM block list */ > + char name[64]; > + ram_addr_t length; > + ram_addr_t total_ram_bytes = addr; > + > + while (total_ram_bytes) { > + RAMBlock *block; > + qemu_get_buffer(f, (uint8_t *)name, sizeof(name)); > + length = qemu_get_be64(f); > + > + QLIST_FOREACH(block, &ram.blocks, next) { > + if (!strncmp(name, block->name, sizeof(name))) { > + if (block->length != length) > + return -EINVAL; > + break; > + } > + } > + > + if (!block) { > + if (!qemu_ram_alloc(name, length)) > + return -ENOMEM; Is there any usee to finding blocks in stream such that we simply allocate them all dynamically? thanks, -chris
On Tue, 2010-06-08 at 15:12 -0500, Anthony Liguori wrote: > On 06/08/2010 02:16 PM, Alex Williamson wrote: > > if (is_dup_page(p, *p)) { > > - qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS); > > + qemu_put_be64(f, offset | RAM_SAVE_FLAG_COMPRESS); > > + qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name)); > > qemu_put_byte(f, *p); > > > > I think we could use some trickery like use another flag in > current_address to determine whether this was a different section from > the previous section and then only encode the section name if that's true. Good suggestion, see patch 7/6 ;) Alex
On Tue, 2010-06-08 at 13:54 -0700, Chris Wright wrote: > * Alex Williamson (alex.williamson@redhat.com) wrote: > > @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > > ram_addr_t addr; > > int flags; > > > > - if (version_id != 3) { > > + if (version_id < 3) { > > return -EINVAL; > > Should we clamp to 3 and 4? Yep, definitely a good idea. > > } > > > > @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > > addr &= TARGET_PAGE_MASK; > > > > if (flags & RAM_SAVE_FLAG_MEM_SIZE) { > > Does it simplify anything to simply add a new flag? Not that I can see. Appending it to this existing flag conveniently lets the receiving side know when it's done since SUM(block->length) equals MEM_SIZE. Let me know if I'm missing an optimization or use case you're thinking of. > > + QLIST_FOREACH(block, &ram.blocks, next) { > > + if (!strncmp(name, block->name, sizeof(name))) { > > + if (block->length != length) > > + return -EINVAL; > > + break; > > + } > > + } > > + > > + if (!block) { > > + if (!qemu_ram_alloc(name, length)) > > + return -ENOMEM; > > Is there any use to finding blocks in stream such that we simply allocate > them all dynamically? Maybe. It seems like we'd be doing a lot of reallocs as we go though. I think we're pretty locked down once the migration starts, so nothing should be changing on the source once we get started. If that's the case (someone correct me if it's not), sending the block list layout first seems more efficient. Thanks, Alex
diff --git a/arch_init.c b/arch_init.c index 6103461..8ba92ec 100644 --- a/arch_init.c +++ b/arch_init.c @@ -113,20 +113,29 @@ static int ram_save_block(QEMUFile *f) while (addr < total_ram) { if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) { + RAMBlock *block; + ram_addr_t offset; uint8_t *p; cpu_physical_memory_reset_dirty(current_addr, current_addr + TARGET_PAGE_SIZE, MIGRATION_DIRTY_FLAG); - p = qemu_get_ram_ptr(current_addr); + QLIST_FOREACH(block, &ram.blocks, next) { + if (current_addr - block->offset < block->length) + break; + } + offset = current_addr - block->offset; + p = block->host + offset; if (is_dup_page(p, *p)) { - qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS); + qemu_put_be64(f, offset | RAM_SAVE_FLAG_COMPRESS); + qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name)); qemu_put_byte(f, *p); bytes_sent = 1; } else { - qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE); + qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE); + qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name)); qemu_put_buffer(f, p, TARGET_PAGE_SIZE); bytes_sent = TARGET_PAGE_SIZE; } @@ -196,6 +205,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) } if (stage == 1) { + RAMBlock *block; uint64_t total_ram = ram_bytes_total(); bytes_transferred = 0; @@ -210,6 +220,11 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) cpu_physical_memory_set_dirty_tracking(1); qemu_put_be64(f, total_ram | RAM_SAVE_FLAG_MEM_SIZE); + + QLIST_FOREACH(block, &ram.blocks, next) { + qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name)); + qemu_put_be64(f, block->length); + } } bytes_transferred_last = bytes_transferred; @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) ram_addr_t addr; int flags; - if (version_id != 3) { + if (version_id < 3) { return -EINVAL; } @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) addr &= TARGET_PAGE_MASK; if (flags & RAM_SAVE_FLAG_MEM_SIZE) { - if (addr != ram_bytes_total()) { - return -EINVAL; + if (version_id == 3) { + if (addr != ram_bytes_total()) { + return -EINVAL; + } + } else { + /* Synchronize RAM block list */ + char name[64]; + ram_addr_t length; + ram_addr_t total_ram_bytes = addr; + + while (total_ram_bytes) { + RAMBlock *block; + qemu_get_buffer(f, (uint8_t *)name, sizeof(name)); + length = qemu_get_be64(f); + + QLIST_FOREACH(block, &ram.blocks, next) { + if (!strncmp(name, block->name, sizeof(name))) { + if (block->length != length) + return -EINVAL; + break; + } + } + + if (!block) { + if (!qemu_ram_alloc(name, length)) + return -ENOMEM; + } + + total_ram_bytes -= length; + } } } if (flags & RAM_SAVE_FLAG_COMPRESS) { - uint8_t ch = qemu_get_byte(f); - memset(qemu_get_ram_ptr(addr), ch, TARGET_PAGE_SIZE); + void *host; + uint8_t ch; + + if (version_id == 3) { + host = qemu_get_ram_ptr(addr); + } else { + RAMBlock *block; + char name[64]; + + qemu_get_buffer(f, (uint8_t *)name, sizeof(name)); + + QLIST_FOREACH(block, &ram.blocks, next) { + if (!strncmp(name, block->name, sizeof(name))) + break; + } + if (!block) + return -EINVAL; + + host = block->host + addr; + } + ch = qemu_get_byte(f); + memset(host, ch, TARGET_PAGE_SIZE); #ifndef _WIN32 if (ch == 0 && (!kvm_enabled() || kvm_has_sync_mmu())) { - madvise(qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE, - MADV_DONTNEED); + madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED); } #endif } else if (flags & RAM_SAVE_FLAG_PAGE) { - qemu_get_buffer(f, qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE); + void *host; + + if (version_id == 3) { + host = qemu_get_ram_ptr(addr); + } else { + RAMBlock *block; + char name[64]; + + qemu_get_buffer(f, (uint8_t *)name, sizeof(name)); + + QLIST_FOREACH(block, &ram.blocks, next) { + if (!strncmp(name, block->name, sizeof(name))) + break; + } + if (!block) + return -EINVAL; + + host = block->host + addr; + } + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); } if (qemu_file_has_error(f)) { return -EIO; diff --git a/vl.c b/vl.c index 9e9c176..a871895 100644 --- a/vl.c +++ b/vl.c @@ -3731,7 +3731,7 @@ int main(int argc, char **argv, char **envp) if (qemu_opts_foreach(&qemu_drive_opts, drive_init_func, machine, 1) != 0) exit(1); - register_savevm_live("ram", 0, 3, NULL, ram_save_live, NULL, + register_savevm_live("ram", 0, 4, NULL, ram_save_live, NULL, ram_load, NULL); if (nb_numa_nodes > 0) {
Synchronize RAM blocks with the target and migrate using name/offset pairs. This ensures both source and target have the same view of RAM and that we get the right bits into the right slot. Signed-off-by: Alex Williamson <alex.williamson@redhat.com> --- arch_init.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ vl.c | 2 + 2 files changed, 93 insertions(+), 12 deletions(-)