Message ID | 20130307133248.GA27676@redhat.com |
---|---|
State | New |
Headers | show |
Il 07/03/2013 14:32, Michael S. Tsirkin ha scritto: > +#ifdef DEBUG_ARCH_INIT > +#define DPRINTF(fmt, ...) \ > + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) These need to be adjusted, but it can be a follow-up. Paolo > +#else > +#define DPRINTF(fmt, ...) \ > + do { } while (0) > +#endif > + > +/***********************************************************/ > +/* ram save/restore */ > + > +#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ > +#define RAM_SAVE_FLAG_COMPRESS 0x02 > +#define RAM_SAVE_FLAG_MEM_SIZE 0x04 > +#define RAM_SAVE_FLAG_PAGE 0x08 > +#define RAM_SAVE_FLAG_EOS 0x10 > +#define RAM_SAVE_FLAG_CONTINUE 0x20 > +#define RAM_SAVE_FLAG_XBZRLE 0x40 > + > +#ifdef __ALTIVEC__ > +#include <altivec.h> > +#define VECTYPE vector unsigned char > +#define SPLAT(p) vec_splat(vec_ld(0, p), 0) > +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) > +/* altivec.h may redefine the bool macro as vector type. > + * Reset it to POSIX semantics. */ > +#undef bool > +#define bool _Bool > +#elif defined __SSE2__ > +#include <emmintrin.h> > +#define VECTYPE __m128i > +#define SPLAT(p) _mm_set1_epi8(*(p)) > +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) > +#else > +#define VECTYPE unsigned long > +#define SPLAT(p) (*(p) * (~0UL / 255)) > +#define ALL_EQ(v1, v2) ((v1) == (v2)) > +#endif > + > +static int is_dup_page(uint8_t *page) > +{ > + VECTYPE *p = (VECTYPE *)page; > + VECTYPE val = SPLAT(page); > + int i; > + > + for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) { > + if (!ALL_EQ(val, p[i])) { > + return 0; > + } > + } > + > + return 1; > +} > + > +/* struct contains XBZRLE cache and a static page > + used by the compression */ > +static struct { > + /* buffer used for XBZRLE encoding */ > + uint8_t *encoded_buf; > + /* buffer for storing page content */ > + uint8_t *current_buf; > + /* buffer used for XBZRLE decoding */ > + uint8_t *decoded_buf; > + /* Cache for XBZRLE */ > + PageCache *cache; > +} XBZRLE = 
{ > + .encoded_buf = NULL, > + .current_buf = NULL, > + .decoded_buf = NULL, > + .cache = NULL, > +}; > + > + > +int64_t xbzrle_cache_resize(int64_t new_size) > +{ > + if (XBZRLE.cache != NULL) { > + return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * > + TARGET_PAGE_SIZE; > + } > + return pow2floor(new_size); > +} > + > +/* accounting for migration statistics */ > +typedef struct AccountingInfo { > + uint64_t dup_pages; > + uint64_t norm_pages; > + uint64_t iterations; > + uint64_t xbzrle_bytes; > + uint64_t xbzrle_pages; > + uint64_t xbzrle_cache_miss; > + uint64_t xbzrle_overflows; > +} AccountingInfo; > + > +static AccountingInfo acct_info; > + > +static void acct_clear(void) > +{ > + memset(&acct_info, 0, sizeof(acct_info)); > +} > + > +uint64_t dup_mig_bytes_transferred(void) > +{ > + return acct_info.dup_pages * TARGET_PAGE_SIZE; > +} > + > +uint64_t dup_mig_pages_transferred(void) > +{ > + return acct_info.dup_pages; > +} > + > +uint64_t norm_mig_bytes_transferred(void) > +{ > + return acct_info.norm_pages * TARGET_PAGE_SIZE; > +} > + > +uint64_t norm_mig_pages_transferred(void) > +{ > + return acct_info.norm_pages; > +} > + > +uint64_t xbzrle_mig_bytes_transferred(void) > +{ > + return acct_info.xbzrle_bytes; > +} > + > +uint64_t xbzrle_mig_pages_transferred(void) > +{ > + return acct_info.xbzrle_pages; > +} > + > +uint64_t xbzrle_mig_pages_cache_miss(void) > +{ > + return acct_info.xbzrle_cache_miss; > +} > + > +uint64_t xbzrle_mig_pages_overflow(void) > +{ > + return acct_info.xbzrle_overflows; > +} > + > +static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, > + int cont, int flag) > +{ > + size_t size; > + > + qemu_put_be64(f, offset | cont | flag); > + size = 8; > + > + if (!cont) { > + qemu_put_byte(f, strlen(block->idstr)); > + qemu_put_buffer(f, (uint8_t *)block->idstr, > + strlen(block->idstr)); > + size += 1 + strlen(block->idstr); > + } > + return size; > +} > + > +#define ENCODING_FLAG_XBZRLE 0x1 > + > 
+static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, > + ram_addr_t current_addr, RAMBlock *block, > + ram_addr_t offset, int cont, bool last_stage) > +{ > + int encoded_len = 0, bytes_sent = -1; > + uint8_t *prev_cached_page; > + > + if (!cache_is_cached(XBZRLE.cache, current_addr)) { > + if (!last_stage) { > + cache_insert(XBZRLE.cache, current_addr, current_data); > + } > + acct_info.xbzrle_cache_miss++; > + return -1; > + } > + > + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); > + > + /* save current buffer into memory */ > + memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); > + > + /* XBZRLE encoding (if there is no overflow) */ > + encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, > + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, > + TARGET_PAGE_SIZE); > + if (encoded_len == 0) { > + DPRINTF("Skipping unmodified page\n"); > + return 0; > + } else if (encoded_len == -1) { > + DPRINTF("Overflow\n"); > + acct_info.xbzrle_overflows++; > + /* update data in the cache */ > + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); > + return -1; > + } > + > + /* we need to update the data in the cache, in order to get the same data */ > + if (!last_stage) { > + memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); > + } > + > + /* Send XBZRLE based compressed page */ > + bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); > + qemu_put_byte(f, ENCODING_FLAG_XBZRLE); > + qemu_put_be16(f, encoded_len); > + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); > + bytes_sent += encoded_len + 1 + 2; > + acct_info.xbzrle_pages++; > + acct_info.xbzrle_bytes += bytes_sent; > + > + return bytes_sent; > +} > + > + > +/* This is the last block that we have visited serching for dirty pages > + */ > +static RAMBlock *last_seen_block; > +/* This is the last block from where we have sent data */ > +static RAMBlock *last_sent_block; > +static ram_addr_t last_offset; > +static unsigned long 
*migration_bitmap; > +static uint64_t migration_dirty_pages; > +static uint32_t last_version; > + > +static inline > +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, > + ram_addr_t start) > +{ > + unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; > + unsigned long nr = base + (start >> TARGET_PAGE_BITS); > + unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS); > + > + unsigned long next = find_next_bit(migration_bitmap, size, nr); > + > + if (next < size) { > + clear_bit(next, migration_bitmap); > + migration_dirty_pages--; > + } > + return (next - base) << TARGET_PAGE_BITS; > +} > + > +static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, > + ram_addr_t offset) > +{ > + bool ret; > + int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; > + > + ret = test_and_set_bit(nr, migration_bitmap); > + > + if (!ret) { > + migration_dirty_pages++; > + } > + return ret; > +} > + > +/* Needs iothread lock! */ > + > +static void migration_bitmap_sync(void) > +{ > + RAMBlock *block; > + ram_addr_t addr; > + uint64_t num_dirty_pages_init = migration_dirty_pages; > + MigrationState *s = migrate_get_current(); > + static int64_t start_time; > + static int64_t num_dirty_pages_period; > + int64_t end_time; > + > + if (!start_time) { > + start_time = qemu_get_clock_ms(rt_clock); > + } > + > + trace_migration_bitmap_sync_start(); > + memory_global_sync_dirty_bitmap(get_system_memory()); > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > + for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { > + if (memory_region_test_and_clear_dirty(block->mr, > + addr, TARGET_PAGE_SIZE, > + DIRTY_MEMORY_MIGRATION)) { > + migration_bitmap_set_dirty(block->mr, addr); > + } > + } > + } > + trace_migration_bitmap_sync_end(migration_dirty_pages > + - num_dirty_pages_init); > + num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; > + end_time = qemu_get_clock_ms(rt_clock); > + > + /* more than 1 second = 1000 
millisecons */ > + if (end_time > start_time + 1000) { > + s->dirty_pages_rate = num_dirty_pages_period * 1000 > + / (end_time - start_time); > + s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; > + start_time = end_time; > + num_dirty_pages_period = 0; > + } > +} > + > +/* > + * ram_save_block: Writes a page of memory to the stream f > + * > + * Returns: The number of bytes written. > + * 0 means no dirty pages > + */ > + > +static int ram_save_block(QEMUFile *f, bool last_stage) > +{ > + RAMBlock *block = last_seen_block; > + ram_addr_t offset = last_offset; > + bool complete_round = false; > + int bytes_sent = 0; > + MemoryRegion *mr; > + ram_addr_t current_addr; > + > + if (!block) > + block = QTAILQ_FIRST(&ram_list.blocks); > + > + while (true) { > + mr = block->mr; > + offset = migration_bitmap_find_and_reset_dirty(mr, offset); > + if (complete_round && block == last_seen_block && > + offset >= last_offset) { > + break; > + } > + if (offset >= block->length) { > + offset = 0; > + block = QTAILQ_NEXT(block, next); > + if (!block) { > + block = QTAILQ_FIRST(&ram_list.blocks); > + complete_round = true; > + } > + } else { > + uint8_t *p; > + int cont = (block == last_sent_block) ? 
> + RAM_SAVE_FLAG_CONTINUE : 0; > + > + p = memory_region_get_ram_ptr(mr) + offset; > + > + /* In doubt sent page as normal */ > + bytes_sent = -1; > + if (is_dup_page(p)) { > + acct_info.dup_pages++; > + bytes_sent = save_block_hdr(f, block, offset, cont, > + RAM_SAVE_FLAG_COMPRESS); > + qemu_put_byte(f, *p); > + bytes_sent += 1; > + } else if (migrate_use_xbzrle()) { > + current_addr = block->offset + offset; > + bytes_sent = save_xbzrle_page(f, p, current_addr, block, > + offset, cont, last_stage); > + if (!last_stage) { > + p = get_cached_data(XBZRLE.cache, current_addr); > + } > + } > + > + /* XBZRLE overflow or normal page */ > + if (bytes_sent == -1) { > + bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); > + qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > + bytes_sent += TARGET_PAGE_SIZE; > + acct_info.norm_pages++; > + } > + > + /* if page is unmodified, continue to the next */ > + if (bytes_sent > 0) { > + last_sent_block = block; > + break; > + } > + } > + } > + last_seen_block = block; > + last_offset = offset; > + > + return bytes_sent; > +} > + > +static uint64_t bytes_transferred; > + > +static ram_addr_t ram_save_remaining(void) > +{ > + return migration_dirty_pages; > +} > + > +uint64_t ram_bytes_remaining(void) > +{ > + return ram_save_remaining() * TARGET_PAGE_SIZE; > +} > + > +uint64_t ram_bytes_transferred(void) > +{ > + return bytes_transferred; > +} > + > +uint64_t ram_bytes_total(void) > +{ > + RAMBlock *block; > + uint64_t total = 0; > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) > + total += block->length; > + > + return total; > +} > + > +static void migration_end(void) > +{ > + if (migration_bitmap) { > + memory_global_dirty_log_stop(); > + g_free(migration_bitmap); > + migration_bitmap = NULL; > + } > + > + if (XBZRLE.cache) { > + cache_fini(XBZRLE.cache); > + g_free(XBZRLE.cache); > + g_free(XBZRLE.encoded_buf); > + g_free(XBZRLE.current_buf); > + g_free(XBZRLE.decoded_buf); > + XBZRLE.cache = NULL; > + } > +} 
> + > +static void ram_migration_cancel(void *opaque) > +{ > + migration_end(); > +} > + > +static void reset_ram_globals(void) > +{ > + last_seen_block = NULL; > + last_sent_block = NULL; > + last_offset = 0; > + last_version = ram_list.version; > +} > + > +#define MAX_WAIT 50 /* ms, half buffered_file limit */ > + > +static int ram_save_setup(QEMUFile *f, void *opaque) > +{ > + RAMBlock *block; > + int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; > + > + migration_bitmap = bitmap_new(ram_pages); > + bitmap_set(migration_bitmap, 0, ram_pages); > + migration_dirty_pages = ram_pages; > + > + if (migrate_use_xbzrle()) { > + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / > + TARGET_PAGE_SIZE, > + TARGET_PAGE_SIZE); > + if (!XBZRLE.cache) { > + DPRINTF("Error creating cache\n"); > + return -1; > + } > + XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); > + XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); > + acct_clear(); > + } > + > + qemu_mutex_lock_iothread(); > + qemu_mutex_lock_ramlist(); > + bytes_transferred = 0; > + reset_ram_globals(); > + > + memory_global_dirty_log_start(); > + migration_bitmap_sync(); > + qemu_mutex_unlock_iothread(); > + > + qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > + qemu_put_byte(f, strlen(block->idstr)); > + qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); > + qemu_put_be64(f, block->length); > + } > + > + qemu_mutex_unlock_ramlist(); > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > + > + return 0; > +} > + > +static int ram_save_iterate(QEMUFile *f, void *opaque) > +{ > + int ret; > + int i; > + int64_t t0; > + int total_sent = 0; > + > + qemu_mutex_lock_ramlist(); > + > + if (ram_list.version != last_version) { > + reset_ram_globals(); > + } > + > + t0 = qemu_get_clock_ns(rt_clock); > + i = 0; > + while ((ret = qemu_file_rate_limit(f)) == 0) { > + int bytes_sent; > + > + bytes_sent = ram_save_block(f, false); > + /* no more 
blocks to sent */ > + if (bytes_sent == 0) { > + break; > + } > + total_sent += bytes_sent; > + acct_info.iterations++; > + /* we want to check in the 1st loop, just in case it was the 1st time > + and we had to sync the dirty bitmap. > + qemu_get_clock_ns() is a bit expensive, so we only check each some > + iterations > + */ > + if ((i & 63) == 0) { > + uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000; > + if (t1 > MAX_WAIT) { > + DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", > + t1, i); > + break; > + } > + } > + i++; > + } > + > + qemu_mutex_unlock_ramlist(); > + > + if (ret < 0) { > + bytes_transferred += total_sent; > + return ret; > + } > + > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > + total_sent += 8; > + bytes_transferred += total_sent; > + > + return total_sent; > +} > + > +static int ram_save_complete(QEMUFile *f, void *opaque) > +{ > + qemu_mutex_lock_ramlist(); > + migration_bitmap_sync(); > + > + /* try transferring iterative blocks of memory */ > + > + /* flush all remaining blocks regardless of rate limiting */ > + while (true) { > + int bytes_sent; > + > + bytes_sent = ram_save_block(f, true); > + /* no more blocks to sent */ > + if (bytes_sent == 0) { > + break; > + } > + bytes_transferred += bytes_sent; > + } > + migration_end(); > + > + qemu_mutex_unlock_ramlist(); > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > + > + return 0; > +} > + > +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) > +{ > + uint64_t remaining_size; > + > + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; > + > + if (remaining_size < max_size) { > + qemu_mutex_lock_iothread(); > + migration_bitmap_sync(); > + qemu_mutex_unlock_iothread(); > + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; > + } > + return remaining_size; > +} > + > +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) > +{ > + int ret, rc = 0; > + unsigned int xh_len; > + int xh_flags; > + > + if (!XBZRLE.decoded_buf) { > + 
XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); > + } > + > + /* extract RLE header */ > + xh_flags = qemu_get_byte(f); > + xh_len = qemu_get_be16(f); > + > + if (xh_flags != ENCODING_FLAG_XBZRLE) { > + fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); > + return -1; > + } > + > + if (xh_len > TARGET_PAGE_SIZE) { > + fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); > + return -1; > + } > + /* load data and decode */ > + qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); > + > + /* decode RLE */ > + ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, > + TARGET_PAGE_SIZE); > + if (ret == -1) { > + fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); > + rc = -1; > + } else if (ret > TARGET_PAGE_SIZE) { > + fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", > + ret, TARGET_PAGE_SIZE); > + abort(); > + } > + > + return rc; > +} > + > +static inline void *host_from_stream_offset(QEMUFile *f, > + ram_addr_t offset, > + int flags) > +{ > + static RAMBlock *block = NULL; > + char id[256]; > + uint8_t len; > + > + if (flags & RAM_SAVE_FLAG_CONTINUE) { > + if (!block) { > + fprintf(stderr, "Ack, bad migration stream!\n"); > + return NULL; > + } > + > + return memory_region_get_ram_ptr(block->mr) + offset; > + } > + > + len = qemu_get_byte(f); > + qemu_get_buffer(f, (uint8_t *)id, len); > + id[len] = 0; > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > + if (!strncmp(id, block->idstr, sizeof(id))) > + return memory_region_get_ram_ptr(block->mr) + offset; > + } > + > + fprintf(stderr, "Can't find block %s!\n", id); > + return NULL; > +} > + > +static int ram_load(QEMUFile *f, void *opaque, int version_id) > +{ > + ram_addr_t addr; > + int flags, ret = 0; > + int error; > + static uint64_t seq_iter; > + > + seq_iter++; > + > + if (version_id < 4 || version_id > 4) { > + return -EINVAL; > + } > + > + do { > + addr = qemu_get_be64(f); > + > + flags = addr & ~TARGET_PAGE_MASK; > + addr &= 
TARGET_PAGE_MASK; > + > + if (flags & RAM_SAVE_FLAG_MEM_SIZE) { > + if (version_id == 4) { > + /* Synchronize RAM block list */ > + char id[256]; > + ram_addr_t length; > + ram_addr_t total_ram_bytes = addr; > + > + while (total_ram_bytes) { > + RAMBlock *block; > + uint8_t len; > + > + len = qemu_get_byte(f); > + qemu_get_buffer(f, (uint8_t *)id, len); > + id[len] = 0; > + length = qemu_get_be64(f); > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > + if (!strncmp(id, block->idstr, sizeof(id))) { > + if (block->length != length) { > + ret = -EINVAL; > + goto done; > + } > + break; > + } > + } > + > + if (!block) { > + fprintf(stderr, "Unknown ramblock \"%s\", cannot " > + "accept migration\n", id); > + ret = -EINVAL; > + goto done; > + } > + > + total_ram_bytes -= length; > + } > + } > + } > + > + if (flags & RAM_SAVE_FLAG_COMPRESS) { > + void *host; > + uint8_t ch; > + > + host = host_from_stream_offset(f, addr, flags); > + if (!host) { > + return -EINVAL; > + } > + > + ch = qemu_get_byte(f); > + memset(host, ch, TARGET_PAGE_SIZE); > +#ifndef _WIN32 > + if (ch == 0 && > + (!kvm_enabled() || kvm_has_sync_mmu()) && > + getpagesize() <= TARGET_PAGE_SIZE) { > + qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); > + } > +#endif > + } else if (flags & RAM_SAVE_FLAG_PAGE) { > + void *host; > + > + host = host_from_stream_offset(f, addr, flags); > + if (!host) { > + return -EINVAL; > + } > + > + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); > + } else if (flags & RAM_SAVE_FLAG_XBZRLE) { > + void *host = host_from_stream_offset(f, addr, flags); > + if (!host) { > + return -EINVAL; > + } > + > + if (load_xbzrle(f, addr, host) < 0) { > + ret = -EINVAL; > + goto done; > + } > + } > + error = qemu_file_get_error(f); > + if (error) { > + ret = error; > + goto done; > + } > + } while (!(flags & RAM_SAVE_FLAG_EOS)); > + > +done: > + DPRINTF("Completed load of VM with exit code %d seq iteration " > + "%" PRIu64 "\n", ret, seq_iter); > + return ret; > +} > + > 
+SaveVMHandlers savevm_ram_handlers = { > + .save_live_setup = ram_save_setup, > + .save_live_iterate = ram_save_iterate, > + .save_live_complete = ram_save_complete, > + .save_live_pending = ram_save_pending, > + .load_state = ram_load, > + .cancel = ram_migration_cancel, > +}; >
Il 07/03/2013 14:32, Michael S. Tsirkin ha scritto: > Move RAM migration code from arch_init to savevm-ram. > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > --- > > Note: this is on top of Juan's pull request > > Changes from v1: > - renamed source file, rebased on top of migration.next as > suggested by Paolo The output of diff -u <(sed -n 's/^-//p' foo ) <(sed -n 's/^+//p' foo ) is trivial. Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> > Makefile.target | 2 +- > arch_init.c | 763 ----------------------------------------------------- > savevm-ram.c | 804 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 805 insertions(+), 764 deletions(-) > create mode 100644 savevm-ram.c > > diff --git a/Makefile.target b/Makefile.target > index ca657b3..54bc21b 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -108,7 +108,7 @@ CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y) > CONFIG_NO_GET_MEMORY_MAPPING = $(if $(subst n,,$(CONFIG_HAVE_GET_MEMORY_MAPPING)),n,y) > CONFIG_NO_CORE_DUMP = $(if $(subst n,,$(CONFIG_HAVE_CORE_DUMP)),n,y) > > -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o > +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o savevm-ram.o > obj-y += qtest.o > obj-y += hw/ > obj-$(CONFIG_KVM) += kvm-all.o > diff --git a/arch_init.c b/arch_init.c > index 98e2bc6..9943ed4 100644 > --- a/arch_init.c > +++ b/arch_init.c > @@ -31,20 +31,15 @@ > #include "config.h" > #include "monitor/monitor.h" > #include "sysemu/sysemu.h" > -#include "qemu/bitops.h" > -#include "qemu/bitmap.h" > #include "sysemu/arch_init.h" > #include "audio/audio.h" > #include "hw/pc.h" > #include "hw/pci/pci.h" > #include "hw/audiodev.h" > #include "sysemu/kvm.h" > -#include "migration/migration.h" > #include "exec/gdbstub.h" > #include "hw/smbios.h" > -#include "exec/address-spaces.h" > #include "hw/pcspk.h" > -#include "migration/page_cache.h" > #include "qemu/config-file.h" > #include "qmp-commands.h" > #include 
"trace.h" > @@ -103,38 +98,6 @@ int graphic_depth = 15; > > const uint32_t arch_type = QEMU_ARCH; > > -/***********************************************************/ > -/* ram save/restore */ > - > -#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ > -#define RAM_SAVE_FLAG_COMPRESS 0x02 > -#define RAM_SAVE_FLAG_MEM_SIZE 0x04 > -#define RAM_SAVE_FLAG_PAGE 0x08 > -#define RAM_SAVE_FLAG_EOS 0x10 > -#define RAM_SAVE_FLAG_CONTINUE 0x20 > -#define RAM_SAVE_FLAG_XBZRLE 0x40 > - > -#ifdef __ALTIVEC__ > -#include <altivec.h> > -#define VECTYPE vector unsigned char > -#define SPLAT(p) vec_splat(vec_ld(0, p), 0) > -#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) > -/* altivec.h may redefine the bool macro as vector type. > - * Reset it to POSIX semantics. */ > -#undef bool > -#define bool _Bool > -#elif defined __SSE2__ > -#include <emmintrin.h> > -#define VECTYPE __m128i > -#define SPLAT(p) _mm_set1_epi8(*(p)) > -#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) > -#else > -#define VECTYPE unsigned long > -#define SPLAT(p) (*(p) * (~0UL / 255)) > -#define ALL_EQ(v1, v2) ((v1) == (v2)) > -#endif > - > - > static struct defconfig_file { > const char *filename; > /* Indicates it is an user config file (disabled by -no-user-config) */ > @@ -145,7 +108,6 @@ static struct defconfig_file { > { NULL }, /* end of list */ > }; > > - > int qemu_read_default_config_files(bool userconfig) > { > int ret; > @@ -164,731 +126,6 @@ int qemu_read_default_config_files(bool userconfig) > return 0; > } > > -static int is_dup_page(uint8_t *page) > -{ > - VECTYPE *p = (VECTYPE *)page; > - VECTYPE val = SPLAT(page); > - int i; > - > - for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) { > - if (!ALL_EQ(val, p[i])) { > - return 0; > - } > - } > - > - return 1; > -} > - > -/* struct contains XBZRLE cache and a static page > - used by the compression */ > -static struct { > - /* buffer used for XBZRLE encoding */ > - uint8_t *encoded_buf; > - /* buffer for 
storing page content */ > - uint8_t *current_buf; > - /* buffer used for XBZRLE decoding */ > - uint8_t *decoded_buf; > - /* Cache for XBZRLE */ > - PageCache *cache; > -} XBZRLE = { > - .encoded_buf = NULL, > - .current_buf = NULL, > - .decoded_buf = NULL, > - .cache = NULL, > -}; > - > - > -int64_t xbzrle_cache_resize(int64_t new_size) > -{ > - if (XBZRLE.cache != NULL) { > - return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * > - TARGET_PAGE_SIZE; > - } > - return pow2floor(new_size); > -} > - > -/* accounting for migration statistics */ > -typedef struct AccountingInfo { > - uint64_t dup_pages; > - uint64_t norm_pages; > - uint64_t iterations; > - uint64_t xbzrle_bytes; > - uint64_t xbzrle_pages; > - uint64_t xbzrle_cache_miss; > - uint64_t xbzrle_overflows; > -} AccountingInfo; > - > -static AccountingInfo acct_info; > - > -static void acct_clear(void) > -{ > - memset(&acct_info, 0, sizeof(acct_info)); > -} > - > -uint64_t dup_mig_bytes_transferred(void) > -{ > - return acct_info.dup_pages * TARGET_PAGE_SIZE; > -} > - > -uint64_t dup_mig_pages_transferred(void) > -{ > - return acct_info.dup_pages; > -} > - > -uint64_t norm_mig_bytes_transferred(void) > -{ > - return acct_info.norm_pages * TARGET_PAGE_SIZE; > -} > - > -uint64_t norm_mig_pages_transferred(void) > -{ > - return acct_info.norm_pages; > -} > - > -uint64_t xbzrle_mig_bytes_transferred(void) > -{ > - return acct_info.xbzrle_bytes; > -} > - > -uint64_t xbzrle_mig_pages_transferred(void) > -{ > - return acct_info.xbzrle_pages; > -} > - > -uint64_t xbzrle_mig_pages_cache_miss(void) > -{ > - return acct_info.xbzrle_cache_miss; > -} > - > -uint64_t xbzrle_mig_pages_overflow(void) > -{ > - return acct_info.xbzrle_overflows; > -} > - > -static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, > - int cont, int flag) > -{ > - size_t size; > - > - qemu_put_be64(f, offset | cont | flag); > - size = 8; > - > - if (!cont) { > - qemu_put_byte(f, strlen(block->idstr)); > - 
qemu_put_buffer(f, (uint8_t *)block->idstr, > - strlen(block->idstr)); > - size += 1 + strlen(block->idstr); > - } > - return size; > -} > - > -#define ENCODING_FLAG_XBZRLE 0x1 > - > -static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, > - ram_addr_t current_addr, RAMBlock *block, > - ram_addr_t offset, int cont, bool last_stage) > -{ > - int encoded_len = 0, bytes_sent = -1; > - uint8_t *prev_cached_page; > - > - if (!cache_is_cached(XBZRLE.cache, current_addr)) { > - if (!last_stage) { > - cache_insert(XBZRLE.cache, current_addr, current_data); > - } > - acct_info.xbzrle_cache_miss++; > - return -1; > - } > - > - prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); > - > - /* save current buffer into memory */ > - memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); > - > - /* XBZRLE encoding (if there is no overflow) */ > - encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, > - TARGET_PAGE_SIZE, XBZRLE.encoded_buf, > - TARGET_PAGE_SIZE); > - if (encoded_len == 0) { > - DPRINTF("Skipping unmodified page\n"); > - return 0; > - } else if (encoded_len == -1) { > - DPRINTF("Overflow\n"); > - acct_info.xbzrle_overflows++; > - /* update data in the cache */ > - memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); > - return -1; > - } > - > - /* we need to update the data in the cache, in order to get the same data */ > - if (!last_stage) { > - memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); > - } > - > - /* Send XBZRLE based compressed page */ > - bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); > - qemu_put_byte(f, ENCODING_FLAG_XBZRLE); > - qemu_put_be16(f, encoded_len); > - qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); > - bytes_sent += encoded_len + 1 + 2; > - acct_info.xbzrle_pages++; > - acct_info.xbzrle_bytes += bytes_sent; > - > - return bytes_sent; > -} > - > - > -/* This is the last block that we have visited serching for dirty pages > - */ > -static 
RAMBlock *last_seen_block; > -/* This is the last block from where we have sent data */ > -static RAMBlock *last_sent_block; > -static ram_addr_t last_offset; > -static unsigned long *migration_bitmap; > -static uint64_t migration_dirty_pages; > -static uint32_t last_version; > - > -static inline > -ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, > - ram_addr_t start) > -{ > - unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; > - unsigned long nr = base + (start >> TARGET_PAGE_BITS); > - unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS); > - > - unsigned long next = find_next_bit(migration_bitmap, size, nr); > - > - if (next < size) { > - clear_bit(next, migration_bitmap); > - migration_dirty_pages--; > - } > - return (next - base) << TARGET_PAGE_BITS; > -} > - > -static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, > - ram_addr_t offset) > -{ > - bool ret; > - int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; > - > - ret = test_and_set_bit(nr, migration_bitmap); > - > - if (!ret) { > - migration_dirty_pages++; > - } > - return ret; > -} > - > -/* Needs iothread lock! 
*/ > - > -static void migration_bitmap_sync(void) > -{ > - RAMBlock *block; > - ram_addr_t addr; > - uint64_t num_dirty_pages_init = migration_dirty_pages; > - MigrationState *s = migrate_get_current(); > - static int64_t start_time; > - static int64_t num_dirty_pages_period; > - int64_t end_time; > - > - if (!start_time) { > - start_time = qemu_get_clock_ms(rt_clock); > - } > - > - trace_migration_bitmap_sync_start(); > - memory_global_sync_dirty_bitmap(get_system_memory()); > - > - QTAILQ_FOREACH(block, &ram_list.blocks, next) { > - for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { > - if (memory_region_test_and_clear_dirty(block->mr, > - addr, TARGET_PAGE_SIZE, > - DIRTY_MEMORY_MIGRATION)) { > - migration_bitmap_set_dirty(block->mr, addr); > - } > - } > - } > - trace_migration_bitmap_sync_end(migration_dirty_pages > - - num_dirty_pages_init); > - num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; > - end_time = qemu_get_clock_ms(rt_clock); > - > - /* more than 1 second = 1000 millisecons */ > - if (end_time > start_time + 1000) { > - s->dirty_pages_rate = num_dirty_pages_period * 1000 > - / (end_time - start_time); > - s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; > - start_time = end_time; > - num_dirty_pages_period = 0; > - } > -} > - > -/* > - * ram_save_block: Writes a page of memory to the stream f > - * > - * Returns: The number of bytes written. 
> - * 0 means no dirty pages > - */ > - > -static int ram_save_block(QEMUFile *f, bool last_stage) > -{ > - RAMBlock *block = last_seen_block; > - ram_addr_t offset = last_offset; > - bool complete_round = false; > - int bytes_sent = 0; > - MemoryRegion *mr; > - ram_addr_t current_addr; > - > - if (!block) > - block = QTAILQ_FIRST(&ram_list.blocks); > - > - while (true) { > - mr = block->mr; > - offset = migration_bitmap_find_and_reset_dirty(mr, offset); > - if (complete_round && block == last_seen_block && > - offset >= last_offset) { > - break; > - } > - if (offset >= block->length) { > - offset = 0; > - block = QTAILQ_NEXT(block, next); > - if (!block) { > - block = QTAILQ_FIRST(&ram_list.blocks); > - complete_round = true; > - } > - } else { > - uint8_t *p; > - int cont = (block == last_sent_block) ? > - RAM_SAVE_FLAG_CONTINUE : 0; > - > - p = memory_region_get_ram_ptr(mr) + offset; > - > - /* In doubt sent page as normal */ > - bytes_sent = -1; > - if (is_dup_page(p)) { > - acct_info.dup_pages++; > - bytes_sent = save_block_hdr(f, block, offset, cont, > - RAM_SAVE_FLAG_COMPRESS); > - qemu_put_byte(f, *p); > - bytes_sent += 1; > - } else if (migrate_use_xbzrle()) { > - current_addr = block->offset + offset; > - bytes_sent = save_xbzrle_page(f, p, current_addr, block, > - offset, cont, last_stage); > - if (!last_stage) { > - p = get_cached_data(XBZRLE.cache, current_addr); > - } > - } > - > - /* XBZRLE overflow or normal page */ > - if (bytes_sent == -1) { > - bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); > - qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > - bytes_sent += TARGET_PAGE_SIZE; > - acct_info.norm_pages++; > - } > - > - /* if page is unmodified, continue to the next */ > - if (bytes_sent > 0) { > - last_sent_block = block; > - break; > - } > - } > - } > - last_seen_block = block; > - last_offset = offset; > - > - return bytes_sent; > -} > - > -static uint64_t bytes_transferred; > - > -static ram_addr_t ram_save_remaining(void) > 
-{ > - return migration_dirty_pages; > -} > - > -uint64_t ram_bytes_remaining(void) > -{ > - return ram_save_remaining() * TARGET_PAGE_SIZE; > -} > - > -uint64_t ram_bytes_transferred(void) > -{ > - return bytes_transferred; > -} > - > -uint64_t ram_bytes_total(void) > -{ > - RAMBlock *block; > - uint64_t total = 0; > - > - QTAILQ_FOREACH(block, &ram_list.blocks, next) > - total += block->length; > - > - return total; > -} > - > -static void migration_end(void) > -{ > - if (migration_bitmap) { > - memory_global_dirty_log_stop(); > - g_free(migration_bitmap); > - migration_bitmap = NULL; > - } > - > - if (XBZRLE.cache) { > - cache_fini(XBZRLE.cache); > - g_free(XBZRLE.cache); > - g_free(XBZRLE.encoded_buf); > - g_free(XBZRLE.current_buf); > - g_free(XBZRLE.decoded_buf); > - XBZRLE.cache = NULL; > - } > -} > - > -static void ram_migration_cancel(void *opaque) > -{ > - migration_end(); > -} > - > -static void reset_ram_globals(void) > -{ > - last_seen_block = NULL; > - last_sent_block = NULL; > - last_offset = 0; > - last_version = ram_list.version; > -} > - > -#define MAX_WAIT 50 /* ms, half buffered_file limit */ > - > -static int ram_save_setup(QEMUFile *f, void *opaque) > -{ > - RAMBlock *block; > - int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; > - > - migration_bitmap = bitmap_new(ram_pages); > - bitmap_set(migration_bitmap, 0, ram_pages); > - migration_dirty_pages = ram_pages; > - > - if (migrate_use_xbzrle()) { > - XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / > - TARGET_PAGE_SIZE, > - TARGET_PAGE_SIZE); > - if (!XBZRLE.cache) { > - DPRINTF("Error creating cache\n"); > - return -1; > - } > - XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); > - XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); > - acct_clear(); > - } > - > - qemu_mutex_lock_iothread(); > - qemu_mutex_lock_ramlist(); > - bytes_transferred = 0; > - reset_ram_globals(); > - > - memory_global_dirty_log_start(); > - migration_bitmap_sync(); > - qemu_mutex_unlock_iothread(); > - 
> - qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); > - > - QTAILQ_FOREACH(block, &ram_list.blocks, next) { > - qemu_put_byte(f, strlen(block->idstr)); > - qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); > - qemu_put_be64(f, block->length); > - } > - > - qemu_mutex_unlock_ramlist(); > - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > - > - return 0; > -} > - > -static int ram_save_iterate(QEMUFile *f, void *opaque) > -{ > - int ret; > - int i; > - int64_t t0; > - int total_sent = 0; > - > - qemu_mutex_lock_ramlist(); > - > - if (ram_list.version != last_version) { > - reset_ram_globals(); > - } > - > - t0 = qemu_get_clock_ns(rt_clock); > - i = 0; > - while ((ret = qemu_file_rate_limit(f)) == 0) { > - int bytes_sent; > - > - bytes_sent = ram_save_block(f, false); > - /* no more blocks to sent */ > - if (bytes_sent == 0) { > - break; > - } > - total_sent += bytes_sent; > - acct_info.iterations++; > - /* we want to check in the 1st loop, just in case it was the 1st time > - and we had to sync the dirty bitmap. 
> - qemu_get_clock_ns() is a bit expensive, so we only check each some > - iterations > - */ > - if ((i & 63) == 0) { > - uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000; > - if (t1 > MAX_WAIT) { > - DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", > - t1, i); > - break; > - } > - } > - i++; > - } > - > - qemu_mutex_unlock_ramlist(); > - > - if (ret < 0) { > - bytes_transferred += total_sent; > - return ret; > - } > - > - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > - total_sent += 8; > - bytes_transferred += total_sent; > - > - return total_sent; > -} > - > -static int ram_save_complete(QEMUFile *f, void *opaque) > -{ > - qemu_mutex_lock_ramlist(); > - migration_bitmap_sync(); > - > - /* try transferring iterative blocks of memory */ > - > - /* flush all remaining blocks regardless of rate limiting */ > - while (true) { > - int bytes_sent; > - > - bytes_sent = ram_save_block(f, true); > - /* no more blocks to sent */ > - if (bytes_sent == 0) { > - break; > - } > - bytes_transferred += bytes_sent; > - } > - migration_end(); > - > - qemu_mutex_unlock_ramlist(); > - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > - > - return 0; > -} > - > -static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) > -{ > - uint64_t remaining_size; > - > - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; > - > - if (remaining_size < max_size) { > - qemu_mutex_lock_iothread(); > - migration_bitmap_sync(); > - qemu_mutex_unlock_iothread(); > - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; > - } > - return remaining_size; > -} > - > -static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) > -{ > - int ret, rc = 0; > - unsigned int xh_len; > - int xh_flags; > - > - if (!XBZRLE.decoded_buf) { > - XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); > - } > - > - /* extract RLE header */ > - xh_flags = qemu_get_byte(f); > - xh_len = qemu_get_be16(f); > - > - if (xh_flags != ENCODING_FLAG_XBZRLE) { > - fprintf(stderr, "Failed to load 
XBZRLE page - wrong compression!\n"); > - return -1; > - } > - > - if (xh_len > TARGET_PAGE_SIZE) { > - fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); > - return -1; > - } > - /* load data and decode */ > - qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); > - > - /* decode RLE */ > - ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, > - TARGET_PAGE_SIZE); > - if (ret == -1) { > - fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); > - rc = -1; > - } else if (ret > TARGET_PAGE_SIZE) { > - fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", > - ret, TARGET_PAGE_SIZE); > - abort(); > - } > - > - return rc; > -} > - > -static inline void *host_from_stream_offset(QEMUFile *f, > - ram_addr_t offset, > - int flags) > -{ > - static RAMBlock *block = NULL; > - char id[256]; > - uint8_t len; > - > - if (flags & RAM_SAVE_FLAG_CONTINUE) { > - if (!block) { > - fprintf(stderr, "Ack, bad migration stream!\n"); > - return NULL; > - } > - > - return memory_region_get_ram_ptr(block->mr) + offset; > - } > - > - len = qemu_get_byte(f); > - qemu_get_buffer(f, (uint8_t *)id, len); > - id[len] = 0; > - > - QTAILQ_FOREACH(block, &ram_list.blocks, next) { > - if (!strncmp(id, block->idstr, sizeof(id))) > - return memory_region_get_ram_ptr(block->mr) + offset; > - } > - > - fprintf(stderr, "Can't find block %s!\n", id); > - return NULL; > -} > - > -static int ram_load(QEMUFile *f, void *opaque, int version_id) > -{ > - ram_addr_t addr; > - int flags, ret = 0; > - int error; > - static uint64_t seq_iter; > - > - seq_iter++; > - > - if (version_id < 4 || version_id > 4) { > - return -EINVAL; > - } > - > - do { > - addr = qemu_get_be64(f); > - > - flags = addr & ~TARGET_PAGE_MASK; > - addr &= TARGET_PAGE_MASK; > - > - if (flags & RAM_SAVE_FLAG_MEM_SIZE) { > - if (version_id == 4) { > - /* Synchronize RAM block list */ > - char id[256]; > - ram_addr_t length; > - ram_addr_t total_ram_bytes = addr; > - > - while (total_ram_bytes) { > - 
RAMBlock *block; > - uint8_t len; > - > - len = qemu_get_byte(f); > - qemu_get_buffer(f, (uint8_t *)id, len); > - id[len] = 0; > - length = qemu_get_be64(f); > - > - QTAILQ_FOREACH(block, &ram_list.blocks, next) { > - if (!strncmp(id, block->idstr, sizeof(id))) { > - if (block->length != length) { > - ret = -EINVAL; > - goto done; > - } > - break; > - } > - } > - > - if (!block) { > - fprintf(stderr, "Unknown ramblock \"%s\", cannot " > - "accept migration\n", id); > - ret = -EINVAL; > - goto done; > - } > - > - total_ram_bytes -= length; > - } > - } > - } > - > - if (flags & RAM_SAVE_FLAG_COMPRESS) { > - void *host; > - uint8_t ch; > - > - host = host_from_stream_offset(f, addr, flags); > - if (!host) { > - return -EINVAL; > - } > - > - ch = qemu_get_byte(f); > - memset(host, ch, TARGET_PAGE_SIZE); > -#ifndef _WIN32 > - if (ch == 0 && > - (!kvm_enabled() || kvm_has_sync_mmu()) && > - getpagesize() <= TARGET_PAGE_SIZE) { > - qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); > - } > -#endif > - } else if (flags & RAM_SAVE_FLAG_PAGE) { > - void *host; > - > - host = host_from_stream_offset(f, addr, flags); > - if (!host) { > - return -EINVAL; > - } > - > - qemu_get_buffer(f, host, TARGET_PAGE_SIZE); > - } else if (flags & RAM_SAVE_FLAG_XBZRLE) { > - void *host = host_from_stream_offset(f, addr, flags); > - if (!host) { > - return -EINVAL; > - } > - > - if (load_xbzrle(f, addr, host) < 0) { > - ret = -EINVAL; > - goto done; > - } > - } > - error = qemu_file_get_error(f); > - if (error) { > - ret = error; > - goto done; > - } > - } while (!(flags & RAM_SAVE_FLAG_EOS)); > - > -done: > - DPRINTF("Completed load of VM with exit code %d seq iteration " > - "%" PRIu64 "\n", ret, seq_iter); > - return ret; > -} > - > -SaveVMHandlers savevm_ram_handlers = { > - .save_live_setup = ram_save_setup, > - .save_live_iterate = ram_save_iterate, > - .save_live_complete = ram_save_complete, > - .save_live_pending = ram_save_pending, > - .load_state = ram_load, > - .cancel = 
ram_migration_cancel, > -}; > - > #ifdef HAS_AUDIO > struct soundhw { > const char *name; > diff --git a/savevm-ram.c b/savevm-ram.c > new file mode 100644 > index 0000000..cea656c > --- /dev/null > +++ b/savevm-ram.c > @@ -0,0 +1,804 @@ > +/* > + * RAM Migration support > + * > + * Copyright (c) 2003-2008 Fabrice Bellard > + * > + * Permission is hereby granted, free of charge, to any person obtaining a copy > + * of this software and associated documentation files (the "Software"), to deal > + * in the Software without restriction, including without limitation the rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > +#include <stdint.h> > +#include <stdarg.h> > +#include <stdlib.h> > +#ifndef _WIN32 > +#include <sys/types.h> > +#include <sys/mman.h> > +#endif > +#include "sysemu/sysemu.h" > +#include "qemu/bitops.h" > +#include "qemu/bitmap.h" > +#include "sysemu/kvm.h" > +#include "migration/migration.h" > +#include "exec/address-spaces.h" > +#include "migration/page_cache.h" > +#include "migration/qemu-file.h" > +#include "trace.h" > +#include "exec/cpu-all.h" > + > +#ifdef DEBUG_ARCH_INIT > +#define DPRINTF(fmt, ...) 
\ > + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) > +#else > +#define DPRINTF(fmt, ...) \ > + do { } while (0) > +#endif > + > +/***********************************************************/ > +/* ram save/restore */ > + > +#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ > +#define RAM_SAVE_FLAG_COMPRESS 0x02 > +#define RAM_SAVE_FLAG_MEM_SIZE 0x04 > +#define RAM_SAVE_FLAG_PAGE 0x08 > +#define RAM_SAVE_FLAG_EOS 0x10 > +#define RAM_SAVE_FLAG_CONTINUE 0x20 > +#define RAM_SAVE_FLAG_XBZRLE 0x40 > + > +#ifdef __ALTIVEC__ > +#include <altivec.h> > +#define VECTYPE vector unsigned char > +#define SPLAT(p) vec_splat(vec_ld(0, p), 0) > +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) > +/* altivec.h may redefine the bool macro as vector type. > + * Reset it to POSIX semantics. */ > +#undef bool > +#define bool _Bool > +#elif defined __SSE2__ > +#include <emmintrin.h> > +#define VECTYPE __m128i > +#define SPLAT(p) _mm_set1_epi8(*(p)) > +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) > +#else > +#define VECTYPE unsigned long > +#define SPLAT(p) (*(p) * (~0UL / 255)) > +#define ALL_EQ(v1, v2) ((v1) == (v2)) > +#endif > + > +static int is_dup_page(uint8_t *page) > +{ > + VECTYPE *p = (VECTYPE *)page; > + VECTYPE val = SPLAT(page); > + int i; > + > + for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) { > + if (!ALL_EQ(val, p[i])) { > + return 0; > + } > + } > + > + return 1; > +} > + > +/* struct contains XBZRLE cache and a static page > + used by the compression */ > +static struct { > + /* buffer used for XBZRLE encoding */ > + uint8_t *encoded_buf; > + /* buffer for storing page content */ > + uint8_t *current_buf; > + /* buffer used for XBZRLE decoding */ > + uint8_t *decoded_buf; > + /* Cache for XBZRLE */ > + PageCache *cache; > +} XBZRLE = { > + .encoded_buf = NULL, > + .current_buf = NULL, > + .decoded_buf = NULL, > + .cache = NULL, > +}; > + > + > +int64_t xbzrle_cache_resize(int64_t new_size) > +{ > 
+ if (XBZRLE.cache != NULL) { > + return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * > + TARGET_PAGE_SIZE; > + } > + return pow2floor(new_size); > +} > + > +/* accounting for migration statistics */ > +typedef struct AccountingInfo { > + uint64_t dup_pages; > + uint64_t norm_pages; > + uint64_t iterations; > + uint64_t xbzrle_bytes; > + uint64_t xbzrle_pages; > + uint64_t xbzrle_cache_miss; > + uint64_t xbzrle_overflows; > +} AccountingInfo; > + > +static AccountingInfo acct_info; > + > +static void acct_clear(void) > +{ > + memset(&acct_info, 0, sizeof(acct_info)); > +} > + > +uint64_t dup_mig_bytes_transferred(void) > +{ > + return acct_info.dup_pages * TARGET_PAGE_SIZE; > +} > + > +uint64_t dup_mig_pages_transferred(void) > +{ > + return acct_info.dup_pages; > +} > + > +uint64_t norm_mig_bytes_transferred(void) > +{ > + return acct_info.norm_pages * TARGET_PAGE_SIZE; > +} > + > +uint64_t norm_mig_pages_transferred(void) > +{ > + return acct_info.norm_pages; > +} > + > +uint64_t xbzrle_mig_bytes_transferred(void) > +{ > + return acct_info.xbzrle_bytes; > +} > + > +uint64_t xbzrle_mig_pages_transferred(void) > +{ > + return acct_info.xbzrle_pages; > +} > + > +uint64_t xbzrle_mig_pages_cache_miss(void) > +{ > + return acct_info.xbzrle_cache_miss; > +} > + > +uint64_t xbzrle_mig_pages_overflow(void) > +{ > + return acct_info.xbzrle_overflows; > +} > + > +static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, > + int cont, int flag) > +{ > + size_t size; > + > + qemu_put_be64(f, offset | cont | flag); > + size = 8; > + > + if (!cont) { > + qemu_put_byte(f, strlen(block->idstr)); > + qemu_put_buffer(f, (uint8_t *)block->idstr, > + strlen(block->idstr)); > + size += 1 + strlen(block->idstr); > + } > + return size; > +} > + > +#define ENCODING_FLAG_XBZRLE 0x1 > + > +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, > + ram_addr_t current_addr, RAMBlock *block, > + ram_addr_t offset, int cont, bool last_stage) > +{ > + 
int encoded_len = 0, bytes_sent = -1; > + uint8_t *prev_cached_page; > + > + if (!cache_is_cached(XBZRLE.cache, current_addr)) { > + if (!last_stage) { > + cache_insert(XBZRLE.cache, current_addr, current_data); > + } > + acct_info.xbzrle_cache_miss++; > + return -1; > + } > + > + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); > + > + /* save current buffer into memory */ > + memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); > + > + /* XBZRLE encoding (if there is no overflow) */ > + encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, > + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, > + TARGET_PAGE_SIZE); > + if (encoded_len == 0) { > + DPRINTF("Skipping unmodified page\n"); > + return 0; > + } else if (encoded_len == -1) { > + DPRINTF("Overflow\n"); > + acct_info.xbzrle_overflows++; > + /* update data in the cache */ > + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); > + return -1; > + } > + > + /* we need to update the data in the cache, in order to get the same data */ > + if (!last_stage) { > + memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); > + } > + > + /* Send XBZRLE based compressed page */ > + bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); > + qemu_put_byte(f, ENCODING_FLAG_XBZRLE); > + qemu_put_be16(f, encoded_len); > + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); > + bytes_sent += encoded_len + 1 + 2; > + acct_info.xbzrle_pages++; > + acct_info.xbzrle_bytes += bytes_sent; > + > + return bytes_sent; > +} > + > + > +/* This is the last block that we have visited serching for dirty pages > + */ > +static RAMBlock *last_seen_block; > +/* This is the last block from where we have sent data */ > +static RAMBlock *last_sent_block; > +static ram_addr_t last_offset; > +static unsigned long *migration_bitmap; > +static uint64_t migration_dirty_pages; > +static uint32_t last_version; > + > +static inline > +ram_addr_t 
migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, > + ram_addr_t start) > +{ > + unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; > + unsigned long nr = base + (start >> TARGET_PAGE_BITS); > + unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS); > + > + unsigned long next = find_next_bit(migration_bitmap, size, nr); > + > + if (next < size) { > + clear_bit(next, migration_bitmap); > + migration_dirty_pages--; > + } > + return (next - base) << TARGET_PAGE_BITS; > +} > + > +static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, > + ram_addr_t offset) > +{ > + bool ret; > + int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; > + > + ret = test_and_set_bit(nr, migration_bitmap); > + > + if (!ret) { > + migration_dirty_pages++; > + } > + return ret; > +} > + > +/* Needs iothread lock! */ > + > +static void migration_bitmap_sync(void) > +{ > + RAMBlock *block; > + ram_addr_t addr; > + uint64_t num_dirty_pages_init = migration_dirty_pages; > + MigrationState *s = migrate_get_current(); > + static int64_t start_time; > + static int64_t num_dirty_pages_period; > + int64_t end_time; > + > + if (!start_time) { > + start_time = qemu_get_clock_ms(rt_clock); > + } > + > + trace_migration_bitmap_sync_start(); > + memory_global_sync_dirty_bitmap(get_system_memory()); > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > + for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { > + if (memory_region_test_and_clear_dirty(block->mr, > + addr, TARGET_PAGE_SIZE, > + DIRTY_MEMORY_MIGRATION)) { > + migration_bitmap_set_dirty(block->mr, addr); > + } > + } > + } > + trace_migration_bitmap_sync_end(migration_dirty_pages > + - num_dirty_pages_init); > + num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; > + end_time = qemu_get_clock_ms(rt_clock); > + > + /* more than 1 second = 1000 millisecons */ > + if (end_time > start_time + 1000) { > + s->dirty_pages_rate = num_dirty_pages_period * 1000 > + / (end_time - 
start_time); > + s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; > + start_time = end_time; > + num_dirty_pages_period = 0; > + } > +} > + > +/* > + * ram_save_block: Writes a page of memory to the stream f > + * > + * Returns: The number of bytes written. > + * 0 means no dirty pages > + */ > + > +static int ram_save_block(QEMUFile *f, bool last_stage) > +{ > + RAMBlock *block = last_seen_block; > + ram_addr_t offset = last_offset; > + bool complete_round = false; > + int bytes_sent = 0; > + MemoryRegion *mr; > + ram_addr_t current_addr; > + > + if (!block) > + block = QTAILQ_FIRST(&ram_list.blocks); > + > + while (true) { > + mr = block->mr; > + offset = migration_bitmap_find_and_reset_dirty(mr, offset); > + if (complete_round && block == last_seen_block && > + offset >= last_offset) { > + break; > + } > + if (offset >= block->length) { > + offset = 0; > + block = QTAILQ_NEXT(block, next); > + if (!block) { > + block = QTAILQ_FIRST(&ram_list.blocks); > + complete_round = true; > + } > + } else { > + uint8_t *p; > + int cont = (block == last_sent_block) ? 
> + RAM_SAVE_FLAG_CONTINUE : 0; > + > + p = memory_region_get_ram_ptr(mr) + offset; > + > + /* In doubt sent page as normal */ > + bytes_sent = -1; > + if (is_dup_page(p)) { > + acct_info.dup_pages++; > + bytes_sent = save_block_hdr(f, block, offset, cont, > + RAM_SAVE_FLAG_COMPRESS); > + qemu_put_byte(f, *p); > + bytes_sent += 1; > + } else if (migrate_use_xbzrle()) { > + current_addr = block->offset + offset; > + bytes_sent = save_xbzrle_page(f, p, current_addr, block, > + offset, cont, last_stage); > + if (!last_stage) { > + p = get_cached_data(XBZRLE.cache, current_addr); > + } > + } > + > + /* XBZRLE overflow or normal page */ > + if (bytes_sent == -1) { > + bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); > + qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > + bytes_sent += TARGET_PAGE_SIZE; > + acct_info.norm_pages++; > + } > + > + /* if page is unmodified, continue to the next */ > + if (bytes_sent > 0) { > + last_sent_block = block; > + break; > + } > + } > + } > + last_seen_block = block; > + last_offset = offset; > + > + return bytes_sent; > +} > + > +static uint64_t bytes_transferred; > + > +static ram_addr_t ram_save_remaining(void) > +{ > + return migration_dirty_pages; > +} > + > +uint64_t ram_bytes_remaining(void) > +{ > + return ram_save_remaining() * TARGET_PAGE_SIZE; > +} > + > +uint64_t ram_bytes_transferred(void) > +{ > + return bytes_transferred; > +} > + > +uint64_t ram_bytes_total(void) > +{ > + RAMBlock *block; > + uint64_t total = 0; > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) > + total += block->length; > + > + return total; > +} > + > +static void migration_end(void) > +{ > + if (migration_bitmap) { > + memory_global_dirty_log_stop(); > + g_free(migration_bitmap); > + migration_bitmap = NULL; > + } > + > + if (XBZRLE.cache) { > + cache_fini(XBZRLE.cache); > + g_free(XBZRLE.cache); > + g_free(XBZRLE.encoded_buf); > + g_free(XBZRLE.current_buf); > + g_free(XBZRLE.decoded_buf); > + XBZRLE.cache = NULL; > + } > +} 
> + > +static void ram_migration_cancel(void *opaque) > +{ > + migration_end(); > +} > + > +static void reset_ram_globals(void) > +{ > + last_seen_block = NULL; > + last_sent_block = NULL; > + last_offset = 0; > + last_version = ram_list.version; > +} > + > +#define MAX_WAIT 50 /* ms, half buffered_file limit */ > + > +static int ram_save_setup(QEMUFile *f, void *opaque) > +{ > + RAMBlock *block; > + int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; > + > + migration_bitmap = bitmap_new(ram_pages); > + bitmap_set(migration_bitmap, 0, ram_pages); > + migration_dirty_pages = ram_pages; > + > + if (migrate_use_xbzrle()) { > + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / > + TARGET_PAGE_SIZE, > + TARGET_PAGE_SIZE); > + if (!XBZRLE.cache) { > + DPRINTF("Error creating cache\n"); > + return -1; > + } > + XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); > + XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); > + acct_clear(); > + } > + > + qemu_mutex_lock_iothread(); > + qemu_mutex_lock_ramlist(); > + bytes_transferred = 0; > + reset_ram_globals(); > + > + memory_global_dirty_log_start(); > + migration_bitmap_sync(); > + qemu_mutex_unlock_iothread(); > + > + qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > + qemu_put_byte(f, strlen(block->idstr)); > + qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); > + qemu_put_be64(f, block->length); > + } > + > + qemu_mutex_unlock_ramlist(); > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > + > + return 0; > +} > + > +static int ram_save_iterate(QEMUFile *f, void *opaque) > +{ > + int ret; > + int i; > + int64_t t0; > + int total_sent = 0; > + > + qemu_mutex_lock_ramlist(); > + > + if (ram_list.version != last_version) { > + reset_ram_globals(); > + } > + > + t0 = qemu_get_clock_ns(rt_clock); > + i = 0; > + while ((ret = qemu_file_rate_limit(f)) == 0) { > + int bytes_sent; > + > + bytes_sent = ram_save_block(f, false); > + /* no more 
blocks to sent */ > + if (bytes_sent == 0) { > + break; > + } > + total_sent += bytes_sent; > + acct_info.iterations++; > + /* we want to check in the 1st loop, just in case it was the 1st time > + and we had to sync the dirty bitmap. > + qemu_get_clock_ns() is a bit expensive, so we only check each some > + iterations > + */ > + if ((i & 63) == 0) { > + uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000; > + if (t1 > MAX_WAIT) { > + DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", > + t1, i); > + break; > + } > + } > + i++; > + } > + > + qemu_mutex_unlock_ramlist(); > + > + if (ret < 0) { > + bytes_transferred += total_sent; > + return ret; > + } > + > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > + total_sent += 8; > + bytes_transferred += total_sent; > + > + return total_sent; > +} > + > +static int ram_save_complete(QEMUFile *f, void *opaque) > +{ > + qemu_mutex_lock_ramlist(); > + migration_bitmap_sync(); > + > + /* try transferring iterative blocks of memory */ > + > + /* flush all remaining blocks regardless of rate limiting */ > + while (true) { > + int bytes_sent; > + > + bytes_sent = ram_save_block(f, true); > + /* no more blocks to sent */ > + if (bytes_sent == 0) { > + break; > + } > + bytes_transferred += bytes_sent; > + } > + migration_end(); > + > + qemu_mutex_unlock_ramlist(); > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > + > + return 0; > +} > + > +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) > +{ > + uint64_t remaining_size; > + > + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; > + > + if (remaining_size < max_size) { > + qemu_mutex_lock_iothread(); > + migration_bitmap_sync(); > + qemu_mutex_unlock_iothread(); > + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; > + } > + return remaining_size; > +} > + > +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) > +{ > + int ret, rc = 0; > + unsigned int xh_len; > + int xh_flags; > + > + if (!XBZRLE.decoded_buf) { > + 
XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); > + } > + > + /* extract RLE header */ > + xh_flags = qemu_get_byte(f); > + xh_len = qemu_get_be16(f); > + > + if (xh_flags != ENCODING_FLAG_XBZRLE) { > + fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); > + return -1; > + } > + > + if (xh_len > TARGET_PAGE_SIZE) { > + fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); > + return -1; > + } > + /* load data and decode */ > + qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); > + > + /* decode RLE */ > + ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, > + TARGET_PAGE_SIZE); > + if (ret == -1) { > + fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); > + rc = -1; > + } else if (ret > TARGET_PAGE_SIZE) { > + fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", > + ret, TARGET_PAGE_SIZE); > + abort(); > + } > + > + return rc; > +} > + > +static inline void *host_from_stream_offset(QEMUFile *f, > + ram_addr_t offset, > + int flags) > +{ > + static RAMBlock *block = NULL; > + char id[256]; > + uint8_t len; > + > + if (flags & RAM_SAVE_FLAG_CONTINUE) { > + if (!block) { > + fprintf(stderr, "Ack, bad migration stream!\n"); > + return NULL; > + } > + > + return memory_region_get_ram_ptr(block->mr) + offset; > + } > + > + len = qemu_get_byte(f); > + qemu_get_buffer(f, (uint8_t *)id, len); > + id[len] = 0; > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > + if (!strncmp(id, block->idstr, sizeof(id))) > + return memory_region_get_ram_ptr(block->mr) + offset; > + } > + > + fprintf(stderr, "Can't find block %s!\n", id); > + return NULL; > +} > + > +static int ram_load(QEMUFile *f, void *opaque, int version_id) > +{ > + ram_addr_t addr; > + int flags, ret = 0; > + int error; > + static uint64_t seq_iter; > + > + seq_iter++; > + > + if (version_id < 4 || version_id > 4) { > + return -EINVAL; > + } > + > + do { > + addr = qemu_get_be64(f); > + > + flags = addr & ~TARGET_PAGE_MASK; > + addr &= 
TARGET_PAGE_MASK; > + > + if (flags & RAM_SAVE_FLAG_MEM_SIZE) { > + if (version_id == 4) { > + /* Synchronize RAM block list */ > + char id[256]; > + ram_addr_t length; > + ram_addr_t total_ram_bytes = addr; > + > + while (total_ram_bytes) { > + RAMBlock *block; > + uint8_t len; > + > + len = qemu_get_byte(f); > + qemu_get_buffer(f, (uint8_t *)id, len); > + id[len] = 0; > + length = qemu_get_be64(f); > + > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > + if (!strncmp(id, block->idstr, sizeof(id))) { > + if (block->length != length) { > + ret = -EINVAL; > + goto done; > + } > + break; > + } > + } > + > + if (!block) { > + fprintf(stderr, "Unknown ramblock \"%s\", cannot " > + "accept migration\n", id); > + ret = -EINVAL; > + goto done; > + } > + > + total_ram_bytes -= length; > + } > + } > + } > + > + if (flags & RAM_SAVE_FLAG_COMPRESS) { > + void *host; > + uint8_t ch; > + > + host = host_from_stream_offset(f, addr, flags); > + if (!host) { > + return -EINVAL; > + } > + > + ch = qemu_get_byte(f); > + memset(host, ch, TARGET_PAGE_SIZE); > +#ifndef _WIN32 > + if (ch == 0 && > + (!kvm_enabled() || kvm_has_sync_mmu()) && > + getpagesize() <= TARGET_PAGE_SIZE) { > + qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); > + } > +#endif > + } else if (flags & RAM_SAVE_FLAG_PAGE) { > + void *host; > + > + host = host_from_stream_offset(f, addr, flags); > + if (!host) { > + return -EINVAL; > + } > + > + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); > + } else if (flags & RAM_SAVE_FLAG_XBZRLE) { > + void *host = host_from_stream_offset(f, addr, flags); > + if (!host) { > + return -EINVAL; > + } > + > + if (load_xbzrle(f, addr, host) < 0) { > + ret = -EINVAL; > + goto done; > + } > + } > + error = qemu_file_get_error(f); > + if (error) { > + ret = error; > + goto done; > + } > + } while (!(flags & RAM_SAVE_FLAG_EOS)); > + > +done: > + DPRINTF("Completed load of VM with exit code %d seq iteration " > + "%" PRIu64 "\n", ret, seq_iter); > + return ret; > +} > + > 
+SaveVMHandlers savevm_ram_handlers = { > + .save_live_setup = ram_save_setup, > + .save_live_iterate = ram_save_iterate, > + .save_live_complete = ram_save_complete, > + .save_live_pending = ram_save_pending, > + .load_state = ram_load, > + .cancel = ram_migration_cancel, > +}; >
On Thu, Mar 07, 2013 at 04:20:48PM +0100, Paolo Bonzini wrote: > Il 07/03/2013 14:32, Michael S. Tsirkin ha scritto: > > +#ifdef DEBUG_ARCH_INIT > > +#define DPRINTF(fmt, ...) \ > > + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) > > These need to be adjusted, but it can be a follow-up. > > Paolo Thanks, will do a follow-up. > > +#else > > +#define DPRINTF(fmt, ...) \ > > + do { } while (0) > > +#endif > > + > > +/***********************************************************/ > > +/* ram save/restore */ > > + > > +#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ > > +#define RAM_SAVE_FLAG_COMPRESS 0x02 > > +#define RAM_SAVE_FLAG_MEM_SIZE 0x04 > > +#define RAM_SAVE_FLAG_PAGE 0x08 > > +#define RAM_SAVE_FLAG_EOS 0x10 > > +#define RAM_SAVE_FLAG_CONTINUE 0x20 > > +#define RAM_SAVE_FLAG_XBZRLE 0x40 > > + > > +#ifdef __ALTIVEC__ > > +#include <altivec.h> > > +#define VECTYPE vector unsigned char > > +#define SPLAT(p) vec_splat(vec_ld(0, p), 0) > > +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) > > +/* altivec.h may redefine the bool macro as vector type. > > + * Reset it to POSIX semantics. 
*/ > > +#undef bool > > +#define bool _Bool > > +#elif defined __SSE2__ > > +#include <emmintrin.h> > > +#define VECTYPE __m128i > > +#define SPLAT(p) _mm_set1_epi8(*(p)) > > +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) > > +#else > > +#define VECTYPE unsigned long > > +#define SPLAT(p) (*(p) * (~0UL / 255)) > > +#define ALL_EQ(v1, v2) ((v1) == (v2)) > > +#endif > > + > > +static int is_dup_page(uint8_t *page) > > +{ > > + VECTYPE *p = (VECTYPE *)page; > > + VECTYPE val = SPLAT(page); > > + int i; > > + > > + for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) { > > + if (!ALL_EQ(val, p[i])) { > > + return 0; > > + } > > + } > > + > > + return 1; > > +} > > + > > +/* struct contains XBZRLE cache and a static page > > + used by the compression */ > > +static struct { > > + /* buffer used for XBZRLE encoding */ > > + uint8_t *encoded_buf; > > + /* buffer for storing page content */ > > + uint8_t *current_buf; > > + /* buffer used for XBZRLE decoding */ > > + uint8_t *decoded_buf; > > + /* Cache for XBZRLE */ > > + PageCache *cache; > > +} XBZRLE = { > > + .encoded_buf = NULL, > > + .current_buf = NULL, > > + .decoded_buf = NULL, > > + .cache = NULL, > > +}; > > + > > + > > +int64_t xbzrle_cache_resize(int64_t new_size) > > +{ > > + if (XBZRLE.cache != NULL) { > > + return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * > > + TARGET_PAGE_SIZE; > > + } > > + return pow2floor(new_size); > > +} > > + > > +/* accounting for migration statistics */ > > +typedef struct AccountingInfo { > > + uint64_t dup_pages; > > + uint64_t norm_pages; > > + uint64_t iterations; > > + uint64_t xbzrle_bytes; > > + uint64_t xbzrle_pages; > > + uint64_t xbzrle_cache_miss; > > + uint64_t xbzrle_overflows; > > +} AccountingInfo; > > + > > +static AccountingInfo acct_info; > > + > > +static void acct_clear(void) > > +{ > > + memset(&acct_info, 0, sizeof(acct_info)); > > +} > > + > > +uint64_t dup_mig_bytes_transferred(void) > > +{ > > + return 
acct_info.dup_pages * TARGET_PAGE_SIZE; > > +} > > + > > +uint64_t dup_mig_pages_transferred(void) > > +{ > > + return acct_info.dup_pages; > > +} > > + > > +uint64_t norm_mig_bytes_transferred(void) > > +{ > > + return acct_info.norm_pages * TARGET_PAGE_SIZE; > > +} > > + > > +uint64_t norm_mig_pages_transferred(void) > > +{ > > + return acct_info.norm_pages; > > +} > > + > > +uint64_t xbzrle_mig_bytes_transferred(void) > > +{ > > + return acct_info.xbzrle_bytes; > > +} > > + > > +uint64_t xbzrle_mig_pages_transferred(void) > > +{ > > + return acct_info.xbzrle_pages; > > +} > > + > > +uint64_t xbzrle_mig_pages_cache_miss(void) > > +{ > > + return acct_info.xbzrle_cache_miss; > > +} > > + > > +uint64_t xbzrle_mig_pages_overflow(void) > > +{ > > + return acct_info.xbzrle_overflows; > > +} > > + > > +static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, > > + int cont, int flag) > > +{ > > + size_t size; > > + > > + qemu_put_be64(f, offset | cont | flag); > > + size = 8; > > + > > + if (!cont) { > > + qemu_put_byte(f, strlen(block->idstr)); > > + qemu_put_buffer(f, (uint8_t *)block->idstr, > > + strlen(block->idstr)); > > + size += 1 + strlen(block->idstr); > > + } > > + return size; > > +} > > + > > +#define ENCODING_FLAG_XBZRLE 0x1 > > + > > +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, > > + ram_addr_t current_addr, RAMBlock *block, > > + ram_addr_t offset, int cont, bool last_stage) > > +{ > > + int encoded_len = 0, bytes_sent = -1; > > + uint8_t *prev_cached_page; > > + > > + if (!cache_is_cached(XBZRLE.cache, current_addr)) { > > + if (!last_stage) { > > + cache_insert(XBZRLE.cache, current_addr, current_data); > > + } > > + acct_info.xbzrle_cache_miss++; > > + return -1; > > + } > > + > > + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); > > + > > + /* save current buffer into memory */ > > + memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); > > + > > + /* XBZRLE encoding (if there is no 
overflow) */ > > + encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, > > + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, > > + TARGET_PAGE_SIZE); > > + if (encoded_len == 0) { > > + DPRINTF("Skipping unmodified page\n"); > > + return 0; > > + } else if (encoded_len == -1) { > > + DPRINTF("Overflow\n"); > > + acct_info.xbzrle_overflows++; > > + /* update data in the cache */ > > + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); > > + return -1; > > + } > > + > > + /* we need to update the data in the cache, in order to get the same data */ > > + if (!last_stage) { > > + memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); > > + } > > + > > + /* Send XBZRLE based compressed page */ > > + bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); > > + qemu_put_byte(f, ENCODING_FLAG_XBZRLE); > > + qemu_put_be16(f, encoded_len); > > + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); > > + bytes_sent += encoded_len + 1 + 2; > > + acct_info.xbzrle_pages++; > > + acct_info.xbzrle_bytes += bytes_sent; > > + > > + return bytes_sent; > > +} > > + > > + > > +/* This is the last block that we have visited serching for dirty pages > > + */ > > +static RAMBlock *last_seen_block; > > +/* This is the last block from where we have sent data */ > > +static RAMBlock *last_sent_block; > > +static ram_addr_t last_offset; > > +static unsigned long *migration_bitmap; > > +static uint64_t migration_dirty_pages; > > +static uint32_t last_version; > > + > > +static inline > > +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, > > + ram_addr_t start) > > +{ > > + unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; > > + unsigned long nr = base + (start >> TARGET_PAGE_BITS); > > + unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS); > > + > > + unsigned long next = find_next_bit(migration_bitmap, size, nr); > > + > > + if (next < size) { > > + clear_bit(next, migration_bitmap); > > + 
migration_dirty_pages--; > > + } > > + return (next - base) << TARGET_PAGE_BITS; > > +} > > + > > +static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, > > + ram_addr_t offset) > > +{ > > + bool ret; > > + int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; > > + > > + ret = test_and_set_bit(nr, migration_bitmap); > > + > > + if (!ret) { > > + migration_dirty_pages++; > > + } > > + return ret; > > +} > > + > > +/* Needs iothread lock! */ > > + > > +static void migration_bitmap_sync(void) > > +{ > > + RAMBlock *block; > > + ram_addr_t addr; > > + uint64_t num_dirty_pages_init = migration_dirty_pages; > > + MigrationState *s = migrate_get_current(); > > + static int64_t start_time; > > + static int64_t num_dirty_pages_period; > > + int64_t end_time; > > + > > + if (!start_time) { > > + start_time = qemu_get_clock_ms(rt_clock); > > + } > > + > > + trace_migration_bitmap_sync_start(); > > + memory_global_sync_dirty_bitmap(get_system_memory()); > > + > > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > > + for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { > > + if (memory_region_test_and_clear_dirty(block->mr, > > + addr, TARGET_PAGE_SIZE, > > + DIRTY_MEMORY_MIGRATION)) { > > + migration_bitmap_set_dirty(block->mr, addr); > > + } > > + } > > + } > > + trace_migration_bitmap_sync_end(migration_dirty_pages > > + - num_dirty_pages_init); > > + num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; > > + end_time = qemu_get_clock_ms(rt_clock); > > + > > + /* more than 1 second = 1000 millisecons */ > > + if (end_time > start_time + 1000) { > > + s->dirty_pages_rate = num_dirty_pages_period * 1000 > > + / (end_time - start_time); > > + s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; > > + start_time = end_time; > > + num_dirty_pages_period = 0; > > + } > > +} > > + > > +/* > > + * ram_save_block: Writes a page of memory to the stream f > > + * > > + * Returns: The number of bytes written. 
> > + * 0 means no dirty pages > > + */ > > + > > +static int ram_save_block(QEMUFile *f, bool last_stage) > > +{ > > + RAMBlock *block = last_seen_block; > > + ram_addr_t offset = last_offset; > > + bool complete_round = false; > > + int bytes_sent = 0; > > + MemoryRegion *mr; > > + ram_addr_t current_addr; > > + > > + if (!block) > > + block = QTAILQ_FIRST(&ram_list.blocks); > > + > > + while (true) { > > + mr = block->mr; > > + offset = migration_bitmap_find_and_reset_dirty(mr, offset); > > + if (complete_round && block == last_seen_block && > > + offset >= last_offset) { > > + break; > > + } > > + if (offset >= block->length) { > > + offset = 0; > > + block = QTAILQ_NEXT(block, next); > > + if (!block) { > > + block = QTAILQ_FIRST(&ram_list.blocks); > > + complete_round = true; > > + } > > + } else { > > + uint8_t *p; > > + int cont = (block == last_sent_block) ? > > + RAM_SAVE_FLAG_CONTINUE : 0; > > + > > + p = memory_region_get_ram_ptr(mr) + offset; > > + > > + /* In doubt sent page as normal */ > > + bytes_sent = -1; > > + if (is_dup_page(p)) { > > + acct_info.dup_pages++; > > + bytes_sent = save_block_hdr(f, block, offset, cont, > > + RAM_SAVE_FLAG_COMPRESS); > > + qemu_put_byte(f, *p); > > + bytes_sent += 1; > > + } else if (migrate_use_xbzrle()) { > > + current_addr = block->offset + offset; > > + bytes_sent = save_xbzrle_page(f, p, current_addr, block, > > + offset, cont, last_stage); > > + if (!last_stage) { > > + p = get_cached_data(XBZRLE.cache, current_addr); > > + } > > + } > > + > > + /* XBZRLE overflow or normal page */ > > + if (bytes_sent == -1) { > > + bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); > > + qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > > + bytes_sent += TARGET_PAGE_SIZE; > > + acct_info.norm_pages++; > > + } > > + > > + /* if page is unmodified, continue to the next */ > > + if (bytes_sent > 0) { > > + last_sent_block = block; > > + break; > > + } > > + } > > + } > > + last_seen_block = block; > > + 
last_offset = offset; > > + > > + return bytes_sent; > > +} > > + > > +static uint64_t bytes_transferred; > > + > > +static ram_addr_t ram_save_remaining(void) > > +{ > > + return migration_dirty_pages; > > +} > > + > > +uint64_t ram_bytes_remaining(void) > > +{ > > + return ram_save_remaining() * TARGET_PAGE_SIZE; > > +} > > + > > +uint64_t ram_bytes_transferred(void) > > +{ > > + return bytes_transferred; > > +} > > + > > +uint64_t ram_bytes_total(void) > > +{ > > + RAMBlock *block; > > + uint64_t total = 0; > > + > > + QTAILQ_FOREACH(block, &ram_list.blocks, next) > > + total += block->length; > > + > > + return total; > > +} > > + > > +static void migration_end(void) > > +{ > > + if (migration_bitmap) { > > + memory_global_dirty_log_stop(); > > + g_free(migration_bitmap); > > + migration_bitmap = NULL; > > + } > > + > > + if (XBZRLE.cache) { > > + cache_fini(XBZRLE.cache); > > + g_free(XBZRLE.cache); > > + g_free(XBZRLE.encoded_buf); > > + g_free(XBZRLE.current_buf); > > + g_free(XBZRLE.decoded_buf); > > + XBZRLE.cache = NULL; > > + } > > +} > > + > > +static void ram_migration_cancel(void *opaque) > > +{ > > + migration_end(); > > +} > > + > > +static void reset_ram_globals(void) > > +{ > > + last_seen_block = NULL; > > + last_sent_block = NULL; > > + last_offset = 0; > > + last_version = ram_list.version; > > +} > > + > > +#define MAX_WAIT 50 /* ms, half buffered_file limit */ > > + > > +static int ram_save_setup(QEMUFile *f, void *opaque) > > +{ > > + RAMBlock *block; > > + int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; > > + > > + migration_bitmap = bitmap_new(ram_pages); > > + bitmap_set(migration_bitmap, 0, ram_pages); > > + migration_dirty_pages = ram_pages; > > + > > + if (migrate_use_xbzrle()) { > > + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / > > + TARGET_PAGE_SIZE, > > + TARGET_PAGE_SIZE); > > + if (!XBZRLE.cache) { > > + DPRINTF("Error creating cache\n"); > > + return -1; > > + } > > + XBZRLE.encoded_buf = 
g_malloc0(TARGET_PAGE_SIZE); > > + XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); > > + acct_clear(); > > + } > > + > > + qemu_mutex_lock_iothread(); > > + qemu_mutex_lock_ramlist(); > > + bytes_transferred = 0; > > + reset_ram_globals(); > > + > > + memory_global_dirty_log_start(); > > + migration_bitmap_sync(); > > + qemu_mutex_unlock_iothread(); > > + > > + qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); > > + > > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > > + qemu_put_byte(f, strlen(block->idstr)); > > + qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); > > + qemu_put_be64(f, block->length); > > + } > > + > > + qemu_mutex_unlock_ramlist(); > > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > + > > + return 0; > > +} > > + > > +static int ram_save_iterate(QEMUFile *f, void *opaque) > > +{ > > + int ret; > > + int i; > > + int64_t t0; > > + int total_sent = 0; > > + > > + qemu_mutex_lock_ramlist(); > > + > > + if (ram_list.version != last_version) { > > + reset_ram_globals(); > > + } > > + > > + t0 = qemu_get_clock_ns(rt_clock); > > + i = 0; > > + while ((ret = qemu_file_rate_limit(f)) == 0) { > > + int bytes_sent; > > + > > + bytes_sent = ram_save_block(f, false); > > + /* no more blocks to sent */ > > + if (bytes_sent == 0) { > > + break; > > + } > > + total_sent += bytes_sent; > > + acct_info.iterations++; > > + /* we want to check in the 1st loop, just in case it was the 1st time > > + and we had to sync the dirty bitmap. 
> > + qemu_get_clock_ns() is a bit expensive, so we only check each some > > + iterations > > + */ > > + if ((i & 63) == 0) { > > + uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000; > > + if (t1 > MAX_WAIT) { > > + DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", > > + t1, i); > > + break; > > + } > > + } > > + i++; > > + } > > + > > + qemu_mutex_unlock_ramlist(); > > + > > + if (ret < 0) { > > + bytes_transferred += total_sent; > > + return ret; > > + } > > + > > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > + total_sent += 8; > > + bytes_transferred += total_sent; > > + > > + return total_sent; > > +} > > + > > +static int ram_save_complete(QEMUFile *f, void *opaque) > > +{ > > + qemu_mutex_lock_ramlist(); > > + migration_bitmap_sync(); > > + > > + /* try transferring iterative blocks of memory */ > > + > > + /* flush all remaining blocks regardless of rate limiting */ > > + while (true) { > > + int bytes_sent; > > + > > + bytes_sent = ram_save_block(f, true); > > + /* no more blocks to sent */ > > + if (bytes_sent == 0) { > > + break; > > + } > > + bytes_transferred += bytes_sent; > > + } > > + migration_end(); > > + > > + qemu_mutex_unlock_ramlist(); > > + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > + > > + return 0; > > +} > > + > > +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) > > +{ > > + uint64_t remaining_size; > > + > > + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; > > + > > + if (remaining_size < max_size) { > > + qemu_mutex_lock_iothread(); > > + migration_bitmap_sync(); > > + qemu_mutex_unlock_iothread(); > > + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; > > + } > > + return remaining_size; > > +} > > + > > +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) > > +{ > > + int ret, rc = 0; > > + unsigned int xh_len; > > + int xh_flags; > > + > > + if (!XBZRLE.decoded_buf) { > > + XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); > > + } > > + > > + /* extract RLE 
header */ > > + xh_flags = qemu_get_byte(f); > > + xh_len = qemu_get_be16(f); > > + > > + if (xh_flags != ENCODING_FLAG_XBZRLE) { > > + fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); > > + return -1; > > + } > > + > > + if (xh_len > TARGET_PAGE_SIZE) { > > + fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); > > + return -1; > > + } > > + /* load data and decode */ > > + qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); > > + > > + /* decode RLE */ > > + ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, > > + TARGET_PAGE_SIZE); > > + if (ret == -1) { > > + fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); > > + rc = -1; > > + } else if (ret > TARGET_PAGE_SIZE) { > > + fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", > > + ret, TARGET_PAGE_SIZE); > > + abort(); > > + } > > + > > + return rc; > > +} > > + > > +static inline void *host_from_stream_offset(QEMUFile *f, > > + ram_addr_t offset, > > + int flags) > > +{ > > + static RAMBlock *block = NULL; > > + char id[256]; > > + uint8_t len; > > + > > + if (flags & RAM_SAVE_FLAG_CONTINUE) { > > + if (!block) { > > + fprintf(stderr, "Ack, bad migration stream!\n"); > > + return NULL; > > + } > > + > > + return memory_region_get_ram_ptr(block->mr) + offset; > > + } > > + > > + len = qemu_get_byte(f); > > + qemu_get_buffer(f, (uint8_t *)id, len); > > + id[len] = 0; > > + > > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > > + if (!strncmp(id, block->idstr, sizeof(id))) > > + return memory_region_get_ram_ptr(block->mr) + offset; > > + } > > + > > + fprintf(stderr, "Can't find block %s!\n", id); > > + return NULL; > > +} > > + > > +static int ram_load(QEMUFile *f, void *opaque, int version_id) > > +{ > > + ram_addr_t addr; > > + int flags, ret = 0; > > + int error; > > + static uint64_t seq_iter; > > + > > + seq_iter++; > > + > > + if (version_id < 4 || version_id > 4) { > > + return -EINVAL; > > + } > > + > > + do { > > + addr = 
qemu_get_be64(f); > > + > > + flags = addr & ~TARGET_PAGE_MASK; > > + addr &= TARGET_PAGE_MASK; > > + > > + if (flags & RAM_SAVE_FLAG_MEM_SIZE) { > > + if (version_id == 4) { > > + /* Synchronize RAM block list */ > > + char id[256]; > > + ram_addr_t length; > > + ram_addr_t total_ram_bytes = addr; > > + > > + while (total_ram_bytes) { > > + RAMBlock *block; > > + uint8_t len; > > + > > + len = qemu_get_byte(f); > > + qemu_get_buffer(f, (uint8_t *)id, len); > > + id[len] = 0; > > + length = qemu_get_be64(f); > > + > > + QTAILQ_FOREACH(block, &ram_list.blocks, next) { > > + if (!strncmp(id, block->idstr, sizeof(id))) { > > + if (block->length != length) { > > + ret = -EINVAL; > > + goto done; > > + } > > + break; > > + } > > + } > > + > > + if (!block) { > > + fprintf(stderr, "Unknown ramblock \"%s\", cannot " > > + "accept migration\n", id); > > + ret = -EINVAL; > > + goto done; > > + } > > + > > + total_ram_bytes -= length; > > + } > > + } > > + } > > + > > + if (flags & RAM_SAVE_FLAG_COMPRESS) { > > + void *host; > > + uint8_t ch; > > + > > + host = host_from_stream_offset(f, addr, flags); > > + if (!host) { > > + return -EINVAL; > > + } > > + > > + ch = qemu_get_byte(f); > > + memset(host, ch, TARGET_PAGE_SIZE); > > +#ifndef _WIN32 > > + if (ch == 0 && > > + (!kvm_enabled() || kvm_has_sync_mmu()) && > > + getpagesize() <= TARGET_PAGE_SIZE) { > > + qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); > > + } > > +#endif > > + } else if (flags & RAM_SAVE_FLAG_PAGE) { > > + void *host; > > + > > + host = host_from_stream_offset(f, addr, flags); > > + if (!host) { > > + return -EINVAL; > > + } > > + > > + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); > > + } else if (flags & RAM_SAVE_FLAG_XBZRLE) { > > + void *host = host_from_stream_offset(f, addr, flags); > > + if (!host) { > > + return -EINVAL; > > + } > > + > > + if (load_xbzrle(f, addr, host) < 0) { > > + ret = -EINVAL; > > + goto done; > > + } > > + } > > + error = qemu_file_get_error(f); > > + if (error) { 
> > + ret = error; > > + goto done; > > + } > > + } while (!(flags & RAM_SAVE_FLAG_EOS)); > > + > > +done: > > + DPRINTF("Completed load of VM with exit code %d seq iteration " > > + "%" PRIu64 "\n", ret, seq_iter); > > + return ret; > > +} > > + > > +SaveVMHandlers savevm_ram_handlers = { > > + .save_live_setup = ram_save_setup, > > + .save_live_iterate = ram_save_iterate, > > + .save_live_complete = ram_save_complete, > > + .save_live_pending = ram_save_pending, > > + .load_state = ram_load, > > + .cancel = ram_migration_cancel, > > +}; > >
diff --git a/Makefile.target b/Makefile.target index ca657b3..54bc21b 100644 --- a/Makefile.target +++ b/Makefile.target @@ -108,7 +108,7 @@ CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y) CONFIG_NO_GET_MEMORY_MAPPING = $(if $(subst n,,$(CONFIG_HAVE_GET_MEMORY_MAPPING)),n,y) CONFIG_NO_CORE_DUMP = $(if $(subst n,,$(CONFIG_HAVE_CORE_DUMP)),n,y) -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o savevm-ram.o obj-y += qtest.o obj-y += hw/ obj-$(CONFIG_KVM) += kvm-all.o diff --git a/arch_init.c b/arch_init.c index 98e2bc6..9943ed4 100644 --- a/arch_init.c +++ b/arch_init.c @@ -31,20 +31,15 @@ #include "config.h" #include "monitor/monitor.h" #include "sysemu/sysemu.h" -#include "qemu/bitops.h" -#include "qemu/bitmap.h" #include "sysemu/arch_init.h" #include "audio/audio.h" #include "hw/pc.h" #include "hw/pci/pci.h" #include "hw/audiodev.h" #include "sysemu/kvm.h" -#include "migration/migration.h" #include "exec/gdbstub.h" #include "hw/smbios.h" -#include "exec/address-spaces.h" #include "hw/pcspk.h" -#include "migration/page_cache.h" #include "qemu/config-file.h" #include "qmp-commands.h" #include "trace.h" @@ -103,38 +98,6 @@ int graphic_depth = 15; const uint32_t arch_type = QEMU_ARCH; -/***********************************************************/ -/* ram save/restore */ - -#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ -#define RAM_SAVE_FLAG_COMPRESS 0x02 -#define RAM_SAVE_FLAG_MEM_SIZE 0x04 -#define RAM_SAVE_FLAG_PAGE 0x08 -#define RAM_SAVE_FLAG_EOS 0x10 -#define RAM_SAVE_FLAG_CONTINUE 0x20 -#define RAM_SAVE_FLAG_XBZRLE 0x40 - -#ifdef __ALTIVEC__ -#include <altivec.h> -#define VECTYPE vector unsigned char -#define SPLAT(p) vec_splat(vec_ld(0, p), 0) -#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) -/* altivec.h may redefine the bool macro as vector type. - * Reset it to POSIX semantics. 
*/ -#undef bool -#define bool _Bool -#elif defined __SSE2__ -#include <emmintrin.h> -#define VECTYPE __m128i -#define SPLAT(p) _mm_set1_epi8(*(p)) -#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) -#else -#define VECTYPE unsigned long -#define SPLAT(p) (*(p) * (~0UL / 255)) -#define ALL_EQ(v1, v2) ((v1) == (v2)) -#endif - - static struct defconfig_file { const char *filename; /* Indicates it is an user config file (disabled by -no-user-config) */ @@ -145,7 +108,6 @@ static struct defconfig_file { { NULL }, /* end of list */ }; - int qemu_read_default_config_files(bool userconfig) { int ret; @@ -164,731 +126,6 @@ int qemu_read_default_config_files(bool userconfig) return 0; } -static int is_dup_page(uint8_t *page) -{ - VECTYPE *p = (VECTYPE *)page; - VECTYPE val = SPLAT(page); - int i; - - for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) { - if (!ALL_EQ(val, p[i])) { - return 0; - } - } - - return 1; -} - -/* struct contains XBZRLE cache and a static page - used by the compression */ -static struct { - /* buffer used for XBZRLE encoding */ - uint8_t *encoded_buf; - /* buffer for storing page content */ - uint8_t *current_buf; - /* buffer used for XBZRLE decoding */ - uint8_t *decoded_buf; - /* Cache for XBZRLE */ - PageCache *cache; -} XBZRLE = { - .encoded_buf = NULL, - .current_buf = NULL, - .decoded_buf = NULL, - .cache = NULL, -}; - - -int64_t xbzrle_cache_resize(int64_t new_size) -{ - if (XBZRLE.cache != NULL) { - return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * - TARGET_PAGE_SIZE; - } - return pow2floor(new_size); -} - -/* accounting for migration statistics */ -typedef struct AccountingInfo { - uint64_t dup_pages; - uint64_t norm_pages; - uint64_t iterations; - uint64_t xbzrle_bytes; - uint64_t xbzrle_pages; - uint64_t xbzrle_cache_miss; - uint64_t xbzrle_overflows; -} AccountingInfo; - -static AccountingInfo acct_info; - -static void acct_clear(void) -{ - memset(&acct_info, 0, sizeof(acct_info)); -} - 
-uint64_t dup_mig_bytes_transferred(void) -{ - return acct_info.dup_pages * TARGET_PAGE_SIZE; -} - -uint64_t dup_mig_pages_transferred(void) -{ - return acct_info.dup_pages; -} - -uint64_t norm_mig_bytes_transferred(void) -{ - return acct_info.norm_pages * TARGET_PAGE_SIZE; -} - -uint64_t norm_mig_pages_transferred(void) -{ - return acct_info.norm_pages; -} - -uint64_t xbzrle_mig_bytes_transferred(void) -{ - return acct_info.xbzrle_bytes; -} - -uint64_t xbzrle_mig_pages_transferred(void) -{ - return acct_info.xbzrle_pages; -} - -uint64_t xbzrle_mig_pages_cache_miss(void) -{ - return acct_info.xbzrle_cache_miss; -} - -uint64_t xbzrle_mig_pages_overflow(void) -{ - return acct_info.xbzrle_overflows; -} - -static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, - int cont, int flag) -{ - size_t size; - - qemu_put_be64(f, offset | cont | flag); - size = 8; - - if (!cont) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, - strlen(block->idstr)); - size += 1 + strlen(block->idstr); - } - return size; -} - -#define ENCODING_FLAG_XBZRLE 0x1 - -static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, - ram_addr_t current_addr, RAMBlock *block, - ram_addr_t offset, int cont, bool last_stage) -{ - int encoded_len = 0, bytes_sent = -1; - uint8_t *prev_cached_page; - - if (!cache_is_cached(XBZRLE.cache, current_addr)) { - if (!last_stage) { - cache_insert(XBZRLE.cache, current_addr, current_data); - } - acct_info.xbzrle_cache_miss++; - return -1; - } - - prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); - - /* save current buffer into memory */ - memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); - - /* XBZRLE encoding (if there is no overflow) */ - encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, - TARGET_PAGE_SIZE, XBZRLE.encoded_buf, - TARGET_PAGE_SIZE); - if (encoded_len == 0) { - DPRINTF("Skipping unmodified page\n"); - return 0; - } else if (encoded_len == 
-1) { - DPRINTF("Overflow\n"); - acct_info.xbzrle_overflows++; - /* update data in the cache */ - memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); - return -1; - } - - /* we need to update the data in the cache, in order to get the same data */ - if (!last_stage) { - memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); - } - - /* Send XBZRLE based compressed page */ - bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); - qemu_put_byte(f, ENCODING_FLAG_XBZRLE); - qemu_put_be16(f, encoded_len); - qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); - bytes_sent += encoded_len + 1 + 2; - acct_info.xbzrle_pages++; - acct_info.xbzrle_bytes += bytes_sent; - - return bytes_sent; -} - - -/* This is the last block that we have visited serching for dirty pages - */ -static RAMBlock *last_seen_block; -/* This is the last block from where we have sent data */ -static RAMBlock *last_sent_block; -static ram_addr_t last_offset; -static unsigned long *migration_bitmap; -static uint64_t migration_dirty_pages; -static uint32_t last_version; - -static inline -ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, - ram_addr_t start) -{ - unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; - unsigned long nr = base + (start >> TARGET_PAGE_BITS); - unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS); - - unsigned long next = find_next_bit(migration_bitmap, size, nr); - - if (next < size) { - clear_bit(next, migration_bitmap); - migration_dirty_pages--; - } - return (next - base) << TARGET_PAGE_BITS; -} - -static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, - ram_addr_t offset) -{ - bool ret; - int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; - - ret = test_and_set_bit(nr, migration_bitmap); - - if (!ret) { - migration_dirty_pages++; - } - return ret; -} - -/* Needs iothread lock! 
*/ - -static void migration_bitmap_sync(void) -{ - RAMBlock *block; - ram_addr_t addr; - uint64_t num_dirty_pages_init = migration_dirty_pages; - MigrationState *s = migrate_get_current(); - static int64_t start_time; - static int64_t num_dirty_pages_period; - int64_t end_time; - - if (!start_time) { - start_time = qemu_get_clock_ms(rt_clock); - } - - trace_migration_bitmap_sync_start(); - memory_global_sync_dirty_bitmap(get_system_memory()); - - QTAILQ_FOREACH(block, &ram_list.blocks, next) { - for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { - if (memory_region_test_and_clear_dirty(block->mr, - addr, TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION)) { - migration_bitmap_set_dirty(block->mr, addr); - } - } - } - trace_migration_bitmap_sync_end(migration_dirty_pages - - num_dirty_pages_init); - num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; - end_time = qemu_get_clock_ms(rt_clock); - - /* more than 1 second = 1000 millisecons */ - if (end_time > start_time + 1000) { - s->dirty_pages_rate = num_dirty_pages_period * 1000 - / (end_time - start_time); - s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; - start_time = end_time; - num_dirty_pages_period = 0; - } -} - -/* - * ram_save_block: Writes a page of memory to the stream f - * - * Returns: The number of bytes written. 
- * 0 means no dirty pages - */ - -static int ram_save_block(QEMUFile *f, bool last_stage) -{ - RAMBlock *block = last_seen_block; - ram_addr_t offset = last_offset; - bool complete_round = false; - int bytes_sent = 0; - MemoryRegion *mr; - ram_addr_t current_addr; - - if (!block) - block = QTAILQ_FIRST(&ram_list.blocks); - - while (true) { - mr = block->mr; - offset = migration_bitmap_find_and_reset_dirty(mr, offset); - if (complete_round && block == last_seen_block && - offset >= last_offset) { - break; - } - if (offset >= block->length) { - offset = 0; - block = QTAILQ_NEXT(block, next); - if (!block) { - block = QTAILQ_FIRST(&ram_list.blocks); - complete_round = true; - } - } else { - uint8_t *p; - int cont = (block == last_sent_block) ? - RAM_SAVE_FLAG_CONTINUE : 0; - - p = memory_region_get_ram_ptr(mr) + offset; - - /* In doubt sent page as normal */ - bytes_sent = -1; - if (is_dup_page(p)) { - acct_info.dup_pages++; - bytes_sent = save_block_hdr(f, block, offset, cont, - RAM_SAVE_FLAG_COMPRESS); - qemu_put_byte(f, *p); - bytes_sent += 1; - } else if (migrate_use_xbzrle()) { - current_addr = block->offset + offset; - bytes_sent = save_xbzrle_page(f, p, current_addr, block, - offset, cont, last_stage); - if (!last_stage) { - p = get_cached_data(XBZRLE.cache, current_addr); - } - } - - /* XBZRLE overflow or normal page */ - if (bytes_sent == -1) { - bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); - qemu_put_buffer(f, p, TARGET_PAGE_SIZE); - bytes_sent += TARGET_PAGE_SIZE; - acct_info.norm_pages++; - } - - /* if page is unmodified, continue to the next */ - if (bytes_sent > 0) { - last_sent_block = block; - break; - } - } - } - last_seen_block = block; - last_offset = offset; - - return bytes_sent; -} - -static uint64_t bytes_transferred; - -static ram_addr_t ram_save_remaining(void) -{ - return migration_dirty_pages; -} - -uint64_t ram_bytes_remaining(void) -{ - return ram_save_remaining() * TARGET_PAGE_SIZE; -} - -uint64_t 
ram_bytes_transferred(void) -{ - return bytes_transferred; -} - -uint64_t ram_bytes_total(void) -{ - RAMBlock *block; - uint64_t total = 0; - - QTAILQ_FOREACH(block, &ram_list.blocks, next) - total += block->length; - - return total; -} - -static void migration_end(void) -{ - if (migration_bitmap) { - memory_global_dirty_log_stop(); - g_free(migration_bitmap); - migration_bitmap = NULL; - } - - if (XBZRLE.cache) { - cache_fini(XBZRLE.cache); - g_free(XBZRLE.cache); - g_free(XBZRLE.encoded_buf); - g_free(XBZRLE.current_buf); - g_free(XBZRLE.decoded_buf); - XBZRLE.cache = NULL; - } -} - -static void ram_migration_cancel(void *opaque) -{ - migration_end(); -} - -static void reset_ram_globals(void) -{ - last_seen_block = NULL; - last_sent_block = NULL; - last_offset = 0; - last_version = ram_list.version; -} - -#define MAX_WAIT 50 /* ms, half buffered_file limit */ - -static int ram_save_setup(QEMUFile *f, void *opaque) -{ - RAMBlock *block; - int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; - - migration_bitmap = bitmap_new(ram_pages); - bitmap_set(migration_bitmap, 0, ram_pages); - migration_dirty_pages = ram_pages; - - if (migrate_use_xbzrle()) { - XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / - TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE); - if (!XBZRLE.cache) { - DPRINTF("Error creating cache\n"); - return -1; - } - XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); - XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); - acct_clear(); - } - - qemu_mutex_lock_iothread(); - qemu_mutex_lock_ramlist(); - bytes_transferred = 0; - reset_ram_globals(); - - memory_global_dirty_log_start(); - migration_bitmap_sync(); - qemu_mutex_unlock_iothread(); - - qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); - - QTAILQ_FOREACH(block, &ram_list.blocks, next) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); - qemu_put_be64(f, block->length); - } - - qemu_mutex_unlock_ramlist(); - qemu_put_be64(f, 
RAM_SAVE_FLAG_EOS); - - return 0; -} - -static int ram_save_iterate(QEMUFile *f, void *opaque) -{ - int ret; - int i; - int64_t t0; - int total_sent = 0; - - qemu_mutex_lock_ramlist(); - - if (ram_list.version != last_version) { - reset_ram_globals(); - } - - t0 = qemu_get_clock_ns(rt_clock); - i = 0; - while ((ret = qemu_file_rate_limit(f)) == 0) { - int bytes_sent; - - bytes_sent = ram_save_block(f, false); - /* no more blocks to sent */ - if (bytes_sent == 0) { - break; - } - total_sent += bytes_sent; - acct_info.iterations++; - /* we want to check in the 1st loop, just in case it was the 1st time - and we had to sync the dirty bitmap. - qemu_get_clock_ns() is a bit expensive, so we only check each some - iterations - */ - if ((i & 63) == 0) { - uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000; - if (t1 > MAX_WAIT) { - DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", - t1, i); - break; - } - } - i++; - } - - qemu_mutex_unlock_ramlist(); - - if (ret < 0) { - bytes_transferred += total_sent; - return ret; - } - - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - total_sent += 8; - bytes_transferred += total_sent; - - return total_sent; -} - -static int ram_save_complete(QEMUFile *f, void *opaque) -{ - qemu_mutex_lock_ramlist(); - migration_bitmap_sync(); - - /* try transferring iterative blocks of memory */ - - /* flush all remaining blocks regardless of rate limiting */ - while (true) { - int bytes_sent; - - bytes_sent = ram_save_block(f, true); - /* no more blocks to sent */ - if (bytes_sent == 0) { - break; - } - bytes_transferred += bytes_sent; - } - migration_end(); - - qemu_mutex_unlock_ramlist(); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - - return 0; -} - -static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) -{ - uint64_t remaining_size; - - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; - - if (remaining_size < max_size) { - qemu_mutex_lock_iothread(); - migration_bitmap_sync(); - 
qemu_mutex_unlock_iothread(); - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; - } - return remaining_size; -} - -static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) -{ - int ret, rc = 0; - unsigned int xh_len; - int xh_flags; - - if (!XBZRLE.decoded_buf) { - XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); - } - - /* extract RLE header */ - xh_flags = qemu_get_byte(f); - xh_len = qemu_get_be16(f); - - if (xh_flags != ENCODING_FLAG_XBZRLE) { - fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); - return -1; - } - - if (xh_len > TARGET_PAGE_SIZE) { - fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); - return -1; - } - /* load data and decode */ - qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); - - /* decode RLE */ - ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, - TARGET_PAGE_SIZE); - if (ret == -1) { - fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); - rc = -1; - } else if (ret > TARGET_PAGE_SIZE) { - fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", - ret, TARGET_PAGE_SIZE); - abort(); - } - - return rc; -} - -static inline void *host_from_stream_offset(QEMUFile *f, - ram_addr_t offset, - int flags) -{ - static RAMBlock *block = NULL; - char id[256]; - uint8_t len; - - if (flags & RAM_SAVE_FLAG_CONTINUE) { - if (!block) { - fprintf(stderr, "Ack, bad migration stream!\n"); - return NULL; - } - - return memory_region_get_ram_ptr(block->mr) + offset; - } - - len = qemu_get_byte(f); - qemu_get_buffer(f, (uint8_t *)id, len); - id[len] = 0; - - QTAILQ_FOREACH(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id))) - return memory_region_get_ram_ptr(block->mr) + offset; - } - - fprintf(stderr, "Can't find block %s!\n", id); - return NULL; -} - -static int ram_load(QEMUFile *f, void *opaque, int version_id) -{ - ram_addr_t addr; - int flags, ret = 0; - int error; - static uint64_t seq_iter; - - seq_iter++; - - if (version_id < 4 || version_id > 
4) { - return -EINVAL; - } - - do { - addr = qemu_get_be64(f); - - flags = addr & ~TARGET_PAGE_MASK; - addr &= TARGET_PAGE_MASK; - - if (flags & RAM_SAVE_FLAG_MEM_SIZE) { - if (version_id == 4) { - /* Synchronize RAM block list */ - char id[256]; - ram_addr_t length; - ram_addr_t total_ram_bytes = addr; - - while (total_ram_bytes) { - RAMBlock *block; - uint8_t len; - - len = qemu_get_byte(f); - qemu_get_buffer(f, (uint8_t *)id, len); - id[len] = 0; - length = qemu_get_be64(f); - - QTAILQ_FOREACH(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id))) { - if (block->length != length) { - ret = -EINVAL; - goto done; - } - break; - } - } - - if (!block) { - fprintf(stderr, "Unknown ramblock \"%s\", cannot " - "accept migration\n", id); - ret = -EINVAL; - goto done; - } - - total_ram_bytes -= length; - } - } - } - - if (flags & RAM_SAVE_FLAG_COMPRESS) { - void *host; - uint8_t ch; - - host = host_from_stream_offset(f, addr, flags); - if (!host) { - return -EINVAL; - } - - ch = qemu_get_byte(f); - memset(host, ch, TARGET_PAGE_SIZE); -#ifndef _WIN32 - if (ch == 0 && - (!kvm_enabled() || kvm_has_sync_mmu()) && - getpagesize() <= TARGET_PAGE_SIZE) { - qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); - } -#endif - } else if (flags & RAM_SAVE_FLAG_PAGE) { - void *host; - - host = host_from_stream_offset(f, addr, flags); - if (!host) { - return -EINVAL; - } - - qemu_get_buffer(f, host, TARGET_PAGE_SIZE); - } else if (flags & RAM_SAVE_FLAG_XBZRLE) { - void *host = host_from_stream_offset(f, addr, flags); - if (!host) { - return -EINVAL; - } - - if (load_xbzrle(f, addr, host) < 0) { - ret = -EINVAL; - goto done; - } - } - error = qemu_file_get_error(f); - if (error) { - ret = error; - goto done; - } - } while (!(flags & RAM_SAVE_FLAG_EOS)); - -done: - DPRINTF("Completed load of VM with exit code %d seq iteration " - "%" PRIu64 "\n", ret, seq_iter); - return ret; -} - -SaveVMHandlers savevm_ram_handlers = { - .save_live_setup = ram_save_setup, - 
.save_live_iterate = ram_save_iterate, - .save_live_complete = ram_save_complete, - .save_live_pending = ram_save_pending, - .load_state = ram_load, - .cancel = ram_migration_cancel, -}; - #ifdef HAS_AUDIO struct soundhw { const char *name; diff --git a/savevm-ram.c b/savevm-ram.c new file mode 100644 index 0000000..cea656c --- /dev/null +++ b/savevm-ram.c @@ -0,0 +1,804 @@ +/* + * RAM Migration support + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include <stdint.h> +#include <stdarg.h> +#include <stdlib.h> +#ifndef _WIN32 +#include <sys/types.h> +#include <sys/mman.h> +#endif +#include "sysemu/sysemu.h" +#include "qemu/bitops.h" +#include "qemu/bitmap.h" +#include "sysemu/kvm.h" +#include "migration/migration.h" +#include "exec/address-spaces.h" +#include "migration/page_cache.h" +#include "migration/qemu-file.h" +#include "trace.h" +#include "exec/cpu-all.h" + +#ifdef DEBUG_ARCH_INIT +#define DPRINTF(fmt, ...) \ + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +/***********************************************************/ +/* ram save/restore */ + +#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ +#define RAM_SAVE_FLAG_COMPRESS 0x02 +#define RAM_SAVE_FLAG_MEM_SIZE 0x04 +#define RAM_SAVE_FLAG_PAGE 0x08 +#define RAM_SAVE_FLAG_EOS 0x10 +#define RAM_SAVE_FLAG_CONTINUE 0x20 +#define RAM_SAVE_FLAG_XBZRLE 0x40 + +#ifdef __ALTIVEC__ +#include <altivec.h> +#define VECTYPE vector unsigned char +#define SPLAT(p) vec_splat(vec_ld(0, p), 0) +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) +/* altivec.h may redefine the bool macro as vector type. + * Reset it to POSIX semantics. 
*/ +#undef bool +#define bool _Bool +#elif defined __SSE2__ +#include <emmintrin.h> +#define VECTYPE __m128i +#define SPLAT(p) _mm_set1_epi8(*(p)) +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) +#else +#define VECTYPE unsigned long +#define SPLAT(p) (*(p) * (~0UL / 255)) +#define ALL_EQ(v1, v2) ((v1) == (v2)) +#endif + +static int is_dup_page(uint8_t *page) +{ + VECTYPE *p = (VECTYPE *)page; + VECTYPE val = SPLAT(page); + int i; + + for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) { + if (!ALL_EQ(val, p[i])) { + return 0; + } + } + + return 1; +} + +/* struct contains XBZRLE cache and a static page + used by the compression */ +static struct { + /* buffer used for XBZRLE encoding */ + uint8_t *encoded_buf; + /* buffer for storing page content */ + uint8_t *current_buf; + /* buffer used for XBZRLE decoding */ + uint8_t *decoded_buf; + /* Cache for XBZRLE */ + PageCache *cache; +} XBZRLE = { + .encoded_buf = NULL, + .current_buf = NULL, + .decoded_buf = NULL, + .cache = NULL, +}; + + +int64_t xbzrle_cache_resize(int64_t new_size) +{ + if (XBZRLE.cache != NULL) { + return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * + TARGET_PAGE_SIZE; + } + return pow2floor(new_size); +} + +/* accounting for migration statistics */ +typedef struct AccountingInfo { + uint64_t dup_pages; + uint64_t norm_pages; + uint64_t iterations; + uint64_t xbzrle_bytes; + uint64_t xbzrle_pages; + uint64_t xbzrle_cache_miss; + uint64_t xbzrle_overflows; +} AccountingInfo; + +static AccountingInfo acct_info; + +static void acct_clear(void) +{ + memset(&acct_info, 0, sizeof(acct_info)); +} + +uint64_t dup_mig_bytes_transferred(void) +{ + return acct_info.dup_pages * TARGET_PAGE_SIZE; +} + +uint64_t dup_mig_pages_transferred(void) +{ + return acct_info.dup_pages; +} + +uint64_t norm_mig_bytes_transferred(void) +{ + return acct_info.norm_pages * TARGET_PAGE_SIZE; +} + +uint64_t norm_mig_pages_transferred(void) +{ + return acct_info.norm_pages; +} + 
+uint64_t xbzrle_mig_bytes_transferred(void) +{ + return acct_info.xbzrle_bytes; +} + +uint64_t xbzrle_mig_pages_transferred(void) +{ + return acct_info.xbzrle_pages; +} + +uint64_t xbzrle_mig_pages_cache_miss(void) +{ + return acct_info.xbzrle_cache_miss; +} + +uint64_t xbzrle_mig_pages_overflow(void) +{ + return acct_info.xbzrle_overflows; +} + +static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, + int cont, int flag) +{ + size_t size; + + qemu_put_be64(f, offset | cont | flag); + size = 8; + + if (!cont) { + qemu_put_byte(f, strlen(block->idstr)); + qemu_put_buffer(f, (uint8_t *)block->idstr, + strlen(block->idstr)); + size += 1 + strlen(block->idstr); + } + return size; +} + +#define ENCODING_FLAG_XBZRLE 0x1 + +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, + ram_addr_t current_addr, RAMBlock *block, + ram_addr_t offset, int cont, bool last_stage) +{ + int encoded_len = 0, bytes_sent = -1; + uint8_t *prev_cached_page; + + if (!cache_is_cached(XBZRLE.cache, current_addr)) { + if (!last_stage) { + cache_insert(XBZRLE.cache, current_addr, current_data); + } + acct_info.xbzrle_cache_miss++; + return -1; + } + + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); + + /* save current buffer into memory */ + memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); + + /* XBZRLE encoding (if there is no overflow) */ + encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); + if (encoded_len == 0) { + DPRINTF("Skipping unmodified page\n"); + return 0; + } else if (encoded_len == -1) { + DPRINTF("Overflow\n"); + acct_info.xbzrle_overflows++; + /* update data in the cache */ + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); + return -1; + } + + /* we need to update the data in the cache, in order to get the same data */ + if (!last_stage) { + memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); + } + + /* Send XBZRLE 
based compressed page */ + bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); + qemu_put_byte(f, ENCODING_FLAG_XBZRLE); + qemu_put_be16(f, encoded_len); + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); + bytes_sent += encoded_len + 1 + 2; + acct_info.xbzrle_pages++; + acct_info.xbzrle_bytes += bytes_sent; + + return bytes_sent; +} + + +/* This is the last block that we have visited serching for dirty pages + */ +static RAMBlock *last_seen_block; +/* This is the last block from where we have sent data */ +static RAMBlock *last_sent_block; +static ram_addr_t last_offset; +static unsigned long *migration_bitmap; +static uint64_t migration_dirty_pages; +static uint32_t last_version; + +static inline +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, + ram_addr_t start) +{ + unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; + unsigned long nr = base + (start >> TARGET_PAGE_BITS); + unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS); + + unsigned long next = find_next_bit(migration_bitmap, size, nr); + + if (next < size) { + clear_bit(next, migration_bitmap); + migration_dirty_pages--; + } + return (next - base) << TARGET_PAGE_BITS; +} + +static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, + ram_addr_t offset) +{ + bool ret; + int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; + + ret = test_and_set_bit(nr, migration_bitmap); + + if (!ret) { + migration_dirty_pages++; + } + return ret; +} + +/* Needs iothread lock! 
*/ + +static void migration_bitmap_sync(void) +{ + RAMBlock *block; + ram_addr_t addr; + uint64_t num_dirty_pages_init = migration_dirty_pages; + MigrationState *s = migrate_get_current(); + static int64_t start_time; + static int64_t num_dirty_pages_period; + int64_t end_time; + + if (!start_time) { + start_time = qemu_get_clock_ms(rt_clock); + } + + trace_migration_bitmap_sync_start(); + memory_global_sync_dirty_bitmap(get_system_memory()); + + QTAILQ_FOREACH(block, &ram_list.blocks, next) { + for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { + if (memory_region_test_and_clear_dirty(block->mr, + addr, TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION)) { + migration_bitmap_set_dirty(block->mr, addr); + } + } + } + trace_migration_bitmap_sync_end(migration_dirty_pages + - num_dirty_pages_init); + num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; + end_time = qemu_get_clock_ms(rt_clock); + + /* more than 1 second = 1000 millisecons */ + if (end_time > start_time + 1000) { + s->dirty_pages_rate = num_dirty_pages_period * 1000 + / (end_time - start_time); + s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; + start_time = end_time; + num_dirty_pages_period = 0; + } +} + +/* + * ram_save_block: Writes a page of memory to the stream f + * + * Returns: The number of bytes written. 
+ * 0 means no dirty pages + */ + +static int ram_save_block(QEMUFile *f, bool last_stage) +{ + RAMBlock *block = last_seen_block; + ram_addr_t offset = last_offset; + bool complete_round = false; + int bytes_sent = 0; + MemoryRegion *mr; + ram_addr_t current_addr; + + if (!block) + block = QTAILQ_FIRST(&ram_list.blocks); + + while (true) { + mr = block->mr; + offset = migration_bitmap_find_and_reset_dirty(mr, offset); + if (complete_round && block == last_seen_block && + offset >= last_offset) { + break; + } + if (offset >= block->length) { + offset = 0; + block = QTAILQ_NEXT(block, next); + if (!block) { + block = QTAILQ_FIRST(&ram_list.blocks); + complete_round = true; + } + } else { + uint8_t *p; + int cont = (block == last_sent_block) ? + RAM_SAVE_FLAG_CONTINUE : 0; + + p = memory_region_get_ram_ptr(mr) + offset; + + /* In doubt sent page as normal */ + bytes_sent = -1; + if (is_dup_page(p)) { + acct_info.dup_pages++; + bytes_sent = save_block_hdr(f, block, offset, cont, + RAM_SAVE_FLAG_COMPRESS); + qemu_put_byte(f, *p); + bytes_sent += 1; + } else if (migrate_use_xbzrle()) { + current_addr = block->offset + offset; + bytes_sent = save_xbzrle_page(f, p, current_addr, block, + offset, cont, last_stage); + if (!last_stage) { + p = get_cached_data(XBZRLE.cache, current_addr); + } + } + + /* XBZRLE overflow or normal page */ + if (bytes_sent == -1) { + bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); + qemu_put_buffer(f, p, TARGET_PAGE_SIZE); + bytes_sent += TARGET_PAGE_SIZE; + acct_info.norm_pages++; + } + + /* if page is unmodified, continue to the next */ + if (bytes_sent > 0) { + last_sent_block = block; + break; + } + } + } + last_seen_block = block; + last_offset = offset; + + return bytes_sent; +} + +static uint64_t bytes_transferred; + +static ram_addr_t ram_save_remaining(void) +{ + return migration_dirty_pages; +} + +uint64_t ram_bytes_remaining(void) +{ + return ram_save_remaining() * TARGET_PAGE_SIZE; +} + +uint64_t 
ram_bytes_transferred(void) +{ + return bytes_transferred; +} + +uint64_t ram_bytes_total(void) +{ + RAMBlock *block; + uint64_t total = 0; + + QTAILQ_FOREACH(block, &ram_list.blocks, next) + total += block->length; + + return total; +} + +static void migration_end(void) +{ + if (migration_bitmap) { + memory_global_dirty_log_stop(); + g_free(migration_bitmap); + migration_bitmap = NULL; + } + + if (XBZRLE.cache) { + cache_fini(XBZRLE.cache); + g_free(XBZRLE.cache); + g_free(XBZRLE.encoded_buf); + g_free(XBZRLE.current_buf); + g_free(XBZRLE.decoded_buf); + XBZRLE.cache = NULL; + } +} + +static void ram_migration_cancel(void *opaque) +{ + migration_end(); +} + +static void reset_ram_globals(void) +{ + last_seen_block = NULL; + last_sent_block = NULL; + last_offset = 0; + last_version = ram_list.version; +} + +#define MAX_WAIT 50 /* ms, half buffered_file limit */ + +static int ram_save_setup(QEMUFile *f, void *opaque) +{ + RAMBlock *block; + int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + + migration_bitmap = bitmap_new(ram_pages); + bitmap_set(migration_bitmap, 0, ram_pages); + migration_dirty_pages = ram_pages; + + if (migrate_use_xbzrle()) { + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / + TARGET_PAGE_SIZE, + TARGET_PAGE_SIZE); + if (!XBZRLE.cache) { + DPRINTF("Error creating cache\n"); + return -1; + } + XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); + XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); + acct_clear(); + } + + qemu_mutex_lock_iothread(); + qemu_mutex_lock_ramlist(); + bytes_transferred = 0; + reset_ram_globals(); + + memory_global_dirty_log_start(); + migration_bitmap_sync(); + qemu_mutex_unlock_iothread(); + + qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); + + QTAILQ_FOREACH(block, &ram_list.blocks, next) { + qemu_put_byte(f, strlen(block->idstr)); + qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); + qemu_put_be64(f, block->length); + } + + qemu_mutex_unlock_ramlist(); + qemu_put_be64(f, 
RAM_SAVE_FLAG_EOS); + + return 0; +} + +static int ram_save_iterate(QEMUFile *f, void *opaque) +{ + int ret; + int i; + int64_t t0; + int total_sent = 0; + + qemu_mutex_lock_ramlist(); + + if (ram_list.version != last_version) { + reset_ram_globals(); + } + + t0 = qemu_get_clock_ns(rt_clock); + i = 0; + while ((ret = qemu_file_rate_limit(f)) == 0) { + int bytes_sent; + + bytes_sent = ram_save_block(f, false); + /* no more blocks to sent */ + if (bytes_sent == 0) { + break; + } + total_sent += bytes_sent; + acct_info.iterations++; + /* we want to check in the 1st loop, just in case it was the 1st time + and we had to sync the dirty bitmap. + qemu_get_clock_ns() is a bit expensive, so we only check each some + iterations + */ + if ((i & 63) == 0) { + uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000; + if (t1 > MAX_WAIT) { + DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", + t1, i); + break; + } + } + i++; + } + + qemu_mutex_unlock_ramlist(); + + if (ret < 0) { + bytes_transferred += total_sent; + return ret; + } + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + total_sent += 8; + bytes_transferred += total_sent; + + return total_sent; +} + +static int ram_save_complete(QEMUFile *f, void *opaque) +{ + qemu_mutex_lock_ramlist(); + migration_bitmap_sync(); + + /* try transferring iterative blocks of memory */ + + /* flush all remaining blocks regardless of rate limiting */ + while (true) { + int bytes_sent; + + bytes_sent = ram_save_block(f, true); + /* no more blocks to sent */ + if (bytes_sent == 0) { + break; + } + bytes_transferred += bytes_sent; + } + migration_end(); + + qemu_mutex_unlock_ramlist(); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + + return 0; +} + +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) +{ + uint64_t remaining_size; + + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + + if (remaining_size < max_size) { + qemu_mutex_lock_iothread(); + migration_bitmap_sync(); + 
qemu_mutex_unlock_iothread(); + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + } + return remaining_size; +} + +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) +{ + int ret, rc = 0; + unsigned int xh_len; + int xh_flags; + + if (!XBZRLE.decoded_buf) { + XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); + } + + /* extract RLE header */ + xh_flags = qemu_get_byte(f); + xh_len = qemu_get_be16(f); + + if (xh_flags != ENCODING_FLAG_XBZRLE) { + fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); + return -1; + } + + if (xh_len > TARGET_PAGE_SIZE) { + fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); + return -1; + } + /* load data and decode */ + qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); + + /* decode RLE */ + ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, + TARGET_PAGE_SIZE); + if (ret == -1) { + fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); + rc = -1; + } else if (ret > TARGET_PAGE_SIZE) { + fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", + ret, TARGET_PAGE_SIZE); + abort(); + } + + return rc; +} + +static inline void *host_from_stream_offset(QEMUFile *f, + ram_addr_t offset, + int flags) +{ + static RAMBlock *block = NULL; + char id[256]; + uint8_t len; + + if (flags & RAM_SAVE_FLAG_CONTINUE) { + if (!block) { + fprintf(stderr, "Ack, bad migration stream!\n"); + return NULL; + } + + return memory_region_get_ram_ptr(block->mr) + offset; + } + + len = qemu_get_byte(f); + qemu_get_buffer(f, (uint8_t *)id, len); + id[len] = 0; + + QTAILQ_FOREACH(block, &ram_list.blocks, next) { + if (!strncmp(id, block->idstr, sizeof(id))) + return memory_region_get_ram_ptr(block->mr) + offset; + } + + fprintf(stderr, "Can't find block %s!\n", id); + return NULL; +} + +static int ram_load(QEMUFile *f, void *opaque, int version_id) +{ + ram_addr_t addr; + int flags, ret = 0; + int error; + static uint64_t seq_iter; + + seq_iter++; + + if (version_id < 4 || version_id > 
4) { + return -EINVAL; + } + + do { + addr = qemu_get_be64(f); + + flags = addr & ~TARGET_PAGE_MASK; + addr &= TARGET_PAGE_MASK; + + if (flags & RAM_SAVE_FLAG_MEM_SIZE) { + if (version_id == 4) { + /* Synchronize RAM block list */ + char id[256]; + ram_addr_t length; + ram_addr_t total_ram_bytes = addr; + + while (total_ram_bytes) { + RAMBlock *block; + uint8_t len; + + len = qemu_get_byte(f); + qemu_get_buffer(f, (uint8_t *)id, len); + id[len] = 0; + length = qemu_get_be64(f); + + QTAILQ_FOREACH(block, &ram_list.blocks, next) { + if (!strncmp(id, block->idstr, sizeof(id))) { + if (block->length != length) { + ret = -EINVAL; + goto done; + } + break; + } + } + + if (!block) { + fprintf(stderr, "Unknown ramblock \"%s\", cannot " + "accept migration\n", id); + ret = -EINVAL; + goto done; + } + + total_ram_bytes -= length; + } + } + } + + if (flags & RAM_SAVE_FLAG_COMPRESS) { + void *host; + uint8_t ch; + + host = host_from_stream_offset(f, addr, flags); + if (!host) { + return -EINVAL; + } + + ch = qemu_get_byte(f); + memset(host, ch, TARGET_PAGE_SIZE); +#ifndef _WIN32 + if (ch == 0 && + (!kvm_enabled() || kvm_has_sync_mmu()) && + getpagesize() <= TARGET_PAGE_SIZE) { + qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); + } +#endif + } else if (flags & RAM_SAVE_FLAG_PAGE) { + void *host; + + host = host_from_stream_offset(f, addr, flags); + if (!host) { + return -EINVAL; + } + + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + } else if (flags & RAM_SAVE_FLAG_XBZRLE) { + void *host = host_from_stream_offset(f, addr, flags); + if (!host) { + return -EINVAL; + } + + if (load_xbzrle(f, addr, host) < 0) { + ret = -EINVAL; + goto done; + } + } + error = qemu_file_get_error(f); + if (error) { + ret = error; + goto done; + } + } while (!(flags & RAM_SAVE_FLAG_EOS)); + +done: + DPRINTF("Completed load of VM with exit code %d seq iteration " + "%" PRIu64 "\n", ret, seq_iter); + return ret; +} + +SaveVMHandlers savevm_ram_handlers = { + .save_live_setup = ram_save_setup, + 
.save_live_iterate = ram_save_iterate, + .save_live_complete = ram_save_complete, + .save_live_pending = ram_save_pending, + .load_state = ram_load, + .cancel = ram_migration_cancel, +};
Move RAM migration code from arch_init to savevm-ram.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

Note: this is on top of Juan's pull request

Changes from v1:
- renamed source file, rebased on top of migration.next as suggested by Paolo

 Makefile.target | 2 +-
 arch_init.c | 763 -----------------------------------------------------
 savevm-ram.c | 804 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 805 insertions(+), 764 deletions(-)
 create mode 100644 savevm-ram.c