@@ -40,6 +40,7 @@
#include "hw/audio/audio.h"
#include "sysemu/kvm.h"
#include "migration/migration.h"
+#include "migration/postcopy-ram.h"
#include "hw/i386/smbios.h"
#include "exec/address-spaces.h"
#include "hw/audio/pcspk.h"
@@ -413,9 +414,15 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
return bytes_sent;
}
+/* mr: The region to search for dirty pages in
+ * start: Start address (typically so we can continue from a previous page)
+ * bitoffset: Returns the offset into the dirty bitmap at which the
+ * dirty bit was found
+ */
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
- ram_addr_t start)
+ ram_addr_t start,
+ unsigned long *bitoffset)
{
unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
unsigned long nr = base + (start >> TARGET_PAGE_BITS);
@@ -434,6 +441,7 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
clear_bit(next, migration_bitmap);
migration_dirty_pages--;
}
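+ /* Tell the caller where in the global dirty bitmap the page was found */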
+ *bitoffset = next;
return (next - base) << TARGET_PAGE_BITS;
}
@@ -562,6 +570,19 @@ static void migration_bitmap_sync(void)
}
}
+static RAMBlock *ram_find_block(const char *id)
+{
+ RAMBlock *block;
+
+ QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+ if (!strcmp(id, block->idstr)) {
+ return block;
+ }
+ }
+
+ return NULL;
+}
+
/*
* ram_save_page: Send the given page to the stream
*
@@ -650,13 +671,14 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
bool complete_round = false;
int bytes_sent = 0;
MemoryRegion *mr;
+ unsigned long bitoffset;
if (!block)
block = QTAILQ_FIRST(&ram_list.blocks);
while (true) {
mr = block->mr;
- offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+ offset = migration_bitmap_find_and_reset_dirty(mr, offset, &bitoffset);
if (complete_round && block == last_seen_block &&
offset >= last_offset) {
break;
@@ -674,6 +696,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
/* if page is unmodified, continue to the next */
if (bytes_sent > 0) {
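+ /*
+ * Remember the page was sent: postcopy uses this to discard, on the
+ * destination, any sent page that is subsequently redirtied.
+ */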
+ MigrationState *s = migrate_get_current();
+ if (s->sentmap) {
+ set_bit(bitoffset, s->sentmap);
+ }
+
last_sent_block = block;
break;
}
@@ -733,12 +760,19 @@ void free_xbzrle_decoded_buf(void)
static void migration_end(void)
{
+ MigrationState *s = migrate_get_current();
+
if (migration_bitmap) {
memory_global_dirty_log_stop();
g_free(migration_bitmap);
migration_bitmap = NULL;
}
+ if (s->sentmap) {
+ g_free(s->sentmap);
+ s->sentmap = NULL;
+ }
+
XBZRLE_cache_lock();
if (XBZRLE.cache) {
cache_fini(XBZRLE.cache);
@@ -806,6 +840,123 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
}
}
+/*
+ * Utility for the outgoing postcopy code; this performs
+ * sentmap &= migration_bitmap
+ * returning the length of the bitmap in target pages
+ */
+int64_t ram_mask_postcopy_bitmap(MigrationState *ms)
+{
+ int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+
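+ /* Pull the latest dirty page state into migration_bitmap before masking */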
+ migration_bitmap_sync();
+ bitmap_and(ms->sentmap, ms->sentmap, migration_bitmap, ram_pages);
+ return ram_pages;
+}
+
+/*
+ * Utility for the outgoing postcopy code.
+ * Calls postcopy_send_discard_bm_ram for each RAMBlock,
+ * passing it the block's name and bitmap indexes.
+ * Returns: 0 on success
+ * (We don't use qemu_ram_foreach_block: it passes unscaled byte lengths,
+ * which would force the postcopy code to deal with target page sizes)
+ */
+int ram_postcopy_each_ram_discard(MigrationState *ms)
+{
+ struct RAMBlock *block;
+ int ret;
+
+ QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+ /*
+ * Postcopy sends chunks of bitmap over the wire, but it
+ * just needs indexes at this point, avoids it having
+ * target page specific code.
+ */
+ unsigned long first, last;
+ first = block->offset >> TARGET_PAGE_BITS;
+ last = (block->offset + (block->length-1)) >> TARGET_PAGE_BITS;
+ ret = postcopy_send_discard_bm_ram(ms, block->idstr, first, last);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * At the start of the postcopy phase of migration, any now-dirty
+ * precopied pages are discarded.
+ *
+ * start..end is an inclusive range of bits, offset from the start of
+ * the source VM's bitmap for RAMBlock 'block_name';
+ * source_target_page_bits tells us the page size each of those bits
+ * represents.
+ *
+ * Returns 0 on success.
+ */
+int ram_discard_range(MigrationIncomingState *mis,
+ const char *block_name,
+ int source_target_page_bits,
+ uint64_t start, uint64_t end)
+{
+ unsigned int bitdif;
+ RAMBlock *rb = ram_find_block(block_name);
+
+ assert(end >= start);
+
+ if (!rb) {
+ error_report("ram_discard_range: Failed to find block '%s'",
+ block_name);
+ return -1;
+ }
+
+ if (source_target_page_bits != TARGET_PAGE_BITS) {
+ if (source_target_page_bits < TARGET_PAGE_BITS) {
+ /*
+ * e.g. source is 4K and we're 64K - we have to discard on the
+ * larger boundary. e.g. a range of 70K...132K is discarded as
+ * 64K..192K: round start down; since start/end are inclusive
+ * page indexes, shifting end down already lands it on the target
+ * page that the last source page partially covers.
+ */
+ bitdif = TARGET_PAGE_BITS - source_target_page_bits;
+ start = start >> bitdif;
+ end = end >> bitdif;
+
+ } else {
+ /*
+ * e.g. source is 64K and we're 4K - just scale the indexes; end
+ * is inclusive, so the last source page scales to the last
+ * target page it contains.
+ */
+ bitdif = source_target_page_bits - TARGET_PAGE_BITS;
+
+ start = start << bitdif;
+ end = ((end + 1) << bitdif) - 1;
+ }
+ }
+
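+ /* Convert block-relative page indexes into global bitmap indexes */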
+ uint64_t index_offset = rb->offset >> TARGET_PAGE_BITS;
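+ /* Record the range as missing in the postcopy incoming-page state */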
+ postcopy_pmi_discard_range(mis, start + index_offset, (end - start) + 1);
+
+ /* +1 gives the byte after the end of the last page to be discarded */
+ ram_addr_t end_offset = (end+1) << TARGET_PAGE_BITS;
+ uint8_t *host_startaddr = rb->host + (start << TARGET_PAGE_BITS);
+ uint8_t *host_endaddr;
+
+ if (end_offset <= rb->length) {
+ host_endaddr = rb->host + (end_offset-1);
+ return postcopy_ram_discard_range(mis, host_startaddr, host_endaddr);
+ } else {
+ error_report("ram_discard_range: Overrun block '%s' (%zu/%zu/%zu)",
+ block_name, start, end, rb->length);
+ return -1;
+ }
+}
+
static int ram_save_setup(QEMUFile *f, void *opaque)
{
RAMBlock *block;
@@ -844,7 +995,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
acct_clear();
}
-
qemu_mutex_lock_iothread();
qemu_mutex_lock_ramlist();
bytes_transferred = 0;
@@ -854,6 +1004,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
migration_bitmap = bitmap_new(ram_bitmap_pages);
bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
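+
+ /*
+ * Postcopy needs to know which pages have been sent, so that any
+ * that are redirtied can be discarded on the destination.
+ */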
+ if (migrate_postcopy_ram()) {
+ MigrationState *s = migrate_get_current();
+ s->sentmap = bitmap_new(ram_bitmap_pages);
+ bitmap_clear(s->sentmap, 0, ram_bitmap_pages);
+ }
+
/*
* Count the total number of pages used by ram blocks not including any
* gaps due to alignment or unplugs.
@@ -171,6 +171,11 @@ double xbzrle_mig_cache_miss_rate(void);
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
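+/* Postcopy discard helpers (defined in arch_init.c) */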
+int64_t ram_mask_postcopy_bitmap(MigrationState *ms);
+int ram_postcopy_each_ram_discard(MigrationState *ms);
+int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
+ int source_target_page_bits,
+ uint64_t start, uint64_t end);
/**
* @migrate_add_blocker - prevent migration from proceeding
@@ -13,7 +13,27 @@
#ifndef QEMU_POSTCOPY_RAM_H
#define QEMU_POSTCOPY_RAM_H
+#include "migration/migration.h"
+
/* Return 0 if the host supports everything we need to do postcopy-ram */
int postcopy_ram_hosttest(void);
+/* Send the list of sent-but-dirty pages */
+int postcopy_send_discard_bitmap(MigrationState *ms);
+
+/*
+ * Discard the contents of memory start..end inclusive.
+ * We can assume that if we've been called then postcopy_ram_hosttest
+ * succeeded
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+ uint8_t *end);
+
+/*
+ * Called back from arch_init's ram_postcopy_each_ram_discard to handle
+ * discarding one RAMBlock's pre-postcopy dirty pages
+ */
+int postcopy_send_discard_bm_ram(MigrationState *ms, const char *name,
+ unsigned long start, unsigned long end);
+
#endif
@@ -22,6 +22,7 @@
#include "block/block.h"
#include "qemu/sockets.h"
#include "migration/block.h"
+#include "migration/postcopy-ram.h"
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
@@ -928,6 +929,7 @@ static void *migration_thread(void *opaque)
} else {
int ret;
+ DPRINTF("done iterating\n");
qemu_mutex_lock_iothread();
start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
@@ -23,6 +23,7 @@
#include "qemu-common.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
+#include "sysemu/sysemu.h"
//#define DEBUG_POSTCOPY
@@ -116,6 +117,21 @@ int postcopy_ram_hosttest(void)
return 0;
}
+/*
+ * Discard the contents of memory start..end inclusive.
+ * We can assume that if we've been called then postcopy_ram_hosttest
+ * succeeded
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+ uint8_t *end)
+{
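+ /*
+ * For anonymous mappings MADV_DONTNEED zaps the range: later reads
+ * fault in fresh zero pages, so the stale precopy contents are gone.
+ */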
+ if (madvise(start, (end-start)+1, MADV_DONTNEED)) {
+ perror("postcopy_ram_discard_range MADV_DONTNEED");
+ return -1;
+ }
+
+ return 0;
+}
+
#else
/* No target OS support, stubs just fail */
@@ -125,5 +141,145 @@ int postcopy_ram_hosttest(void)
return -1;
}
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+ uint8_t *end)
+{
+ error_report("postcopy_ram_discard_range: No OS support");
+ return -1;
+}
+#endif
+
+/* ------------------------------------------------------------------------- */
+/*
+ * A helper to get 64 bits from the sentmap; trivial for HOST_LONG_BITS=64,
+ * messier for other sizes; pads with 0s if the map ends unaligned.
+ * check2nd32: True if it's safe to read the second 32-bit word of a
+ * 32-bit-long map
+ */
+static uint64_t get_64bits_sentmap(unsigned long *sentmap, bool check2nd32,
+ int64_t start)
+{
+ uint64_t result;
+#if HOST_LONG_BITS == 64
+ result = sentmap[start / 64];
+#elif HOST_LONG_BITS == 32
+ /*
+ * Irrespective of host endianness, sentmap[n] is for pages earlier
+ * than sentmap[n+1] so we can't just cast up
+ */
+ uint32_t sm0, sm1;
+ sm0 = sentmap[start / 32];
+ sm1 = check2nd32 ? sentmap[(start / 32) + 1] : 0;
+ result = sm0 | ((uint64_t)sm1) << 32;
+#else
+#error "Host long other than 64/32 not supported"
+#endif
+
+ return result;
+}
+
+/*
+ * Callback from ram_postcopy_each_ram_discard for each RAMBlock
+ * start,end: Indexes into the bitmap for the first and last bit
+ * representing the named block
+ */
+int postcopy_send_discard_bm_ram(MigrationState *ms, const char *name,
+ unsigned long start, unsigned long end)
+{
+ /* Keeps the command under 256 bytes - the limit is arbitrary */
+ const unsigned int max_entries_per_command = 12;
+ uint16_t cur_entry;
+ uint64_t buffer[2*max_entries_per_command];
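+ /* Each buffer entry is an (offset, bitmask) pair, sent as be64 */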
+ unsigned int nsentwords = 0;
+ unsigned int nsentcmds = 0;
+
+ /*
+ * There is no guarantee that start, end are on convenient 64bit multiples
+ * (We always send 64bit chunks over the wire, irrespective of long size)
+ */
+ unsigned long first64, last64, cur64;
+ first64 = start / 64;
+ last64 = end / 64;
+
+ cur_entry = 0;
+ for (cur64 = first64; cur64 <= last64; cur64++) {
+ /* Deal with start/end not on alignment */
+ uint64_t mask;
+ mask = ~(uint64_t)0;
+
+ if ((cur64 == first64) && (start & 63)) {
+ /* e.g. (start & 63) = 3
+ * 1 << . -> 2^3
+ * . - 1 -> 2^3 - 1 i.e. mask 2..0
+ * ~. -> mask 63..3
+ */
+ mask &= ~((((uint64_t)1) << (start & 63)) - 1);
+ }
+
+ if ((cur64 == last64) && ((end & 63) != 63)) {
+ /* e.g. (end & 63) = 3
+ * . +1 -> 4
+ * 1 << . -> 2^4
+ * . -1 -> 2^4 - 1
+ * = mask set 3..0
+ */
+ mask &= (((uint64_t)1) << ((end & 63) + 1)) - 1;
+ }
+
+ /* Reading the upper 32 bits is only unsafe in the final word */
+ uint64_t data = get_64bits_sentmap(ms->sentmap,
+ (cur64 < last64) || ((end & 63) >= 32),
+ cur64 * 64);
+ data &= mask;
+
+ if (data) {
+ cpu_to_be64w(buffer+2*cur_entry, (cur64-first64));
+ cpu_to_be64w(buffer+1+2*cur_entry, data);
+ cur_entry++;
+ nsentwords++;
+
+ if (cur_entry == max_entries_per_command) {
+ /* Full set, ship it! */
+ qemu_savevm_send_postcopy_ram_discard(ms->file, name,
+ cur_entry,
+ start & 63,
+ buffer);
+ nsentcmds++;
+ cur_entry = 0;
+ }
+ }
+ }
+
+ /* Anything unsent? */
+ if (cur_entry) {
+ qemu_savevm_send_postcopy_ram_discard(ms->file, name, cur_entry,
+ start & 63, buffer);
+ nsentcmds++;
+ }
+
+ DPRINTF("postcopy_send_discard_bm_ram: '%s' mask words sent=%d in %d commands",
+ name, nsentwords, nsentcmds);
+
+ return 0;
+}
+
+/*
+ * Transmit the set of pages to be discarded after precopy to the target;
+ * these are pages that have been sent previously but have since been
+ * dirtied. Hopefully this set is pretty sparse.
+ */
+int postcopy_send_discard_bitmap(MigrationState *ms)
+{
+ /*
+ * Update the sentmap to be sentmap &= dirty
+ * (arch_init returns the full bitmap size, which we don't need here)
+ */
+ ram_mask_postcopy_bitmap(ms);
+
+ DPRINTF("Dumping merged sentmap");
+#ifdef DEBUG_POSTCOPY
+ ram_debug_dump_bitmap(ms->sentmap, false);
#endif
+ return ram_postcopy_each_ram_discard(ms);
+}
+
@@ -1238,12 +1238,9 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
* we know there must be at least 1 bit set due to the loop entry
* If there is no 0 firstzero will be 64
*/
- /* TODO - ram_discard_range gets added in a later patch
int ret = ram_discard_range(mis, ramid, source_target_page_bits,
startaddr + firstset - first_bit_offset,
startaddr + (firstzero - 1) - first_bit_offset);
- */
- ret = -1; /* TODO */
if (ret) {
return ret;
}