diff mbox

[v3,32/35] postcopy: pre+post optimization incoming side

Message ID 3c1f5c075198d8435be92f13d74165fd98709d4f.1351582535.git.yamahata@valinux.co.jp
State New
Headers show

Commit Message

Isaku Yamahata Oct. 30, 2012, 8:33 a.m. UTC
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 migration-postcopy.c |  207 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 204 insertions(+), 3 deletions(-)
diff mbox

Patch

diff --git a/migration-postcopy.c b/migration-postcopy.c
index 421fb39..9298cd4 100644
--- a/migration-postcopy.c
+++ b/migration-postcopy.c
@@ -274,6 +274,9 @@  static void postcopy_outgoing_free_req(struct qemu_umem_req *req)
 #define QEMU_VM_POSTCOPY_INIT           0
 #define QEMU_VM_POSTCOPY_SECTION_FULL   1
 
+/* options in QEMU_VM_POSTCOPY_INIT section */
+#define POSTCOPY_OPTION_PRECOPY         1ULL
+
 /***************************************************************************
  * outgoing part
  */
@@ -739,6 +742,7 @@  struct PostcopyIncomingUMemDaemon {
     int nr_target_pages_per_host_page;
     int target_to_host_page_shift;
     int version_id;     /* save/load format version id */
+    bool precopy_enabled;
 
     QemuThread thread;
     QLIST_HEAD(, UMemBlock) blocks;
@@ -784,6 +788,7 @@  static PostcopyIncomingState state = {
 
 static PostcopyIncomingUMemDaemon umemd = {
     .state = 0,
+    .precopy_enabled = false,
     .to_qemu_fd = -1,
     .to_qemu = NULL,
     .from_qemu_fd = -1,
@@ -797,6 +802,8 @@  static PostcopyIncomingUMemDaemon umemd = {
 
 static void *postcopy_incoming_umemd(void*);
 static void postcopy_incoming_qemu_handle_req(void *opaque);
+static UMemBlock *postcopy_incoming_umem_block_from_stream(
+    QEMUFile *f, int flags);
 
 /* protected by qemu_mutex_lock_ramlist() */
 void postcopy_incoming_ram_free(RAMBlock *ram_block)
@@ -875,6 +882,25 @@  int postcopy_incoming_ram_load(QEMUFile *f, void *opaque, int version_id)
     return -EINVAL;
 }
 
+/* Look up a UMemBlock from @f; return its shmem base + @offset, or NULL. */
+static void *postcopy_incoming_shmem_from_stream_offset(
+    QEMUFile *f, ram_addr_t offset, int flags)
+{
+    UMemBlock *ub = postcopy_incoming_umem_block_from_stream(f, flags);
+    if (!ub) {
+        DPRINTF("error block = NULL\n");
+    }
+    return ub ? ub->umem->shmem + offset : NULL;
+}
+
+/* load_state handler for the precopy case: ram_load() + shmem resolver. */
+static int
+postcopy_incoming_ram_load_precopy(QEMUFile *f, void *opaque, int version_id)
+{
+    return ram_load(f, opaque, version_id,
+                    &postcopy_incoming_shmem_from_stream_offset);
+}
+
 static void postcopy_incoming_umem_block_free(void)
 {
     UMemBlock *block;
@@ -982,6 +1008,12 @@  static int postcopy_incoming_loadvm_init(QEMUFile *f, uint32_t size)
         return -EINVAL;
     }
     options = qemu_get_be64(f);
+    if (options & POSTCOPY_OPTION_PRECOPY) {
+        options &= ~POSTCOPY_OPTION_PRECOPY;
+        umemd.precopy_enabled = true;
+    } else {
+        umemd.precopy_enabled = false;
+    }
     if (options) {
         fprintf(stderr, "unknown options 0x%"PRIx64, options);
         return -ENOSYS;
@@ -999,12 +1031,17 @@  static int postcopy_incoming_loadvm_init(QEMUFile *f, uint32_t size)
         return -ENOSYS;
     }
 
-    DPRINTF("detected POSTCOPY\n");
+    DPRINTF("detected POSTCOPY precpoy %d\n", umemd.precopy_enabled);
     error = postcopy_incoming_prepare();
     if (error) {
         return error;
     }
-    savevm_ram_handlers.load_state = postcopy_incoming_ram_load;
+    if (umemd.precopy_enabled) {
+        savevm_ram_handlers.load_state = postcopy_incoming_ram_load_precopy;
+    } else {
+        savevm_ram_handlers.load_state = postcopy_incoming_ram_load;
+    }
+
     incoming_postcopy = true;
     return 0;
 }
@@ -1515,6 +1552,169 @@  static int postcopy_incoming_umem_ram_load(void)
     return 0;
 }
 
+/*
+ * Read one RAM block's dirty bitmap from @f and apply it to umemd state.
+ * The bitmap is @bitmap_length bytes of big-endian 64-bit words; bit 0 of the
+ * first word is target page (bit_start & ~63).  Pages whose bit is clear are
+ * flagged in umemd.phys_received/phys_requested, and host pages that are then
+ * fully received are passed to umem_mark_page_cached() and
+ * postcopy_incoming_umem_page_fault().
+ * Returns 0, -EINVAL (bad length or unknown block id) or -EIO (short read).
+ */
+static int postcopy_incoming_umemd_read_dirty_bitmap(
+    QEMUFile *f, const char *idstr, uint8_t idlen,
+    uint64_t block_offset, uint64_t block_length, uint64_t bitmap_length)
+{
+    UMemBlock *block;
+    uint64_t bit_start = block_offset >> TARGET_PAGE_BITS;
+    uint64_t bit_end = (block_offset + block_length) >> TARGET_PAGE_BITS;
+    uint64_t bit_offset = bit_start & ~63;
+    uint64_t index;
+    uint8_t *buffer;
+
+    if ((bitmap_length % sizeof(uint64_t)) != 0) {
+        return -EINVAL;
+    }
+    QLIST_FOREACH(block, &umemd.blocks, next) {
+        /* compare whole ids: strncmp(.., idlen) also matched on a prefix */
+        if (strlen(block->idstr) == idlen &&
+            !memcmp(block->idstr, idstr, idlen)) {
+            break;
+        }
+    }
+    if (block == NULL) {
+        return -EINVAL;
+    }
+
+    DPRINTF("bitmap %s 0x%"PRIx64" 0x%"PRIx64" 0x%"PRIx64"\n",
+            block->idstr, block_offset, block_length, bitmap_length);
+    buffer = g_malloc(bitmap_length);
+    /* on a short read the rest of buffer is uninitialized; do not parse it */
+    if ((uint64_t)qemu_get_buffer(f, buffer, bitmap_length) != bitmap_length) {
+        g_free(buffer);
+        return -EIO;
+    }
+
+    for (index = 0; index < bitmap_length;
+         index += sizeof(uint64_t), bit_offset += 64) {
+        uint64_t bitmap = be64_to_cpup((uint64_t*)(buffer + index));
+        uint64_t bit;
+        int i, j;
+
+        for (i = 0; i < 64; i++) {
+            bit = bit_offset + i;
+            if (bit >= bit_end) {
+                break;
+            }
+            if (bit < bit_start || (bitmap & (1ULL << i))) {
+                continue;
+            }
+            set_bit(bit, umemd.phys_received);
+            /* this is racy, but write side just sends redundant request */
+            set_bit(bit, umemd.phys_requested);
+        }
+
+        umemd.page_cached->nr = 0;
+        if (TARGET_PAGE_SIZE >= umemd.host_page_size) {
+            /* each received target page spans whole host page(s) */
+            for (i = 0; i < 64; i++) {
+                uint64_t pgoff;
+                bit = bit_offset + i;
+                if (bit >= bit_end) {
+                    break;
+                }
+                if (bit < bit_start || !test_bit(bit, umemd.phys_received)) {
+                    continue;
+                }
+                pgoff = (bit - bit_start) << umemd.target_to_host_page_shift;
+                for (j = 0; j < umemd.nr_host_pages_per_target_page; j++) {
+                    umemd.page_cached->pgoffs[umemd.page_cached->nr++] =
+                        pgoff + j;
+                }
+            }
+        } else {
+            /* cache a host page only when all its target pages are received */
+            for (i = 0; i < 64; i += umemd.nr_target_pages_per_host_page) {
+                bool mark_cache = true;
+                bit = bit_offset + i;
+                if (bit >= bit_end) {
+                    break;
+                }
+                if (bit < bit_start || !test_bit(bit, umemd.phys_received)) {
+                    continue;
+                }
+                for (j = 0; j < umemd.nr_target_pages_per_host_page; j++) {
+                    if (!test_bit(bit + j, umemd.phys_received)) {
+                        mark_cache = false;
+                        break;
+                    }
+                }
+                if (mark_cache) {
+                    umemd.page_cached->pgoffs[umemd.page_cached->nr++] =
+                        (bit - bit_start) >>
+                        (umemd.host_page_shift - TARGET_PAGE_BITS);
+                }
+            }
+        }
+
+        if (umemd.page_cached->nr > 0) {
+            umem_mark_page_cached(block->umem, umemd.page_cached);
+            postcopy_incoming_umem_page_fault(block, umemd.page_cached);
+        }
+    }
+
+    g_free(buffer);
+    return 0;
+}
+
+/*
+ * Read dirty-bitmap records from umemd.mig_read until the all-zero end record
+ * and apply each; no-op unless umemd.precopy_enabled.  Returns 0 or -errno.
+ */
+static int postcopy_incoming_umemd_mig_read_init(void)
+{
+    QEMUFile *mig = umemd.mig_read;
+#ifdef DEBUG_POSTCOPY
+    uint64_t t0 = qemu_get_clock_ns(rt_clock);
+    uint64_t t1;
+#endif
+
+    if (!umemd.precopy_enabled) {
+        return 0;
+    }
+
+    while (true) {
+        char idstr[256];    /* up to 255 id bytes plus the terminating NUL */
+        uint8_t idlen = qemu_get_byte(mig);
+        uint64_t block_offset, block_length, bitmap_length;
+        int err;
+
+        qemu_get_buffer(mig, (uint8_t*)idstr, idlen);
+        idstr[idlen] = 0;
+        block_offset = qemu_get_be64(mig);
+        block_length = qemu_get_be64(mig);
+        bitmap_length = qemu_get_be64(mig);
+
+        if (!idlen && !block_offset && !block_length && !bitmap_length) {
+            DPRINTF("bitmap done\n");
+            break;
+        }
+        err = postcopy_incoming_umemd_read_dirty_bitmap(
+            mig, idstr, idlen, block_offset, block_length, bitmap_length);
+        if (err < 0) {
+            return err;
+        }
+    }
+    if (postcopy_incoming_umem_check_umem_done()) {
+        postcopy_incoming_umem_done();
+    }
+#ifdef DEBUG_POSTCOPY
+    t1 = qemu_get_clock_ns(rt_clock);
+    DPRINTF("bitmap %"PRIu64" nsec\n", t1 - t0);
+#endif
+    return 0;
+}
+
 static int postcopy_incoming_umemd_mig_read_loop(void)
 {
     int error;
@@ -1704,7 +1904,8 @@  static void *postcopy_incoming_umemd(void* unused)
     qemu_thread_create(&umemd.mig_read_thread,
                        &postcopy_incoming_umemd_thread,
                        &(IncomingThread) {
-                           NULL, &postcopy_incoming_umemd_mig_read_loop,},
+                           &postcopy_incoming_umemd_mig_read_init,
+                           &postcopy_incoming_umemd_mig_read_loop,},
                        QEMU_THREAD_JOINABLE);
     qemu_thread_create(&umemd.mig_write_thread,
                        &postcopy_incoming_umemd_thread,