From patchwork Tue Oct 30 08:33:08 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Isaku Yamahata X-Patchwork-Id: 195436 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 8B5362C00A5 for ; Tue, 30 Oct 2012 21:56:13 +1100 (EST) Received: from localhost ([::1]:49590 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TT7IR-00079Z-BE for incoming@patchwork.ozlabs.org; Tue, 30 Oct 2012 04:35:39 -0400 Received: from eggs.gnu.org ([208.118.235.92]:60856) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TT7GN-0004l1-Ib for qemu-devel@nongnu.org; Tue, 30 Oct 2012 04:33:44 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1TT7GD-0000aw-KN for qemu-devel@nongnu.org; Tue, 30 Oct 2012 04:33:31 -0400 Received: from mail.valinux.co.jp ([210.128.90.3]:44733) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TT7GC-0000X8-U8 for qemu-devel@nongnu.org; Tue, 30 Oct 2012 04:33:21 -0400 Received: from ps.local.valinux.co.jp (vagw.valinux.co.jp [210.128.90.14]) by mail.valinux.co.jp (Postfix) with SMTP id A4D29181D1; Tue, 30 Oct 2012 17:33:14 +0900 (JST) Received: (nullmailer pid 29492 invoked by uid 1000); Tue, 30 Oct 2012 08:33:14 -0000 From: Isaku Yamahata To: qemu-devel@nongnu.org, kvm@vger.kernel.org Date: Tue, 30 Oct 2012 17:33:08 +0900 Message-Id: <3c1f5c075198d8435be92f13d74165fd98709d4f.1351582535.git.yamahata@valinux.co.jp> X-Mailer: git-send-email 1.7.10.4 In-Reply-To: References: In-Reply-To: References: X-Virus-Scanned: clamav-milter 0.95.2 at va-mail.local.valinux.co.jp X-Virus-Status: Clean X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6 (newer, 
3) X-Received-From: 210.128.90.3 Cc: benoit.hudzia@gmail.com, aarcange@redhat.com, aliguori@us.ibm.com, quintela@redhat.com, stefanha@gmail.com, t.hirofuchi@aist.go.jp, dlaor@redhat.com, satoshi.itoh@aist.go.jp, mdroth@linux.vnet.ibm.com, yoshikawa.takuya@oss.ntt.co.jp, owasserm@redhat.com, avi@redhat.com, pbonzini@redhat.com, chegu_vinod@hp.com Subject: [Qemu-devel] [PATCH v3 32/35] postcopy: pre+post optimization incoming side X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Signed-off-by: Isaku Yamahata --- migration-postcopy.c | 207 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 204 insertions(+), 3 deletions(-) diff --git a/migration-postcopy.c b/migration-postcopy.c index 421fb39..9298cd4 100644 --- a/migration-postcopy.c +++ b/migration-postcopy.c @@ -274,6 +274,9 @@ static void postcopy_outgoing_free_req(struct qemu_umem_req *req) #define QEMU_VM_POSTCOPY_INIT 0 #define QEMU_VM_POSTCOPY_SECTION_FULL 1 +/* options in QEMU_VM_POSTCOPY_INIT section */ +#define POSTCOPY_OPTION_PRECOPY 1ULL + /*************************************************************************** * outgoing part */ @@ -739,6 +742,7 @@ struct PostcopyIncomingUMemDaemon { int nr_target_pages_per_host_page; int target_to_host_page_shift; int version_id; /* save/load format version id */ + bool precopy_enabled; QemuThread thread; QLIST_HEAD(, UMemBlock) blocks; @@ -784,6 +788,7 @@ static PostcopyIncomingState state = { static PostcopyIncomingUMemDaemon umemd = { .state = 0, + .precopy_enabled = false, .to_qemu_fd = -1, .to_qemu = NULL, .from_qemu_fd = -1, @@ -797,6 +802,8 @@ static PostcopyIncomingUMemDaemon umemd = { static void *postcopy_incoming_umemd(void*); static void postcopy_incoming_qemu_handle_req(void *opaque); 
+static UMemBlock *postcopy_incoming_umem_block_from_stream(
+    QEMUFile *f, int flags);
 
 /* protected by qemu_mutex_lock_ramlist() */
 void postcopy_incoming_ram_free(RAMBlock *ram_block)
@@ -875,6 +882,34 @@ int postcopy_incoming_ram_load(QEMUFile *f, void *opaque, int version_id)
     return -EINVAL;
 }
 
+/*
+ * Resolve the UMemBlock via postcopy_incoming_umem_block_from_stream() and
+ * return the address @offset bytes into its umem shmem, or NULL when no
+ * block is found.  Given to ram_load() as its address lookup callback.
+ */
+static void *
+postcopy_incoming_shmem_from_stream_offset(QEMUFile *f, ram_addr_t offset,
+                                           int flags)
+{
+    UMemBlock *block = postcopy_incoming_umem_block_from_stream(f, flags);
+    if (block == NULL) {
+        DPRINTF("error block = NULL\n");
+        return NULL;
+    }
+    return block->umem->shmem + offset;
+}
+
+/*
+ * load_state handler installed when POSTCOPY_OPTION_PRECOPY is set:
+ * delegates to ram_load() with a callback resolving into umem shmem.
+ */
+static int postcopy_incoming_ram_load_precopy(QEMUFile *f, void *opaque,
+                                              int version_id)
+{
+    return ram_load(f, opaque, version_id,
+                    &postcopy_incoming_shmem_from_stream_offset);
+}
+
 static void postcopy_incoming_umem_block_free(void)
 {
     UMemBlock *block;
@@ -982,6 +1017,12 @@ static int postcopy_incoming_loadvm_init(QEMUFile *f, uint32_t size)
         return -EINVAL;
     }
     options = qemu_get_be64(f);
+    if (options & POSTCOPY_OPTION_PRECOPY) {
+        options &= ~POSTCOPY_OPTION_PRECOPY;
+        umemd.precopy_enabled = true;
+    } else {
+        umemd.precopy_enabled = false;
+    }
     if (options) {
         fprintf(stderr, "unknown options 0x%"PRIx64, options);
         return -ENOSYS;
@@ -999,12 +1040,17 @@ static int postcopy_incoming_loadvm_init(QEMUFile *f, uint32_t size)
         return -ENOSYS;
     }
 
-    DPRINTF("detected POSTCOPY\n");
+    DPRINTF("detected POSTCOPY precpoy %d\n", umemd.precopy_enabled);
     error = postcopy_incoming_prepare();
     if (error) {
         return error;
     }
-    savevm_ram_handlers.load_state = postcopy_incoming_ram_load;
+    if (umemd.precopy_enabled) {
+        savevm_ram_handlers.load_state = postcopy_incoming_ram_load_precopy;
+    } else {
+        savevm_ram_handlers.load_state = postcopy_incoming_ram_load;
+    }
+
     incoming_postcopy = true;
     return 0;
 }
@@ -1515,6 +1561,204 @@ static int postcopy_incoming_umem_ram_load(void)
     return 0;
 }
 
+/*
+ * Read one RAM block's dirty bitmap from @f.  Every in-range page whose bit
+ * is clear is flagged in umemd.phys_received and umemd.phys_requested; host
+ * pages found fully received are then handed to umem_mark_page_cached() and
+ * postcopy_incoming_umem_page_fault().
+ *
+ * @idstr/@idlen name the block and must equal a UMemBlock's idstr exactly.
+ * @block_offset/@block_length give the byte range covered; bit 0 of the
+ * first word stands for target page (block_offset >> TARGET_PAGE_BITS)
+ * rounded down to a multiple of 64.  @bitmap_length is the payload size in
+ * bytes; the payload is a sequence of big-endian 64-bit words.
+ *
+ * Returns 0 on success, -EINVAL for an unknown block or for a malformed or
+ * truncated payload.
+ */
+static int postcopy_incoming_umemd_read_dirty_bitmap(
+    QEMUFile *f, const char *idstr, uint8_t idlen,
+    uint64_t block_offset, uint64_t block_length, uint64_t bitmap_length)
+{
+    UMemBlock *block;
+    uint64_t bit_start = block_offset >> TARGET_PAGE_BITS;
+    uint64_t bit_end = (block_offset + block_length) >> TARGET_PAGE_BITS;
+    uint64_t bit_offset;
+    uint8_t *buffer;
+    uint64_t index;
+    int len;
+
+    if ((bitmap_length % sizeof(uint64_t)) != 0) {
+        return -EINVAL;
+    }
+    /* qemu_get_buffer() counts bytes in an int: reject what cannot fit */
+    len = bitmap_length;
+    if (len < 0 || (uint64_t)len != bitmap_length) {
+        return -EINVAL;
+    }
+    QLIST_FOREACH(block, &umemd.blocks, next) {
+        /* whole-name match; strncmp() alone would accept a bare prefix */
+        if (strlen(block->idstr) == idlen &&
+            !memcmp(block->idstr, idstr, idlen)) {
+            break;
+        }
+    }
+    if (block == NULL) {
+        return -EINVAL;
+    }
+
+    DPRINTF("bitmap %s 0x%"PRIx64" 0x%"PRIx64" 0x%"PRIx64"\n",
+            block->idstr, block_offset, block_length, bitmap_length);
+    buffer = g_malloc(bitmap_length);
+    if (qemu_get_buffer(f, buffer, len) != len) {
+        /* short read: the rest of the buffer is uninitialized */
+        g_free(buffer);
+        return -EINVAL;
+    }
+
+    bit_offset = bit_start & ~(uint64_t)63;
+    for (index = 0; index < bitmap_length;
+         index += sizeof(uint64_t), bit_offset += 64) {
+        uint64_t bitmap;
+        uint64_t bit;   /* uint64_t: int would truncate page numbers */
+        int i;
+        int j;
+
+        bitmap = be64_to_cpup((uint64_t *)(buffer + index));
+        for (i = 0; i < 64; i++) {
+            bit = bit_offset + i;
+            if (bit < bit_start) {
+                continue;
+            }
+            if (bit >= bit_end) {
+                break;
+            }
+            if (!(bitmap & (1ULL << i))) {
+                set_bit(bit, umemd.phys_received);
+
+                /* this is racy, but write side just sends redundant request */
+                set_bit(bit, umemd.phys_requested);
+            }
+        }
+
+        umemd.page_cached->nr = 0;
+        if (TARGET_PAGE_SIZE >= umemd.host_page_size) {
+            for (i = 0; i < 64; i++) {
+                uint64_t pgoff;
+                bit = bit_offset + i;
+                if (bit < bit_start) {
+                    continue;
+                }
+                if (bit >= bit_end) {
+                    break;
+                }
+                if (!test_bit(bit, umemd.phys_received)) {
+                    continue;
+                }
+                pgoff = (bit - bit_start) << umemd.target_to_host_page_shift;
+                for (j = 0; j < umemd.nr_host_pages_per_target_page; j++) {
+                    umemd.page_cached->pgoffs[umemd.page_cached->nr] =
+                        pgoff + j;
+                    umemd.page_cached->nr++;
+                }
+            }
+        } else {
+            for (i = 0; i < 64; i += umemd.nr_target_pages_per_host_page) {
+                bool mark_cache = true;
+                bit = bit_offset + i;
+                if (bit < bit_start) {
+                    continue;
+                }
+                if (bit >= bit_end) {
+                    break;
+                }
+                if (!test_bit(bit, umemd.phys_received)) {
+                    continue;
+                }
+                for (j = 0; j < umemd.nr_target_pages_per_host_page; j++) {
+                    if (!test_bit(bit + j, umemd.phys_received)) {
+                        mark_cache = false;
+                        break;
+                    }
+                }
+                if (mark_cache) {
+                    umemd.page_cached->pgoffs[umemd.page_cached->nr] =
+                        (bit - bit_start) >>
+                        (umemd.host_page_shift - TARGET_PAGE_BITS);
+                    umemd.page_cached->nr++;
+                }
+            }
+        }
+
+        if (umemd.page_cached->nr > 0) {
+            umem_mark_page_cached(block->umem, umemd.page_cached);
+            postcopy_incoming_umem_page_fault(block, umemd.page_cached);
+        }
+    }
+
+    g_free(buffer);
+    return 0;
+}
+
+/*
+ * Passed to the mig_read thread together with
+ * postcopy_incoming_umemd_mig_read_loop().  When precopy was enabled, read
+ * dirty bitmap records (idlen, idstr, block offset, block length, bitmap
+ * length, bitmap payload) until the all-zero terminator record, then call
+ * postcopy_incoming_umem_done() if postcopy_incoming_umem_check_umem_done().
+ *
+ * Returns 0 on success or a negative errno value for a bad record.
+ */
+static int postcopy_incoming_umemd_mig_read_init(void)
+{
+    QEMUFile *f = umemd.mig_read;
+#ifdef DEBUG_POSTCOPY
+    uint64_t start = qemu_get_clock_ns(rt_clock);
+    uint64_t end;
+#endif
+
+    if (!umemd.precopy_enabled) {
+        return 0;
+    }
+
+    for (;;) {
+        uint8_t idlen;
+        char idstr[256];
+        uint64_t block_offset;
+        uint64_t block_length;
+        uint64_t bitmap_length;
+        int ret;
+
+        idlen = qemu_get_byte(f);
+        if (qemu_get_buffer(f, (uint8_t *)idstr, idlen) != idlen) {
+            return -EINVAL;
+        }
+        idstr[idlen] = 0;
+        block_offset = qemu_get_be64(f);
+        block_length = qemu_get_be64(f);
+        bitmap_length = qemu_get_be64(f);
+
+        if (idlen == 0 && block_offset == 0 && block_length == 0 &&
+            bitmap_length == 0) {
+            DPRINTF("bitmap done\n");
+            break;
+        }
+        ret = postcopy_incoming_umemd_read_dirty_bitmap(
+            f, idstr, idlen, block_offset, block_length, bitmap_length);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+    if (postcopy_incoming_umem_check_umem_done()) {
+        postcopy_incoming_umem_done();
+    }
+#ifdef DEBUG_POSTCOPY
+    end = qemu_get_clock_ns(rt_clock);
+    DPRINTF("bitmap %"PRIu64" nsec\n", end - start);
+#endif
+    return 0;
+}
+
 static int postcopy_incoming_umemd_mig_read_loop(void)
 {
     int error;
@@ -1704,7 +1948,8 @@ static void *postcopy_incoming_umemd(void* unused)
     qemu_thread_create(&umemd.mig_read_thread,
                        &postcopy_incoming_umemd_thread,
                        &(IncomingThread) {
-                           NULL, &postcopy_incoming_umemd_mig_read_loop,},
+                           &postcopy_incoming_umemd_mig_read_init,
+                           &postcopy_incoming_umemd_mig_read_loop,},
                        QEMU_THREAD_JOINABLE);
     qemu_thread_create(&umemd.mig_write_thread,
                        &postcopy_incoming_umemd_thread,