From patchwork Thu Sep 15 15:50:47 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Llu=C3=ADs_Vilanova?= X-Patchwork-Id: 670499 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3sZjgz3Gmjz9sf6 for ; Fri, 16 Sep 2016 01:56:15 +1000 (AEST) Received: from localhost ([::1]:35710 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bkZ1I-00084m-V8 for incoming@patchwork.ozlabs.org; Thu, 15 Sep 2016 11:56:13 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43486) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bkYw9-0003lQ-Lw for qemu-devel@nongnu.org; Thu, 15 Sep 2016 11:50:57 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bkYw5-0006Yq-QR for qemu-devel@nongnu.org; Thu, 15 Sep 2016 11:50:53 -0400 Received: from roura.ac.upc.edu ([147.83.33.10]:33384 helo=roura.ac.upc.es) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bkYw5-0006Yl-8u for qemu-devel@nongnu.org; Thu, 15 Sep 2016 11:50:49 -0400 Received: from gw-3.ac.upc.es (gw-3.ac.upc.es [147.83.30.9]) by roura.ac.upc.es (8.13.8/8.13.8) with ESMTP id u8FFomYq002266; Thu, 15 Sep 2016 17:50:48 +0200 Received: from localhost (unknown [84.88.51.85]) by gw-3.ac.upc.es (Postfix) with ESMTPSA id 1314C75A; Thu, 15 Sep 2016 17:50:48 +0200 (CEST) From: =?utf-8?b?TGx1w61z?= Vilanova To: qemu-devel@nongnu.org Date: Thu, 15 Sep 2016 17:50:47 +0200 Message-Id: <147395464777.2399.12669887559432574397.stgit@fimbulvetr.bsc.es> X-Mailer: git-send-email 2.9.3 In-Reply-To: <147395463702.2399.17485798724867278064.stgit@fimbulvetr.bsc.es> References: 
<147395463702.2399.17485798724867278064.stgit@fimbulvetr.bsc.es> User-Agent: StGit/0.17.1-dirty MIME-Version: 1.0 X-MIME-Autoconverted: from 8bit to quoted-printable by roura.ac.upc.es id u8FFomYq002266 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6.x X-Received-From: 147.83.33.10 Subject: [Qemu-devel] [PATCH v2 2/5] exec: [tcg] Use multiple physical TB caches X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Eduardo Habkost , Peter Crosthwaite , Stefan Hajnoczi , Paolo Bonzini , Richard Henderson Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" The physical TB cache is split into 2^E caches, where E is the number of events with the "vcpu" and without the "disable" properties. The virtual TB cache on each vCPU uses a (potentially) different physical TB cache. This is later exploited to support different tracing event states on a per-vCPU basis. Signed-off-by: Lluís Vilanova --- cpu-exec.c | 5 ++++ include/exec/exec-all.h | 6 +++++ include/exec/tb-context.h | 2 +- include/qom/cpu.h | 4 +++- qom/cpu.c | 1 + translate-all.c | 51 +++++++++++++++++++++++++++++++++++++-------- translate-all.h | 17 +++++++++++++++ translate-all.inc.h | 13 +++++++++++ 8 files changed, 87 insertions(+), 12 deletions(-) create mode 100644 translate-all.inc.h diff --git a/cpu-exec.c b/cpu-exec.c index 5d9710a..7b2d8c6 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -33,6 +33,7 @@ #include "hw/i386/apic.h" #endif #include "sysemu/replay.h" +#include "translate-all.h" /* -icount align implementation. 
*/ @@ -267,6 +268,7 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, tb_page_addr_t phys_pc; struct tb_desc desc; uint32_t h; + struct qht *qht; desc.env = (CPUArchState *)cpu->env_ptr; desc.cs_base = cs_base; @@ -275,7 +277,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, phys_pc = get_page_addr_code(desc.env, pc); desc.phys_page1 = phys_pc & TARGET_PAGE_MASK; h = tb_hash_func(phys_pc, pc, flags); - return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h); + qht = tb_caches_get(&tcg_ctx.tb_ctx, cpu->tb_cache_idx); + return qht_lookup(qht, tb_cmp, &desc, h); } static TranslationBlock *tb_find_slow(CPUState *cpu, diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index e2124dc..4ae04f6 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -211,6 +211,10 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...) #define USE_DIRECT_JUMP #endif +/** + * TranslationBlock: + * @tb_cache_idx: Index of physical TB cache where this TB has been allocated. + */ struct TranslationBlock { target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ target_ulong cs_base; /* CS base for this block */ @@ -262,6 +266,8 @@ struct TranslationBlock { */ uintptr_t jmp_list_next[2]; uintptr_t jmp_list_first; + + DECLARE_BITMAP(tb_cache_idx, TRACE_VCPU_EVENT_COUNT); }; void tb_free(TranslationBlock *tb); diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index dce95d9..7728904 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -32,7 +32,7 @@ typedef struct TBContext TBContext; struct TBContext { TranslationBlock *tbs; - struct qht htable; + struct qht *htables; int nb_tbs; /* any access to the tbs or the page table must use this lock */ QemuMutex tb_lock; diff --git a/include/qom/cpu.h b/include/qom/cpu.h index ce0c406..d870810 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -282,6 +282,7 @@ struct qemu_work_item { * @kvm_fd: vCPU file descriptor for KVM. 
* @work_mutex: Lock to prevent multiple access to queued_work_*. * @queued_work_first: First asynchronous work pending. + * @tb_cache_idx: Index of current TB cache. * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask). * * State of one CPU core or thread. @@ -350,7 +351,8 @@ struct CPUState { struct KVMState *kvm_state; struct kvm_run *kvm_run; - /* Used for events with 'vcpu' and *without* the 'disabled' properties */ + /* Used for events with 'vcpu' and *without* the 'disable' properties */ + DECLARE_BITMAP(tb_cache_idx, TRACE_VCPU_EVENT_COUNT); DECLARE_BITMAP(trace_dstate, TRACE_VCPU_EVENT_COUNT); /* TODO Move common fields from CPUArchState here. */ diff --git a/qom/cpu.c b/qom/cpu.c index 2553247..2225103 100644 --- a/qom/cpu.c +++ b/qom/cpu.c @@ -345,6 +345,7 @@ static void cpu_common_initfn(Object *obj) qemu_mutex_init(&cpu->work_mutex); QTAILQ_INIT(&cpu->breakpoints); QTAILQ_INIT(&cpu->watchpoints); + bitmap_zero(cpu->tb_cache_idx, TRACE_VCPU_EVENT_COUNT); bitmap_zero(cpu->trace_dstate, TRACE_VCPU_EVENT_COUNT); } diff --git a/translate-all.c b/translate-all.c index ebd9fa0..c864eee 100644 --- a/translate-all.c +++ b/translate-all.c @@ -733,11 +733,22 @@ static inline void code_gen_alloc(size_t tb_size) qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); } +/* + * Ensure bitmaps can be used as indexes. + */ +void *__error__too_many_vcpu_events[ + (TRACE_VCPU_EVENT_COUNT + 1) <= BITS_PER_LONG ? 0 : -1]; + static void tb_htable_init(void) { + int cache; unsigned int mode = QHT_MODE_AUTO_RESIZE; - qht_init(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE, mode); + tcg_ctx.tb_ctx.htables = g_malloc( + sizeof(tcg_ctx.tb_ctx.htables[0]) * tb_caches_count()); + for (cache = 0; cache < tb_caches_count(); cache++) { + qht_init(&tcg_ctx.tb_ctx.htables[cache], CODE_GEN_HTABLE_SIZE, mode); + } } /* Must be called before using the QEMU cpus. 
'tb_size' is the size @@ -834,6 +845,8 @@ static void page_flush_tb(void) /* XXX: tb_flush is currently not thread safe */ void tb_flush(CPUState *cpu) { + int i; + if (!tcg_enabled()) { return; } @@ -854,7 +867,9 @@ void tb_flush(CPUState *cpu) tb_flush_jmp_cache_all(cpu); } - qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE); + for (i = 0; i < tb_caches_count(); i++) { + qht_reset_size(&tcg_ctx.tb_ctx.htables[i], CODE_GEN_HTABLE_SIZE); + } page_flush_tb(); tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; @@ -879,8 +894,12 @@ do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp) static void tb_invalidate_check(target_ulong address) { + int i; + address &= TARGET_PAGE_MASK; - qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_invalidate_check, &address); + for (i = 0; i < tb_caches_count(); i++) { + qht_iter(&tcg_ctx.tb_ctx.htables[i], do_tb_invalidate_check, &address); + } } static void @@ -900,7 +919,10 @@ do_tb_page_check(struct qht *ht, void *p, uint32_t hash, void *userp) /* verify that all the pages have correct rights for code */ static void tb_page_check(void) { - qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_page_check, NULL); + int i; + for (i = 0; i < tb_caches_count(); i++) { + qht_iter(&tcg_ctx.tb_ctx.htables[i], do_tb_page_check, NULL); + } } #endif @@ -987,12 +1009,14 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) CPUState *cpu; PageDesc *p; uint32_t h; + struct qht *qht; tb_page_addr_t phys_pc; /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); h = tb_hash_func(phys_pc, tb->pc, tb->flags); - qht_remove(&tcg_ctx.tb_ctx.htable, tb, h); + qht = tb_caches_get(&tcg_ctx.tb_ctx, tb->tb_cache_idx); + qht_remove(qht, tb, h); /* remove the TB from the page list */ if (tb->page_addr[0] != page_addr) { @@ -1122,10 +1146,12 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2) { uint32_t h; + struct qht *qht; /* add in the hash 
table */ h = tb_hash_func(phys_pc, tb->pc, tb->flags); - qht_insert(&tcg_ctx.tb_ctx.htable, tb, h); + qht = tb_caches_get(&tcg_ctx.tb_ctx, tb->tb_cache_idx); + qht_insert(qht, tb, h); /* add in the page list */ tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK); @@ -1175,6 +1201,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; + bitmap_copy(tb->tb_cache_idx, ENV_GET_CPU(env)->tb_cache_idx, + TRACE_VCPU_EVENT_COUNT); #ifdef CONFIG_PROFILER tcg_ctx.tb_count1++; /* includes aborted translations because of @@ -1636,6 +1664,8 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) pc = tb->pc; cs_base = tb->cs_base; flags = tb->flags; + /* XXX: It is OK to invalidate only this TB, as this is the one triggering + * the memory access */ tb_phys_invalidate(tb, -1); if (tb->cflags & CF_NOCACHE) { if (tb->orig_tb) { @@ -1715,6 +1745,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) int direct_jmp_count, direct_jmp2_count, cross_page; TranslationBlock *tb; struct qht_stats hst; + int cache; target_code_size = 0; max_target_code_size = 0; @@ -1766,9 +1797,11 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) / tcg_ctx.tb_ctx.nb_tbs : 0); - qht_statistics_init(&tcg_ctx.tb_ctx.htable, &hst); - print_qht_statistics(f, cpu_fprintf, hst); - qht_statistics_destroy(&hst); + for (cache = 0; cache < tb_caches_count(); cache++) { + qht_statistics_init(&tcg_ctx.tb_ctx.htables[cache], &hst); + print_qht_statistics(f, cpu_fprintf, hst); + qht_statistics_destroy(&hst); + } cpu_fprintf(f, "\nStatistics:\n"); cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count); diff --git a/translate-all.h b/translate-all.h index ba8e4d6..d39bf32 100644 --- a/translate-all.h +++ b/translate-all.h @@ -20,7 +20,21 @@ #define TRANSLATE_ALL_H #include "exec/exec-all.h" +#include "qemu/typedefs.h" +/** + * tb_caches_count: + * + * Number of TB caches. 
+ */ +static size_t tb_caches_count(void); + +/** + * tb_caches_get: + * + * Get the TB cache for the given bitmap index. + */ +static struct qht *tb_caches_get(TBContext *tb_ctx, unsigned long *bitmap); /* translate-all.c */ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len); @@ -33,4 +47,7 @@ void tb_check_watchpoint(CPUState *cpu); int page_unprotect(target_ulong address, uintptr_t pc); #endif + +#include "translate-all.inc.h" + #endif /* TRANSLATE_ALL_H */ diff --git a/translate-all.inc.h b/translate-all.inc.h new file mode 100644 index 0000000..c60a48e --- /dev/null +++ b/translate-all.inc.h @@ -0,0 +1,13 @@ +/* Inline implementations for translate-all.h */ + +static inline size_t tb_caches_count(void) +{ + return 1ULL << TRACE_VCPU_EVENT_COUNT; +} + +static inline struct qht *tb_caches_get(TBContext *tb_ctx, + unsigned long *bitmap) +{ + unsigned long idx = *bitmap; + return &tb_ctx->htables[idx]; +}