
[RFC,v1,01/11] tcg: move tb_find_fast outside the tb_lock critical section

Message ID 1458317932-1875-2-git-send-email-alex.bennee@linaro.org
State New

Commit Message

Alex Bennée March 18, 2016, 4:18 p.m. UTC
From: KONRAD Frederic <fred.konrad@greensocs.com>

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[AJB: minor checkpatch fixes]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v1(ajb)
  - checkpatch fixes
---
 cpu-exec.c        | 74 +++++++++++++++++++++++++++++++++----------------------
 include/qom/cpu.h |  2 ++
 tcg/tcg.h         |  1 +
 translate-all.c   | 23 ++++++++++++++++-
 4 files changed, 70 insertions(+), 30 deletions(-)

Comments

Paolo Bonzini March 18, 2016, 4:54 p.m. UTC | #1
On 18/03/2016 17:18, Alex Bennée wrote:
> -#endif
> -
> -    /* if no translated code available, then translate it now */
> -    cpu->tb_invalidated_flag = 0;

Moving this reset from here...

> -                if (cpu->tb_invalidated_flag) {
> +                if (atomic_read(&cpu->tb_invalidated_flag)) {
>                      /* as some TB could have been invalidated because
>                         of a tb_flush while generating the code, we
>                         must recompute the hash index here */
>                      next_tb = 0;
> +
> +                    /* Clear the flag, we've now observed the flush.  */
> +                    tb_lock_recursive();
> +                    cpu->tb_invalidated_flag = 0;
>                  }

... to here can probably be moved earlier, into Sergey's (my? :D) "make
tb_invalidated_flag per-CPU" patch.

Then this patch can just add the tb_lock_recursive() call.

Paolo
Emilio Cota March 21, 2016, 9:50 p.m. UTC | #2
On Fri, Mar 18, 2016 at 16:18:42 +0000, Alex Bennée wrote:
> From: KONRAD Frederic <fred.konrad@greensocs.com>
> 
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> [AJB: minor checkpatch fixes]
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> 
> ---
> v1(ajb)
>   - checkpatch fixes
> ---
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 07545aa..52f25de 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -225,8 +225,9 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
>      phys_page1 = phys_pc & TARGET_PAGE_MASK;
>      h = tb_phys_hash_func(phys_pc);
>      for (ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
> -         (tb = *ptb1) != NULL;
> +         (tb = atomic_read(ptb1)) != NULL;
>           ptb1 = &tb->phys_hash_next) {
> +        smp_read_barrier_depends();
>          if (tb->pc != pc ||
>              tb->page_addr[0] != phys_page1 ||
>              tb->cs_base != cs_base ||
> @@ -254,7 +255,18 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
[ Adding this missing line to the diff for clarity ]
           /* Move the TB to the head of the list */
>          *ptb1 = tb->phys_hash_next;
>          tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
>          tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;

This function, as is, doesn't really just "find"; two concurrent "finders"
could race here by *writing* to the head of the list at the same time.

The fix is to get rid of this write entirely; moving the just-found TB to
the head of the list is not really that necessary thanks to the CPU's
tb_jmp_cache table. This fix would make the function read-only, which
is what the function's name implies.

Further, I'd like to see tb_phys_hash to use the RCU queue primitives; it
makes everything easier to understand (and we avoid sprinkling the code
base with smp_read_barrier_depends).
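[ For clarity, a rough sketch of the read-only lookup described above,
  assuming the bucket chain were converted to a QLIST traversed with the
  rcu_queue.h macros; the phys_hash_node field and that conversion are
  illustrative, not part of the series, and the second-page check of the
  real function is omitted: ]

#include "qemu/rcu_queue.h"

static TranslationBlock *tb_find_physical_rcu(CPUState *cpu, target_ulong pc,
                                              target_ulong cs_base,
                                              uint64_t flags)
{
    CPUArchState *env = cpu->env_ptr;
    tb_page_addr_t phys_pc = get_page_addr_code(env, pc);
    tb_page_addr_t phys_page1 = phys_pc & TARGET_PAGE_MASK;
    unsigned int h = tb_phys_hash_func(phys_pc);
    TranslationBlock *tb;

    /* Readers only traverse; writers insert at the head with
     * QLIST_INSERT_HEAD_RCU under tb_lock.  There is no MRU move, so the
     * function really is read-only and needs no explicit barriers here.
     */
    QLIST_FOREACH_RCU(tb, &tcg_ctx.tb_ctx.tb_phys_hash[h], phys_hash_node) {
        if (tb->pc == pc &&
            tb->page_addr[0] == phys_page1 &&
            tb->cs_base == cs_base &&
            tb->flags == flags) {
            return tb;
        }
    }
    return NULL;
}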

I have these two changes queued up as part of my upcoming series, which I'm
basing on your patchset.

Thanks for putting these changes together!

		Emilio
Peter Maydell March 21, 2016, 10:08 p.m. UTC | #3
On 21 March 2016 at 21:50, Emilio G. Cota <cota@braap.org> wrote:
> This function, as is, doesn't really just "find"; two concurrent "finders"
> could race here by *writing* to the head of the list at the same time.
>
> The fix is to get rid of this write entirely; moving the just-found TB to
> the head of the list is not really that necessary thanks to the CPU's
> tb_jmp_cache table. This fix would make the function read-only, which
> is what the function's name implies.

It is not _necessary_, but it is a performance optimization to
speed up the "missed in the TLB" case. (A TLB flush will wipe
the tb_jmp_cache table.) From the thread where the move-to-front-of-list
behaviour was added in 2010, benefits cited:

# The exact numbers depend on complexity of guest system.
# - For basic Debian system (no X-server) on versatilepb we observed
# 25% decrease of boot time.
# - For to-be released Samsung LIMO platform on S5PC110 board we
# observed 2x (for older version) and 3x (for newer version)
# decrease of boot time.
# - Small CPU-intensive benchmarks are not affected because they are
# completely handled by 'tb_find_fast'.
#
# We also noticed better response time for heavyweight GUI applications,
# but I do not know how to measure it accurately.
(https://lists.gnu.org/archive/html/qemu-devel/2010-12/msg00380.html)

I think what's happening here is that for guest CPUs where TLB
invalidation happens fairly frequently (notably ARM, because
we don't model ASIDs in the QEMU TLB and thus have to flush
the TLB on any context switch) the case of "we didn't hit in
the TLB but we do have this TB and it was used really recently"
happens often enough to make it worthwhile for the
tb_find_physical() code to keep its hash buckets in LRU order.

Obviously that's all five year old data now, so a pinch of
salt may be indicated, but I'd rather we didn't just remove
the optimisation without some benchmarking to check that it's
not significant. A 2x difference is huge.

thanks
-- PMM
Emilio Cota March 21, 2016, 11:59 p.m. UTC | #4
On Mon, Mar 21, 2016 at 22:08:06 +0000, Peter Maydell wrote:
> It is not _necessary_, but it is a performance optimization to
> speed up the "missed in the TLB" case. (A TLB flush will wipe
> the tb_jmp_cache table.) From the thread where the move-to-front-of-list
> behaviour was added in 2010, benefits cited:

(snip)
> I think what's happening here is that for guest CPUs where TLB
> invalidation happens fairly frequently (notably ARM, because
> we don't model ASIDs in the QEMU TLB and thus have to flush
> the TLB on any context switch) the case of "we didn't hit in
> the TLB but we do have this TB and it was used really recently"
> happens often enough to make it worthwhile for the
> tb_find_physical() code to keep its hash buckets in LRU order.
> 
> Obviously that's all five year old data now, so a pinch of
> salt may be indicated, but I'd rather we didn't just remove
> the optimisation without some benchmarking to check that it's
> not significant. A 2x difference is huge.

Good point. Most of my tests have been on x86-on-x86, and the
difference there (for many CPU-intensive benchmarks such as SPEC) was
negligible.

Just tested the current master booting Alex' debian ARM image, without
LRU, and I see a 20% increase in boot time.

I'll add per-bucket locks to keep the same behaviour without hurting
scalability.

Thanks,

		Emilio
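[ For readers following along, a minimal sketch of the per-bucket locking
  Emilio mentions.  The lock array, its initialisation and the helper name
  are illustrative assumptions, not code from the series; QemuMutex is used
  only as the portable primitive, a spinlock may be a better fit.  Readers
  still traverse the chain locklessly as in the patch; only chain writers,
  including this MRU move, serialise on the bucket lock: ]

#include "qemu/thread.h"

static QemuMutex tb_phys_hash_lock[CODE_GEN_PHYS_HASH_SIZE];

static void tb_phys_hash_lock_init(void)
{
    int i;

    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        qemu_mutex_init(&tb_phys_hash_lock[i]);
    }
}

static void tb_phys_hash_mru_move(TranslationBlock *tb, unsigned int h)
{
    TranslationBlock **ptb;

    qemu_mutex_lock(&tb_phys_hash_lock[h]);
    /* Re-walk the bucket under the lock: another finder may already have
     * moved the TB, or a flush may have emptied the bucket meanwhile.
     */
    for (ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h]; *ptb;
         ptb = &(*ptb)->phys_hash_next) {
        if (*ptb == tb) {
            /* Unlink, then reinsert at the head of the chain.  */
            *ptb = tb->phys_hash_next;
            tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
            /* Make the next pointer visible before publishing tb at the
             * head; pairs with the reader's dependency barrier.
             */
            smp_wmb();
            tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
            break;
        }
    }
    qemu_mutex_unlock(&tb_phys_hash_lock[h]);
}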
Paolo Bonzini March 22, 2016, 8:29 a.m. UTC | #5
On 22/03/2016 00:59, Emilio G. Cota wrote:
> Good point. Most of my tests have been on x86-on-x86, and the
> difference there (for many CPU-intensive benchmarks such as SPEC) was
> negligible.
> 
> Just tested the current master booting Alex' debian ARM image, without
> LRU, and I see a 20% increase in boot time.
> 
> I'll add per-bucket locks to keep the same behaviour without hurting
> scalability.

You should probably skip the MRU move when the TB is already within the first
N items of the list (e.g. N=4).  Then you take the lock much less often.

Paolo
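[ A sketch of how Paolo's suggestion could be layered on top of the lookup
  loop from this patch, reusing the hypothetical tb_phys_hash_mru_move()
  helper sketched above; the depth counter and threshold are illustrative,
  and the second-page check is again omitted: ]

#define TB_MRU_THRESHOLD 4   /* illustrative value for Paolo's "e.g. N=4" */

static TranslationBlock *tb_find_physical_bounded_mru(CPUState *cpu,
                                                      target_ulong pc,
                                                      target_ulong cs_base,
                                                      uint64_t flags)
{
    CPUArchState *env = cpu->env_ptr;
    tb_page_addr_t phys_pc = get_page_addr_code(env, pc);
    tb_page_addr_t phys_page1 = phys_pc & TARGET_PAGE_MASK;
    unsigned int h = tb_phys_hash_func(phys_pc);
    TranslationBlock **ptb1, *tb;
    unsigned int depth = 0;

    for (ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
         (tb = atomic_read(ptb1)) != NULL;
         ptb1 = &tb->phys_hash_next, depth++) {
        smp_read_barrier_depends();
        if (tb->pc == pc && tb->page_addr[0] == phys_page1 &&
            tb->cs_base == cs_base && tb->flags == flags) {
            break;
        }
    }
    if (tb && depth >= TB_MRU_THRESHOLD) {
        /* Deep hit: worth taking the per-bucket lock to restore the LRU
         * ordering.  Hits near the head skip the lock entirely.
         */
        tb_phys_hash_mru_move(tb, h);
    }
    return tb;
}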
Alex Bennée March 22, 2016, 11:55 a.m. UTC | #6
Emilio G. Cota <cota@braap.org> writes:

> On Fri, Mar 18, 2016 at 16:18:42 +0000, Alex Bennée wrote:
>> From: KONRAD Frederic <fred.konrad@greensocs.com>
>>
>> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> [AJB: minor checkpatch fixes]
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>>
>> ---
>> v1(ajb)
>>   - checkpatch fixes
>> ---
>> diff --git a/cpu-exec.c b/cpu-exec.c
>> index 07545aa..52f25de 100644
>> --- a/cpu-exec.c
>> +++ b/cpu-exec.c
>> @@ -225,8 +225,9 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
>>      phys_page1 = phys_pc & TARGET_PAGE_MASK;
>>      h = tb_phys_hash_func(phys_pc);
>>      for (ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
>> -         (tb = *ptb1) != NULL;
>> +         (tb = atomic_read(ptb1)) != NULL;
>>           ptb1 = &tb->phys_hash_next) {
>> +        smp_read_barrier_depends();
>>          if (tb->pc != pc ||
>>              tb->page_addr[0] != phys_page1 ||
>>              tb->cs_base != cs_base ||
>> @@ -254,7 +255,18 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
> [ Adding this missing line to the diff for clarity ]

I have to admit that clarity is one thing the code in this area could do
with. I find it hard to follow on the best of days.

>            /* Move the TB to the head of the list */
>>          *ptb1 = tb->phys_hash_next;
>>          tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
>>          tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
>
> This function, as is, doesn't really just "find"; two concurrent "finders"
> could race here by *writing* to the head of the list at the same time.
>
> The fix is to get rid of this write entirely; moving the just-found TB to
> the head of the list is not really that necessary thanks to the CPU's
> tb_jmp_cache table. This fix would make the function read-only, which
> is what the function's name implies.
>
> Further, I'd like to see tb_phys_hash to use the RCU queue primitives; it
> makes everything easier to understand (and we avoid sprinkling the code
> base with smp_read_barrier_depends).
>
> I have these two changes queued up as part of my upcoming series, which I'm
> basing on your patchset.

Cool, I look forward to it.

>
> Thanks for putting these changes together!

This was exactly my aim, getting the common base stuff reviewed so the
competing plurality of approaches can build on it as we shake out the
design ;-)

>
> 		Emilio


--
Alex Bennée
Alex Bennée March 22, 2016, 11:59 a.m. UTC | #7
Emilio G. Cota <cota@braap.org> writes:

> On Mon, Mar 21, 2016 at 22:08:06 +0000, Peter Maydell wrote:
>> It is not _necessary_, but it is a performance optimization to
>> speed up the "missed in the TLB" case. (A TLB flush will wipe
>> the tb_jmp_cache table.) From the thread where the move-to-front-of-list
>> behaviour was added in 2010, benefits cited:
>
> (snip)
>> I think what's happening here is that for guest CPUs where TLB
>> invalidation happens fairly frequently (notably ARM, because
>> we don't model ASIDs in the QEMU TLB and thus have to flush
>> the TLB on any context switch) the case of "we didn't hit in
>> the TLB but we do have this TB and it was used really recently"
>> happens often enough to make it worthwhile for the
>> tb_find_physical() code to keep its hash buckets in LRU order.
>>
>> Obviously that's all five year old data now, so a pinch of
>> salt may be indicated, but I'd rather we didn't just remove
>> the optimisation without some benchmarking to check that it's
>> not significant. A 2x difference is huge.
>
> Good point. Most of my tests have been on x86-on-x86, and the
> difference there (for many CPU-intensive benchmarks such as SPEC) was
> negligible.
>
> Just tested the current master booting Alex' debian ARM image, without
> LRU, and I see a 20% increase in boot time.

Also see:

https://github.com/stsquad/kvm-unit-tests/tree/mttcg/current-tests-v5

./run-tests.sh -g tcg -t

The tcg tests are designed to exercise the TB find and linking logic.
The computed and paged variants of the test always exit the run loop to
look up the next TB. Granted, the tests are pathological cases, but they are
useful for comparing different approaches at the edge cases.

>
> I'll add per-bucket locks to keep the same behaviour without hurting
> scalability.
>
> Thanks,
>
> 		Emilio


--
Alex Bennée

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index 07545aa..52f25de 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -225,8 +225,9 @@  static TranslationBlock *tb_find_physical(CPUState *cpu,
     phys_page1 = phys_pc & TARGET_PAGE_MASK;
     h = tb_phys_hash_func(phys_pc);
     for (ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
-         (tb = *ptb1) != NULL;
+         (tb = atomic_read(ptb1)) != NULL;
          ptb1 = &tb->phys_hash_next) {
+        smp_read_barrier_depends();
         if (tb->pc != pc ||
             tb->page_addr[0] != phys_page1 ||
             tb->cs_base != cs_base ||
@@ -254,7 +255,18 @@  static TranslationBlock *tb_find_physical(CPUState *cpu,
         *ptb1 = tb->phys_hash_next;
         tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
         tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+    } else {
+        return NULL;
     }
+
+    /* If tb_flush was called since the last time we released the lock,
+     * forget about this TB.
+     */
+    smp_rmb();
+    if (atomic_read(&cpu->tb_invalidated_flag)) {
+        return NULL;
+    }
+
     return tb;
 }
 
@@ -265,36 +277,31 @@  static TranslationBlock *tb_find_slow(CPUState *cpu,
 {
     TranslationBlock *tb;
 
-    tb = tb_find_physical(cpu, pc, cs_base, flags);
-    if (tb) {
-        goto found;
-    }
-
-#ifdef CONFIG_USER_ONLY
-    /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
-     * taken outside tb_lock.  Since we're momentarily dropping
-     * tb_lock, there's a chance that our desired tb has been
-     * translated.
+    /* First try to get the tb.  If we don't find it we need to lock and
+     * compile it.
      */
-    tb_unlock();
-    mmap_lock();
-    tb_lock();
     tb = tb_find_physical(cpu, pc, cs_base, flags);
-    if (tb) {
-        mmap_unlock();
-        goto found;
-    }
-#endif
-
-    /* if no translated code available, then translate it now */
-    cpu->tb_invalidated_flag = 0;
-    tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
-
+    if (!tb) {
 #ifdef CONFIG_USER_ONLY
-    mmap_unlock();
+        /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
+         * taken outside tb_lock.  tb_lock is released later in
+         * cpu_exec.
+         */
+        mmap_lock();
+        tb_lock();
+
+        /* Retry to get the TB in case another CPU just translated it, to
+         * avoid having a duplicated TB in the pool.
+         */
+        tb = tb_find_physical(cpu, pc, cs_base, flags);
 #endif
+        if (!tb) {
+            /* if no translated code available, then translate it now */
+            tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
+        }
+        mmap_unlock();
+    }
 
-found:
     /* we add the TB in the virtual pc hash table */
     cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
     return tb;
@@ -312,6 +319,8 @@  static inline TranslationBlock *tb_find_fast(CPUState *cpu)
        is executed. */
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
     tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+    /* Read tb_jmp_cache before tb->pc.  */
+    smp_read_barrier_depends();
     if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
                  tb->flags != flags)) {
         tb = tb_find_slow(cpu, pc, cs_base, flags);
@@ -489,15 +498,18 @@  int cpu_exec(CPUState *cpu)
                     cpu->exception_index = EXCP_INTERRUPT;
                     cpu_loop_exit(cpu);
                 }
-                tb_lock();
                 tb = tb_find_fast(cpu);
                 /* Note: we do it here to avoid a gcc bug on Mac OS X when
                    doing it in tb_find_slow */
-                if (cpu->tb_invalidated_flag) {
+                if (atomic_read(&cpu->tb_invalidated_flag)) {
                     /* as some TB could have been invalidated because
                        of a tb_flush while generating the code, we
                        must recompute the hash index here */
                     next_tb = 0;
+
+                    /* Clear the flag, we've now observed the flush.  */
+                    tb_lock_recursive();
+                    cpu->tb_invalidated_flag = 0;
                 }
                 if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
                     qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
@@ -508,10 +520,14 @@  int cpu_exec(CPUState *cpu)
                    jump. */
                 if (next_tb != 0 && tb->page_addr[1] == -1
                     && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+                    tb_lock_recursive();
                     tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
                                 next_tb & TB_EXIT_MASK, tb);
                 }
-                tb_unlock();
+                /* The lock may not be taken if we went through the
+                 * fast lookup path and did not have to do any patching.
+                 */
+                tb_lock_reset();
                 if (likely(!cpu->exit_request)) {
                     trace_exec_tb(tb, tb->pc);
                     tc_ptr = tb->tc_ptr;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 9538f9c..4132108 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -241,6 +241,8 @@  struct kvm_run;
  * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
  *           CPU and return to its top level loop.
  * @tb_invalidated_flag: Set to tell TCG that tb_flush has been called.
+ * It is only cleared while holding the tb_lock, so that no tb_flush can
+ * happen concurrently.
  * @singlestep_enabled: Flags for single-stepping.
  * @icount_extra: Instructions until next timer event.
  * @icount_decr: Number of cycles left, with interrupt flag in high bit.
diff --git a/tcg/tcg.h b/tcg/tcg.h
index b83f763..aa4e123 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -615,6 +615,7 @@  void tcg_pool_delete(TCGContext *s);
 
 void tb_lock(void);
 void tb_unlock(void);
+bool tb_lock_recursive(void);
 void tb_lock_reset(void);
 
 static inline void *tcg_malloc(int size)
diff --git a/translate-all.c b/translate-all.c
index 8e1edd6..f68dcbc 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -143,6 +143,17 @@  void tb_unlock(void)
 #endif
 }
 
+bool tb_lock_recursive(void)
+{
+#ifdef CONFIG_USER_ONLY
+    if (have_tb_lock) {
+        return false;
+    }
+    tb_lock();
+#endif
+    return true;
+}
+
 void tb_lock_reset(void)
 {
 #ifdef CONFIG_USER_ONLY
@@ -843,7 +854,8 @@  void tb_flush(CPUState *cpu)
     tcg_ctx.tb_ctx.nb_tbs = 0;
 
     CPU_FOREACH(cpu) {
-        cpu->tb_invalidated_flag = 1;
+        atomic_set(&cpu->tb_invalidated_flag, 1);
+        smp_wmb();
         memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
     }
 
@@ -979,6 +991,9 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     pc = tb->pc;
     tb->pc = -1;
 
+    /* Pairs with smp_read_barrier_depends() in tb_find_fast.  */
+    smp_wmb();
+
     /* Then suppress this TB from the two jump lists.  CPUs will not jump
      * anymore into this translation block.
      */
@@ -1478,7 +1493,13 @@  static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     /* add in the physical hash table */
     h = tb_phys_hash_func(phys_pc);
     ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+
+    /* Both write barriers pair with tb_find_physical's
+     * smp_read_barrier_depends.
+     */
+    smp_wmb();
     tb->phys_hash_next = *ptb;
+    smp_wmb();
     *ptb = tb;
 
     /* add in the page list */