@@ -906,6 +906,7 @@ int main(int argc, char **argv)
#endif
}
tcg_exec_init(0);
+ tcg_gen_buffer_init();
cpu_exec_init_all();
/* NOTE: we need to init the CPU at this stage to get
qemu_host_page_size */
@@ -746,6 +746,8 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *env = arg;
+ tcg_gen_buffer_init();
+
qemu_tcg_init_cpu_signals();
qemu_thread_get_self(env->thread);
@@ -851,6 +851,7 @@ int main(int argc, char **argv)
#endif
}
tcg_exec_init(0);
+ tcg_gen_buffer_init();
cpu_exec_init_all();
/* NOTE: we need to init the CPU at this stage to get
qemu_host_page_size */
@@ -79,10 +79,10 @@
#define SMC_BITMAP_USE_THRESHOLD 10
-static TranslationBlock *tbs;
+static DEFINE_TLS(TranslationBlock*, tbs);
static int code_gen_max_blocks;
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-static int nb_tbs;
+static DEFINE_TLS(int, nb_tbs);
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
@@ -103,11 +103,12 @@ spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
#endif
uint8_t code_gen_prologue[1024] code_gen_section;
-static uint8_t *code_gen_buffer;
+static bool code_gen_enabled;
+static DEFINE_TLS(uint8_t*, code_gen_buffer);
static unsigned long code_gen_buffer_size;
/* threshold to flush the translated code buffer */
static unsigned long code_gen_buffer_max_size;
-static uint8_t *code_gen_ptr;
+static DEFINE_TLS(uint8_t*, code_gen_ptr);
#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
@@ -469,18 +470,17 @@ static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
#endif
#ifdef USE_STATIC_CODE_GEN_BUFFER
-static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
- __attribute__((aligned (CODE_GEN_ALIGN)));
+static DEFINE_TLS(uint8_t [DEFAULT_CODE_GEN_BUFFER_SIZE],
+ static_code_gen_buffer) __attribute__((aligned(CODE_GEN_ALIGN)));
#endif
-static void code_gen_alloc(unsigned long tb_size)
+static void code_gen_alloc(void)
{
#ifdef USE_STATIC_CODE_GEN_BUFFER
- code_gen_buffer = static_code_gen_buffer;
+ tls_var(code_gen_buffer) = tls_var(static_code_gen_buffer);
code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
- map_exec(code_gen_buffer, code_gen_buffer_size);
+ map_exec(tls_var(code_gen_buffer), code_gen_buffer_size);
#else
- code_gen_buffer_size = tb_size;
if (code_gen_buffer_size == 0) {
#if defined(CONFIG_USER_ONLY)
code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
@@ -522,10 +522,10 @@ static void code_gen_alloc(unsigned long tb_size)
}
start = (void *)0x90000000UL;
#endif
- code_gen_buffer = mmap(start, code_gen_buffer_size,
+ tls_var(code_gen_buffer) = mmap(start, code_gen_buffer_size,
PROT_WRITE | PROT_READ | PROT_EXEC,
flags, -1, 0);
- if (code_gen_buffer == MAP_FAILED) {
+ if (tls_var(code_gen_buffer) == MAP_FAILED) {
fprintf(stderr, "Could not allocate dynamic translator buffer\n");
exit(1);
}
@@ -553,24 +553,31 @@ static void code_gen_alloc(unsigned long tb_size)
code_gen_buffer_size = (512 * 1024 * 1024);
}
#endif
- code_gen_buffer = mmap(addr, code_gen_buffer_size,
+ tls_var(code_gen_buffer) = mmap(addr, code_gen_buffer_size,
PROT_WRITE | PROT_READ | PROT_EXEC,
flags, -1, 0);
- if (code_gen_buffer == MAP_FAILED) {
+ if (tls_var(code_gen_buffer) == MAP_FAILED) {
fprintf(stderr, "Could not allocate dynamic translator buffer\n");
exit(1);
}
}
#else
- code_gen_buffer = g_malloc(code_gen_buffer_size);
- map_exec(code_gen_buffer, code_gen_buffer_size);
+ tls_var(code_gen_buffer) = g_malloc(code_gen_buffer_size);
+ map_exec(tls_var(code_gen_buffer), code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
map_exec(code_gen_prologue, sizeof(code_gen_prologue));
code_gen_buffer_max_size = code_gen_buffer_size -
(TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
- tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
+ tls_var(tbs) = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
+ code_gen_enabled = 1;
+}
+
+void tcg_gen_buffer_init(void)
+{
+ code_gen_alloc();
+ tls_var(code_gen_ptr) = tls_var(code_gen_buffer);
}
/* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -579,8 +586,7 @@ static void code_gen_alloc(unsigned long tb_size)
void tcg_exec_init(unsigned long tb_size)
{
cpu_gen_init();
- code_gen_alloc(tb_size);
- code_gen_ptr = code_gen_buffer;
+ code_gen_buffer_size = tb_size;
page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
/* There's no guest base to take into account, so go ahead and
@@ -591,7 +597,7 @@ void tcg_exec_init(unsigned long tb_size)
bool tcg_enabled(void)
{
- return code_gen_buffer != NULL;
+ return code_gen_enabled;
}
void cpu_exec_init_all(void)
@@ -682,10 +688,13 @@ static TranslationBlock *tb_alloc(target_ulong pc)
{
TranslationBlock *tb;
- if (nb_tbs >= code_gen_max_blocks ||
- (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
+ if (tls_var(nb_tbs) >= code_gen_max_blocks ||
+ (tls_var(code_gen_ptr) - tls_var(code_gen_buffer)) >=
+ code_gen_buffer_max_size) {
return NULL;
- tb = &tbs[nb_tbs++];
+ }
+
+ tb = &tls_var(tbs)[tls_var(nb_tbs)++];
tb->pc = pc;
tb->cflags = 0;
return tb;
@@ -696,9 +705,9 @@ void tb_free(TranslationBlock *tb)
/* In practice this is mostly used for single use temporary TB
Ignore the hard cases and just back up if this TB happens to
be the last one generated. */
- if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
- code_gen_ptr = tb->tc_ptr;
- nb_tbs--;
+ if (tls_var(nb_tbs) > 0 && tb == &tls_var(tbs)[tls_var(nb_tbs) - 1]) {
+ tls_var(code_gen_ptr) = tb->tc_ptr;
+ tls_var(nb_tbs)--;
}
}
@@ -749,14 +758,17 @@ void tb_flush(CPUState *env1)
CPUState *env;
#if defined(DEBUG_FLUSH)
printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
- (unsigned long)(code_gen_ptr - code_gen_buffer),
- nb_tbs, nb_tbs > 0 ?
- ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
+ (unsigned long)(tls_var(code_gen_ptr) - tls_var(code_gen_buffer)),
+ tls_var(nb_tbs), tls_var(nb_tbs) > 0 ?
+ ((unsigned long)(tls_var(code_gen_ptr) - tls_var(code_gen_buffer))) /
+ tls_var(nb_tbs) : 0);
#endif
- if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
+ if ((unsigned long)(tls_var(code_gen_ptr) - tls_var(code_gen_buffer)) >
+ code_gen_buffer_size) {
cpu_abort(env1, "Internal error: code buffer overflow\n");
+ }
- nb_tbs = 0;
+ tls_var(nb_tbs) = 0;
for(env = first_cpu; env != NULL; env = env->next_cpu) {
memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
@@ -765,7 +777,7 @@ void tb_flush(CPUState *env1)
memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
page_flush_tb();
- code_gen_ptr = code_gen_buffer;
+ tls_var(code_gen_ptr) = tls_var(code_gen_buffer);
/* XXX: flush processor icache at this point if cache flush is
expensive */
tb_flush_count++;
@@ -1008,13 +1020,14 @@ TranslationBlock *tb_gen_code(CPUState *env,
/* Don't forget to invalidate previous TB info. */
tb_invalidated_flag = 1;
}
- tc_ptr = code_gen_ptr;
+ tc_ptr = tls_var(code_gen_ptr);
tb->tc_ptr = tc_ptr;
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
cpu_gen_code(env, tb, &code_gen_size);
- code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+ tls_var(code_gen_ptr) = (void *)(((unsigned long)tls_var(code_gen_ptr) +
+ code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
/* check next page if needed */
virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
@@ -1330,17 +1343,19 @@ TranslationBlock *tb_find_pc(unsigned long tc_ptr)
unsigned long v;
TranslationBlock *tb;
- if (nb_tbs <= 0)
+ if (tls_var(nb_tbs) <= 0) {
return NULL;
- if (tc_ptr < (unsigned long)code_gen_buffer ||
- tc_ptr >= (unsigned long)code_gen_ptr)
+ }
+ if (tc_ptr < (unsigned long)tls_var(code_gen_buffer) ||
+ tc_ptr >= (unsigned long)tls_var(code_gen_ptr)) {
return NULL;
+ }
/* binary search (cf Knuth) */
m_min = 0;
- m_max = nb_tbs - 1;
+ m_max = tls_var(nb_tbs) - 1;
while (m_min <= m_max) {
m = (m_min + m_max) >> 1;
- tb = &tbs[m];
+ tb = &tls_var(tbs)[m];
v = (unsigned long)tb->tc_ptr;
if (v == tc_ptr)
return tb;
@@ -1350,7 +1365,7 @@ TranslationBlock *tb_find_pc(unsigned long tc_ptr)
m_min = m + 1;
}
}
- return &tbs[m_max];
+ return &tls_var(tbs)[m_max];
}
static void tb_reset_jump_recursive(TranslationBlock *tb);
@@ -4332,8 +4347,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
cross_page = 0;
direct_jmp_count = 0;
direct_jmp2_count = 0;
- for(i = 0; i < nb_tbs; i++) {
- tb = &tbs[i];
+ for(i = 0; i < tls_var(nb_tbs); i++) {
+ tb = &tls_var(tbs)[i];
target_code_size += tb->size;
if (tb->size > max_target_code_size)
max_target_code_size = tb->size;
@@ -4349,23 +4364,29 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
/* XXX: avoid using doubles ? */
cpu_fprintf(f, "Translation buffer state:\n");
cpu_fprintf(f, "gen code size %td/%ld\n",
- code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
+ tls_var(code_gen_ptr) - tls_var(code_gen_buffer),
+ code_gen_buffer_max_size);
cpu_fprintf(f, "TB count %d/%d\n",
- nb_tbs, code_gen_max_blocks);
+ tls_var(nb_tbs), code_gen_max_blocks);
cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
- nb_tbs ? target_code_size / nb_tbs : 0,
+ tls_var(nb_tbs) ? target_code_size / tls_var(nb_tbs) : 0,
max_target_code_size);
cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
- nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
- target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
+ tls_var(nb_tbs) ?
+ (tls_var(code_gen_ptr) - tls_var(code_gen_buffer)) /
+ tls_var(nb_tbs) : 0,
+ target_code_size ? (double) (tls_var(code_gen_ptr) -
+ tls_var(code_gen_buffer)) / target_code_size : 0);
cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
cross_page,
- nb_tbs ? (cross_page * 100) / nb_tbs : 0);
+ tls_var(nb_tbs) ? (cross_page * 100) / tls_var(nb_tbs) : 0);
cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
direct_jmp_count,
- nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
+ tls_var(nb_tbs) ?
+ (direct_jmp_count * 100) / tls_var(nb_tbs) : 0,
direct_jmp2_count,
- nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
+ tls_var(nb_tbs) ?
+ (direct_jmp2_count * 100) / tls_var(nb_tbs) : 0);
cpu_fprintf(f, "\nStatistics:\n");
cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
@@ -3364,6 +3364,7 @@ int main(int argc, char **argv, char **envp)
#endif
}
tcg_exec_init(0);
+ tcg_gen_buffer_init();
cpu_exec_init_all();
/* NOTE: we need to init the CPU at this stage to get
qemu_host_page_size */
@@ -258,6 +258,7 @@ typedef enum LostTickPolicy {
LOST_TICK_MAX
} LostTickPolicy;
+void tcg_gen_buffer_init(void);
void tcg_exec_init(unsigned long tb_size);
bool tcg_enabled(void);
This commit converts code_gen_buffer, code_gen_ptr, tbs and nb_tbs to TLS. We need this if we want TCG to become multithreaded. Initialization of code_gen_buffer and code_gen_ptr is moved to a new tcg_gen_buffer_init() function, because the I/O thread does not need TCG buffers to be allocated and initialized; initialization is now done in qemu_tcg_cpu_thread_fn() by each HW thread individually. Also, tcg_enabled() now returns a dedicated flag instead of testing (code_gen_buffer != NULL), since that test would always be false when evaluated from the I/O thread, whose thread-local code_gen_buffer is never initialized. The patch also contains some code formatting changes. Signed-off-by: Evgeny Voevodin <e.voevodin@samsung.com> --- bsd-user/main.c | 1 + cpus.c | 2 + darwin-user/main.c | 1 + exec.c | 121 ++++++++++++++++++++++++++++++--------------------- linux-user/main.c | 1 + qemu-common.h | 1 + 6 files changed, 77 insertions(+), 50 deletions(-)