Diffstat (limited to 'accel')
-rw-r--r--  accel/tcg/cpu-exec.c      |  73
-rw-r--r--  accel/tcg/cputlb.c        |  17
-rw-r--r--  accel/tcg/tcg-runtime.c   |  34
-rw-r--r--  accel/tcg/tcg-runtime.h   |   2
-rw-r--r--  accel/tcg/translate-all.c |  96
5 files changed, 118 insertions(+), 104 deletions(-)
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index ff6866624a..363dfa208a 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -28,6 +28,7 @@
 #include "exec/address-spaces.h"
 #include "qemu/rcu.h"
 #include "exec/tb-hash.h"
+#include "exec/tb-lookup.h"
 #include "exec/log.h"
 #include "qemu/main-loop.h"
 #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
@@ -142,11 +143,11 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
     uintptr_t ret;
     TranslationBlock *last_tb;
     int tb_exit;
-    uint8_t *tb_ptr = itb->tc_ptr;
+    uint8_t *tb_ptr = itb->tc.ptr;
 
     qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
                            "Trace %p [%d: " TARGET_FMT_lx "] %s\n",
-                           itb->tc_ptr, cpu->cpu_index, itb->pc,
+                           itb->tc.ptr, cpu->cpu_index, itb->pc,
                            lookup_symbol(itb->pc));
 
 #if defined(DEBUG_DISAS)
@@ -178,7 +179,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
         qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
                                "Stopped execution of TB chain before %p ["
                                TARGET_FMT_lx "] %s\n",
-                               last_tb->tc_ptr, last_tb->pc,
+                               last_tb->tc.ptr, last_tb->pc,
                                lookup_symbol(last_tb->pc));
         if (cc->synchronize_from_tb) {
             cc->synchronize_from_tb(cpu, last_tb);
@@ -293,7 +294,7 @@ static bool tb_cmp(const void *p, const void *d)
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
-        !atomic_read(&tb->invalid)) {
+        !(atomic_read(&tb->cflags) & CF_INVALID)) {
         /* check next page if needed */
         if (tb->page_addr[1] == -1) {
             return true;
@@ -333,7 +334,7 @@ void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
 {
     if (TCG_TARGET_HAS_direct_jump) {
         uintptr_t offset = tb->jmp_target_arg[n];
-        uintptr_t tc_ptr = (uintptr_t)tb->tc_ptr;
+        uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr;
         tb_target_set_jmp_target(tc_ptr, tc_ptr + offset, addr);
     } else {
         tb->jmp_target_arg[n] = addr;
@@ -353,11 +354,11 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                            "Linking TBs %p [" TARGET_FMT_lx
                            "] index %d -> %p [" TARGET_FMT_lx "]\n",
-                           tb->tc_ptr, tb->pc, n,
-                           tb_next->tc_ptr, tb_next->pc);
+                           tb->tc.ptr, tb->pc, n,
+                           tb_next->tc.ptr, tb_next->pc);
 
     /* patch the native jump address */
-    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc_ptr);
+    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
 
     /* add in TB jmp circular list */
     tb->jmp_list_next[n] = tb_next->jmp_list_first;
@@ -368,43 +369,31 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
                                         TranslationBlock *last_tb,
                                         int tb_exit)
 {
-    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
-    bool have_tb_lock = false;
-
-    /* we record a subset of the CPU state. It will
-       always be the same before a given translated block
-       is executed. */
-    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
-    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
-    if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
-                 tb->flags != flags ||
-                 tb->trace_vcpu_dstate != *cpu->trace_dstate)) {
-        tb = tb_htable_lookup(cpu, pc, cs_base, flags);
-        if (!tb) {
+    bool acquired_tb_lock = false;
 
-            /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
-             * taken outside tb_lock. As system emulation is currently
-             * single threaded the locks are NOPs.
-             */
-            mmap_lock();
-            tb_lock();
-            have_tb_lock = true;
-
-            /* There's a chance that our desired tb has been translated while
-             * taking the locks so we check again inside the lock.
-             */
-            tb = tb_htable_lookup(cpu, pc, cs_base, flags);
-            if (!tb) {
-                /* if no translated code available, then translate it now */
-                tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
-            }
+    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags);
+    if (tb == NULL) {
+        /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
+         * taken outside tb_lock. As system emulation is currently
+         * single threaded the locks are NOPs.
+         */
+        mmap_lock();
+        tb_lock();
+        acquired_tb_lock = true;
 
-            mmap_unlock();
+        /* There's a chance that our desired tb has been translated while
+         * taking the locks so we check again inside the lock.
+         */
+        tb = tb_htable_lookup(cpu, pc, cs_base, flags);
+        if (likely(tb == NULL)) {
+            /* if no translated code available, then translate it now */
+            tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
         }
+        mmap_unlock();
 
         /* We add the TB in the virtual pc hash table for the fast lookup */
         atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
     }
@@ -419,15 +408,15 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
 #endif
     /* See if we can patch the calling TB. */
     if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
-        if (!have_tb_lock) {
+        if (!acquired_tb_lock) {
             tb_lock();
-            have_tb_lock = true;
+            acquired_tb_lock = true;
         }
-        if (!tb->invalid) {
+        if (!(tb->cflags & CF_INVALID)) {
             tb_add_jump(last_tb, tb_exit, tb);
         }
     }
-    if (have_tb_lock) {
+    if (acquired_tb_lock) {
         tb_unlock();
     }
     return tb;
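The tb_lookup__cpu_state() helper that replaces the open-coded lookup in tb_find() above lives in the new include/exec/tb-lookup.h header, which is outside this diffstat (limited to 'accel'). A sketch of the consolidated lookup, reconstructed from how the callers in this diff use it rather than quoted from that header, looks roughly like this:

    /* Sketch only: the authoritative helper is in include/exec/tb-lookup.h,
     * which this accel-only diff does not show.
     */
    static inline TranslationBlock *tb_lookup__cpu_state(CPUState *cpu,
                                                         target_ulong *pc,
                                                         target_ulong *cs_base,
                                                         uint32_t *flags)
    {
        CPUArchState *env = (CPUArchState *)cpu->env_ptr;
        TranslationBlock *tb;
        uint32_t hash;

        /* Read the subset of CPU state that identifies a TB exactly once. */
        cpu_get_tb_cpu_state(env, pc, cs_base, flags);
        hash = tb_jmp_cache_hash_func(*pc);
        tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]);

        /* Fast path: the per-vCPU jump cache holds a still-valid match. */
        if (likely(tb &&
                   tb->pc == *pc &&
                   tb->cs_base == *cs_base &&
                   tb->flags == *flags &&
                   tb->trace_vcpu_dstate == *cpu->trace_dstate &&
                   !(atomic_read(&tb->cflags) & CF_INVALID))) {
            return tb;
        }

        /* Slow path: global hash table; refill the jump cache on a hit. */
        tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags);
        if (tb != NULL) {
            atomic_set(&cpu->tb_jmp_cache[hash], tb);
        }
        return tb;
    }

The same helper now serves both tb_find() here and HELPER(lookup_tb_ptr) below, so the validity check (including the new CF_INVALID test) lives in exactly one place.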
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index bcbcc4db6c..5b1ef1442c 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -92,8 +92,18 @@ static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
     }
 }
 
-/* statistics */
-int tlb_flush_count;
+size_t tlb_flush_count(void)
+{
+    CPUState *cpu;
+    size_t count = 0;
+
+    CPU_FOREACH(cpu) {
+        CPUArchState *env = cpu->env_ptr;
+
+        count += atomic_read(&env->tlb_flush_count);
+    }
+    return count;
+}
 
 /* This is OK because CPU architectures generally permit an
  * implementation to drop entries from the TLB at any time, so
@@ -112,7 +122,8 @@ static void tlb_flush_nocheck(CPUState *cpu)
     }
 
     assert_cpu_is_self(cpu);
-    tlb_debug("(count: %d)\n", tlb_flush_count++);
+    atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
+    tlb_debug("(count: %zu)\n", tlb_flush_count());
 
     tb_lock();
 
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index aafb171294..54d89100d9 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,7 +27,7 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
-#include "exec/tb-hash.h"
+#include "exec/tb-lookup.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 
@@ -144,34 +144,22 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
     return ctpop64(arg);
 }
 
-void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
+void *HELPER(lookup_tb_ptr)(CPUArchState *env)
 {
     CPUState *cpu = ENV_GET_CPU(env);
     TranslationBlock *tb;
     target_ulong cs_base, pc;
-    uint32_t flags, addr_hash;
-
-    addr_hash = tb_jmp_cache_hash_func(addr);
-    tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]);
-    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
-
-    if (unlikely(!(tb
-                   && tb->pc == addr
-                   && tb->cs_base == cs_base
-                   && tb->flags == flags
-                   && tb->trace_vcpu_dstate == *cpu->trace_dstate))) {
-        tb = tb_htable_lookup(cpu, addr, cs_base, flags);
-        if (!tb) {
-            return tcg_ctx.code_gen_epilogue;
-        }
-        atomic_set(&cpu->tb_jmp_cache[addr_hash], tb);
-    }
+    uint32_t flags;
 
-    qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
+    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags);
+    if (tb == NULL) {
+        return tcg_ctx.code_gen_epilogue;
+    }
+    qemu_log_mask_and_addr(CPU_LOG_EXEC, pc,
                            "Chain %p [%d: " TARGET_FMT_lx "] %s\n",
-                           tb->tc_ptr, cpu->cpu_index, addr,
-                           lookup_symbol(addr));
-    return tb->tc_ptr;
+                           tb->tc.ptr, cpu->cpu_index, pc,
+                           lookup_symbol(pc));
+    return tb->tc.ptr;
 }
 
 void HELPER(exit_atomic)(CPUArchState *env)
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index c41d38a557..1df17d0ba9 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -24,7 +24,7 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
 DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
 
-DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl)
+DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
 
 DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
 
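Two declarations this series depends on are likewise outside the accel-only diffstat, in include/exec/exec-all.h: the struct tb_tc that replaces the separate tc_ptr/tc_search fields, and the CF_INVALID cflags bit that replaces the standalone tb->invalid flag. Sketched from their uses in this diff (the field names are evidenced by the tc.ptr/tc.search accesses; the exact comments and bit value live in that header):

    /* Assumed shape of the exec-all.h counterparts; see that header for
     * the authoritative definitions.
     */
    struct tb_tc {
        void *ptr;        /* start of the translated host code */
        uint8_t *search;  /* insn-boundary search data, emitted after the code */
    };

    /* CF_INVALID marks a stale TB via a cflags bit.  Setters hold tb_lock
     * (note the assert_tb_locked() in tb_phys_invalidate below), so a
     * load-modify-store published with atomic_set() suffices, and readers
     * test the bit with a plain atomic_read() of cflags:
     */
    atomic_set(&tb->cflags, tb->cflags | CF_INVALID);
    if (!(atomic_read(&tb->cflags) & CF_INVALID)) {
        /* TB may still be executed and chained to */
    }

Folding the flag into cflags lets the lookup fast path reject stale TBs using the cflags word it already reads, instead of touching a separate bool.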
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 2d1ed06065..c5ce99d549 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -65,11 +65,29 @@
 /* make various TB consistency checks */
 /* #define DEBUG_TB_CHECK */
 
+#ifdef DEBUG_TB_INVALIDATE
+#define DEBUG_TB_INVALIDATE_GATE 1
+#else
+#define DEBUG_TB_INVALIDATE_GATE 0
+#endif
+
+#ifdef DEBUG_TB_FLUSH
+#define DEBUG_TB_FLUSH_GATE 1
+#else
+#define DEBUG_TB_FLUSH_GATE 0
+#endif
+
 #if !defined(CONFIG_USER_ONLY)
 /* TB consistency checks only implemented for usermode emulation. */
 #undef DEBUG_TB_CHECK
 #endif
 
+#ifdef DEBUG_TB_CHECK
+#define DEBUG_TB_CHECK_GATE 1
+#else
+#define DEBUG_TB_CHECK_GATE 0
+#endif
+
 /* Access to the various translations structures need to be serialised via locks
  * for consistency. This is automatic for SoftMMU based system
  * emulation due to its single threaded nature. In user-mode emulation
@@ -139,7 +157,7 @@ TCGContext tcg_ctx;
 bool parallel_cpus;
 
 /* translation block context */
-__thread int have_tb_lock;
+static __thread int have_tb_lock;
 
 static void page_table_config_init(void)
 {
@@ -242,7 +260,7 @@
    which comes from the host pc of the end of the code implementing the insn.
 
    Each line of the table is encoded as sleb128 deltas from the previous
-   line. The seed for the first line is { tb->pc, 0..., tb->tc_ptr }.
+   line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
    That is, the first column is seeded with the guest pc, the last column
    with the host pc, and the middle columns with zeros. */
 
@@ -252,7 +270,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
     uint8_t *p = block;
     int i, j, n;
 
-    tb->tc_search = block;
+    tb->tc.search = block;
 
     for (i = 0, n = tb->icount; i < n; ++i) {
         target_ulong prev;
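The search table that tb->tc.search now points at is written by encode_search() using the sleb128 (signed little-endian base-128) encoding described in the comment above. The codec itself is untouched by this diff; a self-contained sketch of the standard encoder, using int64_t in place of QEMU's target_long:

    #include <stdint.h>

    /* Append VAL to P in signed LEB128 form and return the new tail. */
    static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
    {
        int more;

        do {
            uint8_t byte = val & 0x7f;

            val >>= 7;
            /* stop once the remaining bits are pure sign extension */
            more = !((val == 0 && (byte & 0x40) == 0)
                     || (val == -1 && (byte & 0x40) != 0));
            if (more) {
                byte |= 0x80;
            }
            *p++ = byte;
        } while (more);
        return p;
    }

Deltas between successive table rows are mostly tiny, so most columns cost a single byte per guest instruction.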
@@ -287,9 +305,9 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                      uintptr_t searched_pc)
 {
     target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
-    uintptr_t host_pc = (uintptr_t)tb->tc_ptr;
+    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
     CPUArchState *env = cpu->env_ptr;
-    uint8_t *p = tb->tc_search;
+    uint8_t *p = tb->tc.search;
     int i, j, num_insns = tb->icount;
 #ifdef CONFIG_PROFILER
     int64_t ti = profile_getclock();
@@ -840,7 +858,7 @@ void tb_free(TranslationBlock *tb)
         tb == tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
         size_t struct_size = ROUND_UP(sizeof(*tb), qemu_icache_linesize);
 
-        tcg_ctx.code_gen_ptr = tb->tc_ptr - struct_size;
+        tcg_ctx.code_gen_ptr = tb->tc.ptr - struct_size;
         tcg_ctx.tb_ctx.nb_tbs--;
     }
 }
@@ -899,13 +917,13 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
         goto done;
     }
 
-#if defined(DEBUG_TB_FLUSH)
-    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
-           (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
-           tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
-           ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
-           tcg_ctx.tb_ctx.nb_tbs : 0);
-#endif
+    if (DEBUG_TB_FLUSH_GATE) {
+        printf("qemu: flush code_size=%td nb_tbs=%d avg_tb_size=%td\n",
+               tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
+               tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
+               (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) /
+               tcg_ctx.tb_ctx.nb_tbs : 0);
+    }
     if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
         > tcg_ctx.code_gen_buffer_size) {
         cpu_abort(cpu, "Internal error: code buffer overflow\n");
@@ -938,7 +956,13 @@ void tb_flush(CPUState *cpu)
     }
 }
 
-#ifdef DEBUG_TB_CHECK
+/*
+ * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
+ * so in order to prevent bit rot we compile them unconditionally in user-mode,
+ * and let the optimizer get rid of them by wrapping their user-only callers
+ * with if (DEBUG_TB_CHECK_GATE).
+ */
+#ifdef CONFIG_USER_ONLY
 
 static void do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash,
                                    void *userp)
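The gating scheme adopted above (and for DEBUG_TB_FLUSH and DEBUG_TB_INVALIDATE earlier in this file) replaces preprocessor exclusion with an if on a compile-time constant, so the debug code is always parsed and type-checked, then discarded as dead code when the gate is 0. A minimal standalone illustration of the pattern (DEBUG_FOO and its gate are hypothetical names):

    #include <stdio.h>

    #ifdef DEBUG_FOO
    #define DEBUG_FOO_GATE 1
    #else
    #define DEBUG_FOO_GATE 0
    #endif

    static void debug_foo_dump(int x)
    {
        printf("foo: %d\n", x);
    }

    int main(void)
    {
        /* debug_foo_dump() is compiled either way, so it cannot bit-rot;
         * with the gate at 0 the optimizer drops the dead call.
         */
        if (DEBUG_FOO_GATE) {
            debug_foo_dump(42);
        }
        return 0;
    }

The one cost is that gated code must keep compiling in every configuration, which is why the user-mode-only checkers above remain under #ifdef CONFIG_USER_ONLY rather than behind a gate.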
@@ -982,7 +1006,7 @@ static void tb_page_check(void)
     qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_page_check, NULL);
 }
 
-#endif
+#endif /* CONFIG_USER_ONLY */
 
 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
 {
@@ -1035,7 +1059,7 @@ static inline void tb_remove_from_jmp_list(TranslationBlock *tb, int n)
    another TB */
 static inline void tb_reset_jump(TranslationBlock *tb, int n)
 {
-    uintptr_t addr = (uintptr_t)(tb->tc_ptr + tb->jmp_reset_offset[n]);
+    uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
     tb_set_jmp_target(tb, n, addr);
 }
 
@@ -1073,7 +1097,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 
     assert_tb_locked();
 
-    atomic_set(&tb->invalid, true);
+    atomic_set(&tb->cflags, tb->cflags | CF_INVALID);
 
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
@@ -1186,10 +1210,9 @@ static inline void tb_alloc_page(TranslationBlock *tb,
         }
         mprotect(g2h(page_addr), qemu_host_page_size,
                  (prot & PAGE_BITS) & ~PAGE_WRITE);
-#ifdef DEBUG_TB_INVALIDATE
-        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
-               page_addr);
-#endif
+        if (DEBUG_TB_INVALIDATE_GATE) {
+            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
+        }
     }
 #else
     /* if some code is already present, then the pages are already
@@ -1225,8 +1248,10 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
     qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
 
-#ifdef DEBUG_TB_CHECK
-    tb_page_check();
+#ifdef CONFIG_USER_ONLY
+    if (DEBUG_TB_CHECK_GATE) {
+        tb_page_check();
+    }
 #endif
 }
 
@@ -1263,13 +1288,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     }
 
     gen_code_buf = tcg_ctx.code_gen_ptr;
-    tb->tc_ptr = gen_code_buf;
+    tb->tc.ptr = gen_code_buf;
     tb->pc = pc;
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
     tb->trace_vcpu_dstate = *cpu->trace_dstate;
-    tb->invalid = false;
 
 #ifdef CONFIG_PROFILER
     tcg_ctx.tb_count1++; /* includes aborted translations because of
@@ -1283,7 +1307,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     gen_intermediate_code(cpu, tb);
     tcg_ctx.cpu = NULL;
 
-    trace_translate_block(tb, tb->pc, tb->tc_ptr);
+    trace_translate_block(tb, tb->pc, tb->tc.ptr);
 
     /* generate machine code */
     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
@@ -1300,7 +1324,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 #ifdef CONFIG_PROFILER
     tcg_ctx.tb_count++;
     tcg_ctx.interm_time += profile_getclock() - ti;
-    tcg_ctx.code_time -= profile_getclock();
+    ti = profile_getclock();
 #endif
 
     /* ??? Overflow could be handled better here. In particular, we
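The CONFIG_PROFILER change in the hunk above pairs with the one in the next hunk: instead of parking a negated timestamp in the accumulator (code_time -= now; ... code_time += now;), the start time is held in the local ti and one completed interval is added at the end. Both forms measure the same span, but the local-variable idiom never leaves a half-updated accumulator visible between the two probes. A condensed before/after, using the same names as the diff:

    /* before: accumulator transiently holds a meaningless negative value */
    tcg_ctx.code_time -= profile_getclock();
    /* ... code generation ... */
    tcg_ctx.code_time += profile_getclock();

    /* after: only whole intervals ever reach the accumulator */
    ti = profile_getclock();
    /* ... code generation ... */
    tcg_ctx.code_time += profile_getclock() - ti;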
@@ -1318,7 +1342,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     }
 
 #ifdef CONFIG_PROFILER
-    tcg_ctx.code_time += profile_getclock();
+    tcg_ctx.code_time += profile_getclock() - ti;
     tcg_ctx.code_in_len += tb->size;
     tcg_ctx.code_out_len += gen_code_size;
     tcg_ctx.search_out_len += search_size;
@@ -1330,11 +1354,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         qemu_log_lock();
         qemu_log("OUT: [size=%d]\n", gen_code_size);
         if (tcg_ctx.data_gen_ptr) {
-            size_t code_size = tcg_ctx.data_gen_ptr - tb->tc_ptr;
+            size_t code_size = tcg_ctx.data_gen_ptr - tb->tc.ptr;
             size_t data_size = gen_code_size - code_size;
             size_t i;
 
-            log_disas(tb->tc_ptr, code_size);
+            log_disas(tb->tc.ptr, code_size);
 
             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                 if (sizeof(tcg_target_ulong) == 8) {
@@ -1348,7 +1372,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
             }
         }
     } else {
-        log_disas(tb->tc_ptr, gen_code_size);
+        log_disas(tb->tc.ptr, gen_code_size);
     }
     qemu_log("\n");
     qemu_log_flush();
@@ -1675,7 +1699,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     while (m_min <= m_max) {
         m = (m_min + m_max) >> 1;
         tb = tcg_ctx.tb_ctx.tbs[m];
-        v = (uintptr_t)tb->tc_ptr;
+        v = (uintptr_t)tb->tc.ptr;
         if (v == tc_ptr) {
             return tb;
         } else if (tc_ptr < v) {
@@ -1936,7 +1960,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
             atomic_read(&tcg_ctx.tb_ctx.tb_flush_count));
     cpu_fprintf(f, "TB invalidate count %d\n",
             tcg_ctx.tb_ctx.tb_phys_invalidate_count);
-    cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
+    cpu_fprintf(f, "TLB flush count %zu\n", tlb_flush_count());
     tcg_dump_info(f, cpu_fprintf);
 
     tb_unlock();
@@ -2213,8 +2237,10 @@ int page_unprotect(target_ulong address, uintptr_t pc)
             /* and since the content will be modified, we must
                invalidate the corresponding translated code. */
             current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
-#ifdef DEBUG_TB_CHECK
-            tb_invalidate_check(addr);
+#ifdef CONFIG_USER_ONLY
+            if (DEBUG_TB_CHECK_GATE) {
+                tb_invalidate_check(addr);
+            }
 #endif
         }
         mprotect((void *)g2h(host_start), qemu_host_page_size,