From 6f04cb1c8f481cf02fbc4657fefba985a1fe725f Mon Sep 17 00:00:00 2001 From: Alex Bennée Date: Wed, 24 Feb 2021 16:58:07 +0000 Subject: accel/tcg: rename tb_lookup__cpu_state and hoist state extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having a function return both a valid TB and some system state seems excessive. It will make the subsequent re-factoring easier if we look up the current state where we are. Signed-off-by: Alex Bennée Message-Id: <20210224165811.11567-2-alex.bennee@linaro.org> Signed-off-by: Richard Henderson --- include/exec/tb-lookup.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/exec') diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h index 9cf475bb03..c3f5d81c55 100644 --- a/include/exec/tb-lookup.h +++ b/include/exec/tb-lookup.h @@ -17,30 +17,28 @@ #include "exec/tb-hash.h" /* Might cause an exception, so have a longjmp destination ready */ -static inline TranslationBlock * -tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base, - uint32_t *flags, uint32_t cf_mask) +static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, + uint32_t flags, uint32_t cf_mask) { - CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; uint32_t hash; - cpu_get_tb_cpu_state(env, pc, cs_base, flags); - hash = tb_jmp_cache_hash_func(*pc); + hash = tb_jmp_cache_hash_func(pc); tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); cf_mask &= ~CF_CLUSTER_MASK; cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT; if (likely(tb && - tb->pc == *pc && - tb->cs_base == *cs_base && - tb->flags == *flags && + tb->pc == pc && + tb->cs_base == cs_base && + tb->flags == flags && tb->trace_vcpu_dstate == *cpu->trace_dstate && (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) { return tb; } - tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask); + tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask); if (tb == NULL) { return NULL; } -- cgit 1.4.1 From c0ae396a81e13e5a09846f86a702bc61733a8885 Mon Sep 17 00:00:00 2001 From: Alex Bennée Date: Wed, 24 Feb 2021 16:58:08 +0000 Subject: accel/tcg: move CF_CLUSTER calculation to curr_cflags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is nothing special about this compile flag: we can simply compute it in curr_cflags(), which we should be using anyway when building a new set of cflags.
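As a rough caller-side sketch of where these first two patches lead — condensed from the cpu_exec_step_atomic and lookup_tb_ptr hunks below, with logging and the tb_gen_code fallback on a miss dropped; the wrapper name find_current_tb is invented for illustration and is not part of the patches:

    #include "qemu/osdep.h"
    #include "cpu.h"
    #include "exec/exec-all.h"
    #include "exec/tb-lookup.h"

    /*
     * Callers now extract pc/cs_base/flags themselves and pass the full
     * cflags (cluster bits included) built by curr_cflags(); tb_lookup()
     * only consults the jump cache and the QHT, returning NULL on a miss.
     */
    static TranslationBlock *find_current_tb(CPUState *cpu)
    {
        CPUArchState *env = (CPUArchState *)cpu->env_ptr;
        target_ulong pc, cs_base;
        uint32_t flags;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
        return tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
    }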
Signed-off-by: Alex Bennée Message-Id: <20210224165811.11567-3-alex.bennee@linaro.org> Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 9 ++++----- accel/tcg/tcg-runtime.c | 2 +- accel/tcg/translate-all.c | 6 +++--- include/exec/exec-all.h | 8 +++++--- include/exec/tb-lookup.h | 3 --- softmmu/physmem.c | 2 +- 6 files changed, 14 insertions(+), 16 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index ef96b312a1..45286dc4b3 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -249,8 +249,7 @@ void cpu_exec_step_atomic(CPUState *cpu) TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; - uint32_t cflags = 1; - uint32_t cf_mask = cflags & CF_HASH_MASK; + uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1; int tb_exit; if (sigsetjmp(cpu->jmp_env, 0) == 0) { @@ -260,7 +259,7 @@ void cpu_exec_step_atomic(CPUState *cpu) cpu->running = true; cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - tb = tb_lookup(cpu, pc, cs_base, flags, cf_mask); + tb = tb_lookup(cpu, pc, cs_base, flags, cflags); if (tb == NULL) { mmap_lock(); @@ -497,7 +496,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) if (replay_has_exception() && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) { /* Execute just one insn to trigger exception pending in the log */ - cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1; + cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1; } #endif return false; @@ -794,7 +793,7 @@ int cpu_exec(CPUState *cpu) have CF_INVALID set, -1 is a convenient invalid value that does not require tcg headers for cpu_common_reset. */ if (cflags == -1) { - cflags = curr_cflags(); + cflags = curr_cflags(cpu); } else { cpu->cflags_next_tb = -1; } diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index 05e3d52c2f..99403e3eb3 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -154,7 +154,7 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags()); + tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu)); if (tb == NULL) { return tcg_code_gen_epilogue; } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index bbd919a393..f29b47f090 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2194,7 +2194,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, if (current_tb_modified) { page_collection_unlock(pages); /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(); + cpu->cflags_next_tb = 1 | curr_cflags(cpu); mmap_unlock(); cpu_loop_exit_noexc(cpu); } @@ -2362,7 +2362,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(); + cpu->cflags_next_tb = 1 | curr_cflags(cpu); return true; } #endif @@ -2438,7 +2438,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) * operations only (which execute after completion) so we don't * double instrument the instruction. 
*/ - cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n; + cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, "cpu_io_recompile: rewound execution of TB to " diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index b7b3c0ef12..1a69c07add 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -519,10 +519,12 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb) } /* current cflags for hashing/comparison */ -static inline uint32_t curr_cflags(void) +static inline uint32_t curr_cflags(CPUState *cpu) { - return (parallel_cpus ? CF_PARALLEL : 0) - | (icount_enabled() ? CF_USE_ICOUNT : 0); + uint32_t cflags = deposit32(0, CF_CLUSTER_SHIFT, 8, cpu->cluster_index); + cflags |= parallel_cpus ? CF_PARALLEL : 0; + cflags |= icount_enabled() ? CF_USE_ICOUNT : 0; + return cflags; } /* TranslationBlock invalidate API */ diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h index c3f5d81c55..1c92fe0521 100644 --- a/include/exec/tb-lookup.h +++ b/include/exec/tb-lookup.h @@ -27,9 +27,6 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, hash = tb_jmp_cache_hash_func(pc); tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); - cf_mask &= ~CF_CLUSTER_MASK; - cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT; - if (likely(tb && tb->pc == pc && tb->cs_base == cs_base && diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 19e0aa9836..7e8b0fab89 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -937,7 +937,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, cpu_loop_exit_restore(cpu, ra); } else { /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(); + cpu->cflags_next_tb = 1 | curr_cflags(cpu); mmap_unlock(); if (ra) { cpu_restore_state(cpu, ra, true); -- cgit 1.4.1 From bf253ac606de4a934e41ba178bf4f1338c554cec Mon Sep 17 00:00:00 2001 From: Alex Bennée Date: Wed, 24 Feb 2021 16:58:09 +0000 Subject: accel/tcg: drop the use of CF_HASH_MASK and rename params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't really deal in cf_mask most of the time. The one time it's relevant is when we want to remove an invalidated TB from the QHT lookup. Everywhere else we should be looking up things without CF_INVALID set. 
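The invariant that makes the exact compare safe: a looked-up cflags value never has CF_INVALID set (asserted in tb_lookup below), while invalidating a block sets CF_INVALID on the TB itself (do_tb_phys_invalidate does this with a qatomic_set on tb->cflags — stated here as an assumption, it is not visible in the hunks below), so a stale block can never compare equal. As a sketch, with tb_matches being a hypothetical helper rather than anything in the patch:

    /*
     * Sketch only: lookups never carry CF_INVALID, invalidated blocks
     * always do, so comparing the whole cflags word is sufficient and
     * the separate CF_HASH_MASK can go away.
     */
    static inline bool tb_matches(const TranslationBlock *tb, uint32_t cflags)
    {
        tcg_debug_assert(!(cflags & CF_INVALID));
        return tb_cflags(tb) == cflags;  /* always false for an invalidated TB */
    }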
Signed-off-by: Alex Bennée Message-Id: <20210224165811.11567-4-alex.bennee@linaro.org> Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 16 ++++++++-------- accel/tcg/tcg-runtime.c | 2 +- accel/tcg/translate-all.c | 8 +++++--- include/exec/exec-all.h | 4 +--- include/exec/tb-lookup.h | 9 ++++++--- 5 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 45286dc4b3..931da96c2b 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -307,7 +307,7 @@ struct tb_desc { CPUArchState *env; tb_page_addr_t phys_page1; uint32_t flags; - uint32_t cf_mask; + uint32_t cflags; uint32_t trace_vcpu_dstate; }; @@ -321,7 +321,7 @@ static bool tb_lookup_cmp(const void *p, const void *d) tb->cs_base == desc->cs_base && tb->flags == desc->flags && tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && - (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) { + tb_cflags(tb) == desc->cflags) { /* check next page if needed */ if (tb->page_addr[1] == -1) { return true; @@ -341,7 +341,7 @@ static bool tb_lookup_cmp(const void *p, const void *d) TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags, - uint32_t cf_mask) + uint32_t cflags) { tb_page_addr_t phys_pc; struct tb_desc desc; @@ -350,7 +350,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, desc.env = (CPUArchState *)cpu->env_ptr; desc.cs_base = cs_base; desc.flags = flags; - desc.cf_mask = cf_mask; + desc.cflags = cflags; desc.trace_vcpu_dstate = *cpu->trace_dstate; desc.pc = pc; phys_pc = get_page_addr_code(desc.env, pc); @@ -358,7 +358,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, return NULL; } desc.phys_page1 = phys_pc & TARGET_PAGE_MASK; - h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate); + h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate); return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp); } @@ -418,7 +418,7 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, static inline TranslationBlock *tb_find(CPUState *cpu, TranslationBlock *last_tb, - int tb_exit, uint32_t cf_mask) + int tb_exit, uint32_t cflags) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; @@ -427,10 +427,10 @@ static inline TranslationBlock *tb_find(CPUState *cpu, cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - tb = tb_lookup(cpu, pc, cs_base, flags, cf_mask); + tb = tb_lookup(cpu, pc, cs_base, flags, cflags); if (tb == NULL) { mmap_lock(); - tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask); + tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); mmap_unlock(); /* We add the TB in the virtual pc hash table for the fast lookup */ qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index 99403e3eb3..49f5de37e8 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -27,10 +27,10 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" -#include "exec/tb-lookup.h" #include "disas/disas.h" #include "exec/log.h" #include "tcg/tcg.h" +#include "exec/tb-lookup.h" /* 32-bit helpers */ diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index f29b47f090..0b0bfd35ab 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1311,7 +1311,7 @@ static bool tb_cmp(const void *ap, const void *bp) return a->pc == b->pc && a->cs_base == b->cs_base && a->flags == 
b->flags && - (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) && + (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && a->trace_vcpu_dstate == b->trace_vcpu_dstate && a->page_addr[0] == b->page_addr[0] && a->page_addr[1] == b->page_addr[1]; @@ -1616,6 +1616,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) PageDesc *p; uint32_t h; tb_page_addr_t phys_pc; + uint32_t orig_cflags = tb_cflags(tb); assert_memory_lock(); @@ -1626,7 +1627,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK, + h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags, tb->trace_vcpu_dstate); if (!qht_remove(&tb_ctx.htable, tb, h)) { return; @@ -1793,6 +1794,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, uint32_t h; assert_memory_lock(); + tcg_debug_assert(!(tb->cflags & CF_INVALID)); /* * Add the TB to the page list, acquiring first the pages's locks. @@ -1811,7 +1813,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, } /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags, tb->trace_vcpu_dstate); qht_insert(&tb_ctx.htable, tb, h, &existing_tb); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 1a69c07add..acf66ab692 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -460,8 +460,6 @@ struct TranslationBlock { #define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ #define CF_CLUSTER_SHIFT 24 -/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */ -#define CF_HASH_MASK (~CF_INVALID) /* Per-vCPU dynamic tracing state used to generate this TB */ uint32_t trace_vcpu_dstate; @@ -538,7 +536,7 @@ void tb_flush(CPUState *cpu); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags, - uint32_t cf_mask); + uint32_t cflags); void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); /* GETPC is the true target of the return instruction that we'll execute. 
*/ diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h index 1c92fe0521..29d61ceb34 100644 --- a/include/exec/tb-lookup.h +++ b/include/exec/tb-lookup.h @@ -19,11 +19,14 @@ /* Might cause an exception, so have a longjmp destination ready */ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, target_ulong cs_base, - uint32_t flags, uint32_t cf_mask) + uint32_t flags, uint32_t cflags) { TranslationBlock *tb; uint32_t hash; + /* we should never be trying to look up an INVALID tb */ + tcg_debug_assert(!(cflags & CF_INVALID)); + hash = tb_jmp_cache_hash_func(pc); tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); @@ -32,10 +35,10 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, tb->cs_base == cs_base && tb->flags == flags && tb->trace_vcpu_dstate == *cpu->trace_dstate && - (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) { + tb_cflags(tb) == cflags)) { return tb; } - tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask); + tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags); if (tb == NULL) { return NULL; } -- cgit 1.4.1 From 872ebd884dd68ecef4c6f9f86c5da519f18bd31e Mon Sep 17 00:00:00 2001 From: Alex Bennée Date: Wed, 24 Feb 2021 16:58:10 +0000 Subject: include/exec: lightly re-arrange TranslationBlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets make sure all the flags we compare when looking up blocks are together in the same place. Signed-off-by: Alex Bennée Message-Id: <20210224165811.11567-5-alex.bennee@linaro.org> Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/exec') diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index acf66ab692..a262dd5804 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -448,9 +448,6 @@ struct TranslationBlock { target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ target_ulong cs_base; /* CS base for this block */ uint32_t flags; /* flags defining in which context the code was generated */ - uint16_t size; /* size of target code for this block (1 <= - size <= TARGET_PAGE_SIZE) */ - uint16_t icount; uint32_t cflags; /* compile flags */ #define CF_COUNT_MASK 0x00007fff #define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ @@ -464,6 +461,14 @@ struct TranslationBlock { /* Per-vCPU dynamic tracing state used to generate this TB */ uint32_t trace_vcpu_dstate; + /* + * Above fields used for comparing + */ + + /* size of target code for this block (1 <= size <= TARGET_PAGE_SIZE) */ + uint16_t size; + uint16_t icount; + struct tb_tc tc; /* first and second physical page containing code. The lower bit -- cgit 1.4.1 From 6cc9d67c6f682cf04eea2d6e64a252b63a7eccdf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 1 Mar 2021 19:21:08 -0800 Subject: accel/tcg: Precompute curr_cflags into cpu->tcg_cflags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The primary motivation is to remove a dozen insns along the fast-path in tb_lookup. As a byproduct, this allows us to completely remove parallel_cpus. 
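A condensed view of how the pieces fit together after this patch (gathered from the hunks that follow; not a standalone compilation unit, and the fragments live in different files):

    /* tcg-accel-ops.c: computed once per vCPU, when its thread is created. */
    void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
    {
        uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
        cflags |= parallel ? CF_PARALLEL : 0;
        cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
        cpu->tcg_cflags = cflags;
    }

    /* exec-all.h: the hot path becomes a single field load. */
    static inline uint32_t curr_cflags(CPUState *cpu)
    {
        return cpu->tcg_cflags;
    }

    /* linux-user do_fork(): the first clone() switches to parallel code
     * generation before cpu_copy(), so the child inherits CF_PARALLEL too. */
    if (!(cpu->tcg_cflags & CF_PARALLEL)) {
        cpu->tcg_cflags |= CF_PARALLEL;
        tb_flush(cpu);
    }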
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 3 --- accel/tcg/tcg-accel-ops-mttcg.c | 3 +-- accel/tcg/tcg-accel-ops-rr.c | 2 +- accel/tcg/tcg-accel-ops.c | 8 ++++++++ accel/tcg/tcg-accel-ops.h | 1 + accel/tcg/translate-all.c | 4 ---- include/exec/exec-all.h | 7 +------ include/hw/core/cpu.h | 2 ++ linux-user/main.c | 1 + linux-user/sh4/signal.c | 8 +++++--- linux-user/syscall.c | 18 ++++++++++-------- 11 files changed, 30 insertions(+), 27 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 931da96c2b..bdfa036ac8 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -267,8 +267,6 @@ void cpu_exec_step_atomic(CPUState *cpu) mmap_unlock(); } - /* Since we got here, we know that parallel_cpus must be true. */ - parallel_cpus = false; cpu_exec_enter(cpu); /* execute the generated code */ trace_exec_tb(tb, pc); @@ -296,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu) * the execution. */ g_assert(cpu_in_exclusive_context(cpu)); - parallel_cpus = true; cpu->running = false; end_exclusive(); } diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 42973fb062..847d2079d2 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -114,8 +114,7 @@ void mttcg_start_vcpu_thread(CPUState *cpu) char thread_name[VCPU_THREAD_NAME_SIZE]; g_assert(tcg_enabled()); - - parallel_cpus = (current_machine->smp.max_cpus > 1); + tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1); cpu->thread = g_malloc0(sizeof(QemuThread)); cpu->halt_cond = g_malloc0(sizeof(QemuCond)); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 4a66055e0d..018b54c508 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -269,7 +269,7 @@ void rr_start_vcpu_thread(CPUState *cpu) static QemuThread *single_tcg_cpu_thread; g_assert(tcg_enabled()); - parallel_cpus = false; + tcg_cpu_init_cflags(cpu, false); if (!single_tcg_cpu_thread) { cpu->thread = g_malloc0(sizeof(QemuThread)); diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 6144d9df87..6cdcaa2855 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -41,6 +41,14 @@ /* common functionality among all TCG variants */ +void tcg_cpu_init_cflags(CPUState *cpu, bool parallel) +{ + uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT; + cflags |= parallel ? CF_PARALLEL : 0; + cflags |= icount_enabled() ? 
CF_USE_ICOUNT : 0; + cpu->tcg_cflags = cflags; +} + void tcg_cpus_destroy(CPUState *cpu) { cpu_thread_signal_destroyed(cpu); diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h index 48130006de..6a5fcef889 100644 --- a/accel/tcg/tcg-accel-ops.h +++ b/accel/tcg/tcg-accel-ops.h @@ -17,5 +17,6 @@ void tcg_cpus_destroy(CPUState *cpu); int tcg_cpus_exec(CPUState *cpu); void tcg_handle_interrupt(CPUState *cpu, int mask); +void tcg_cpu_init_cflags(CPUState *cpu, bool parallel); #endif /* TCG_CPUS_H */ diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 0b0bfd35ab..f32df8b240 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -224,7 +224,6 @@ static void *l1_map[V_L1_MAX_SIZE]; TCGContext tcg_init_ctx; __thread TCGContext *tcg_ctx; TBContext tb_ctx; -bool parallel_cpus; static void page_table_config_init(void) { @@ -1867,9 +1866,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, cflags = (cflags & ~CF_COUNT_MASK) | 1; } - cflags &= ~CF_CLUSTER_MASK; - cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT; - max_insns = cflags & CF_COUNT_MASK; if (max_insns == 0) { max_insns = CF_COUNT_MASK; diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index a262dd5804..6b036cae8f 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -513,8 +513,6 @@ struct TranslationBlock { uintptr_t jmp_dest[2]; }; -extern bool parallel_cpus; - /* Hide the qatomic_read to make code a little easier on the eyes */ static inline uint32_t tb_cflags(const TranslationBlock *tb) { @@ -524,10 +522,7 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb) /* current cflags for hashing/comparison */ static inline uint32_t curr_cflags(CPUState *cpu) { - uint32_t cflags = deposit32(0, CF_CLUSTER_SHIFT, 8, cpu->cluster_index); - cflags |= parallel_cpus ? CF_PARALLEL : 0; - cflags |= icount_enabled() ? CF_USE_ICOUNT : 0; - return cflags; + return cpu->tcg_cflags; } /* TranslationBlock invalidate API */ diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index c005d3dc2d..c68bc3ba8a 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -282,6 +282,7 @@ struct qemu_work_item; * to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will * be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER * QOM parent. + * @tcg_cflags: Pre-computed cflags for this cpu. * @nr_cores: Number of cores within this CPU package. * @nr_threads: Number of threads within this CPU. * @running: #true if CPU is currently running (lockless). @@ -412,6 +413,7 @@ struct CPUState { /* TODO Move common fields from CPUArchState here. */ int cpu_index; int cluster_index; + uint32_t tcg_cflags; uint32_t halted; uint32_t can_do_io; int32_t exception_index; diff --git a/linux-user/main.c b/linux-user/main.c index 81f48ff54e..4f4746dce8 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -205,6 +205,7 @@ CPUArchState *cpu_copy(CPUArchState *env) /* Reset non arch specific state */ cpu_reset(new_cpu); + new_cpu->tcg_cflags = cpu->tcg_cflags; memcpy(new_env, env, sizeof(CPUArchState)); /* Clone all break/watchpoints. diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c index cc89a48ff8..29c1ee30e6 100644 --- a/linux-user/sh4/signal.c +++ b/linux-user/sh4/signal.c @@ -82,9 +82,11 @@ static abi_ulong get_sigframe(struct target_sigaction *ka, return (sp - frame_size) & -8ul; } -/* Notice when we're in the middle of a gUSA region and reset. 
- Note that this will only occur for !parallel_cpus, as we will - translate such sequences differently in a parallel context. */ +/* + * Notice when we're in the middle of a gUSA region and reset. + * Note that this will only occur when #CF_PARALLEL is unset, as we + * will translate such sequences differently in a parallel context. + */ static void unwind_gusa(CPUSH4State *regs) { /* If the stack pointer is sufficiently negative, and we haven't diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 389ec09764..9522f603aa 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -6481,6 +6481,16 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, /* Grab a mutex so that thread setup appears atomic. */ pthread_mutex_lock(&clone_lock); + /* + * If this is our first additional thread, we need to ensure we + * generate code for parallel execution and flush old translations. + * Do this now so that the copy gets CF_PARALLEL too. + */ + if (!(cpu->tcg_cflags & CF_PARALLEL)) { + cpu->tcg_cflags |= CF_PARALLEL; + tb_flush(cpu); + } + /* we create a new CPU instance. */ new_env = cpu_copy(env); /* Init regs that differ from the parent. */ @@ -6521,14 +6531,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask); cpu->random_seed = qemu_guest_random_seed_thread_part1(); - /* If this is our first additional thread, we need to ensure we - * generate code for parallel execution and flush old translations. - */ - if (!parallel_cpus) { - parallel_cpus = true; - tb_flush(cpu); - } - ret = pthread_create(&info.thread, &attr, clone_func, &info); /* TODO: Free new CPU state if thread creation failed. */ -- cgit 1.4.1
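Taken together, the series leaves the TB lookup helper looking roughly like this (condensed from the final state of accel/tcg/tcg-runtime.c and include/exec/tb-lookup.h above; the qemu_log call is elided, and the env_cpu()/tb->tc.ptr lines are unchanged context reconstructed from the surrounding code rather than shown in the hunks):

    const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
    {
        CPUState *cpu = env_cpu(env);
        TranslationBlock *tb;
        target_ulong cs_base, pc;
        uint32_t flags;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

        /* curr_cflags() is now just cpu->tcg_cflags, and tb_lookup()
         * asserts CF_INVALID is clear and compares the full cflags word. */
        tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
        if (tb == NULL) {
            return tcg_code_gen_epilogue;
        }
        return tb->tc.ptr;
    }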