From 9373e63297c43752f9cf085feb7f5aed57d959f8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 18 Aug 2015 06:24:34 -0700 Subject: tcg: introduce tcg_current_cpu This is already useful on Windows in order to remove tls.h, because accesses to current_cpu are done from a different thread on that platform. It will be used on POSIX platforms as soon TCG stops using signals to interrupt the execution of translated code. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- include/exec/exec-all.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/exec/exec-all.h') diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 83b925172f..d5dd48f759 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -387,6 +387,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr); extern int singlestep; /* cpu-exec.c */ +extern CPUState *tcg_current_cpu; extern volatile sig_atomic_t exit_request; #if !defined(CONFIG_USER_ONLY) -- cgit 1.4.1 From e0c382113f768cc375a0d61b7cb3692f1b4bba58 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Aug 2015 00:19:19 +0200 Subject: tcg: signal-free qemu_cpu_kick Signals are slow and do not exist on Win32. The previous patches have done most of the legwork to introduce memory barriers (some of them were even there already for the sake of Windows!) and we can now set the flags directly in the iothread. qemu_cpu_kick_thread is not used anymore on TCG, since the TCG thread is never outside usermode while the CPU is running (not halted). Instead run the content of the signal handler (now in qemu_cpu_kick_no_halt) directly. qemu_cpu_kick_no_halt is also used in qemu_mutex_lock_iothread to avoid the overhead of qemu_cond_broadcast. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- cpu-exec.c | 2 +- cpus.c | 89 ++++++++++++------------------------------------- include/exec/exec-all.h | 4 +-- include/qom/cpu.h | 4 +-- 4 files changed, 27 insertions(+), 72 deletions(-) (limited to 'include/exec/exec-all.h') diff --git a/cpu-exec.c b/cpu-exec.c index ef9d74552e..6a30261a83 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -341,7 +341,7 @@ static void cpu_handle_debug_exception(CPUState *cpu) /* main execution loop */ -volatile sig_atomic_t exit_request; +bool exit_request; CPUState *tcg_current_cpu; int cpu_exec(CPUState *cpu) diff --git a/cpus.c b/cpus.c index e4079103ce..4f3374e201 100644 --- a/cpus.c +++ b/cpus.c @@ -661,19 +661,6 @@ static void cpu_handle_guest_debug(CPUState *cpu) cpu->stopped = true; } -static void cpu_signal(int sig) -{ - CPUState *cpu; - /* Ensure whatever caused the exit has reached the CPU threads before - * writing exit_request. 
- */ - atomic_mb_set(&exit_request, 1); - cpu = atomic_mb_read(&tcg_current_cpu); - if (cpu) { - cpu_exit(cpu); - } -} - #ifdef CONFIG_LINUX static void sigbus_reraise(void) { @@ -786,29 +773,11 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu) } } -static void qemu_tcg_init_cpu_signals(void) -{ - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = cpu_signal; - sigaction(SIG_IPI, &sigact, NULL); - - sigemptyset(&set); - sigaddset(&set, SIG_IPI); - pthread_sigmask(SIG_UNBLOCK, &set, NULL); -} - #else /* _WIN32 */ static void qemu_kvm_init_cpu_signals(CPUState *cpu) { abort(); } - -static void qemu_tcg_init_cpu_signals(void) -{ -} #endif /* _WIN32 */ static QemuMutex qemu_global_mutex; @@ -1046,7 +1015,6 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) rcu_register_thread(); qemu_mutex_lock_iothread(); - qemu_tcg_init_cpu_signals(); qemu_thread_get_self(cpu->thread); CPU_FOREACH(cpu) { @@ -1090,60 +1058,47 @@ static void qemu_cpu_kick_thread(CPUState *cpu) #ifndef _WIN32 int err; - if (!tcg_enabled()) { - if (cpu->thread_kicked) { - return; - } - cpu->thread_kicked = true; + if (cpu->thread_kicked) { + return; } + cpu->thread_kicked = true; err = pthread_kill(cpu->thread->thread, SIG_IPI); if (err) { fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); exit(1); } #else /* _WIN32 */ - if (!qemu_cpu_is_self(cpu)) { - CONTEXT tcgContext; - - if (SuspendThread(cpu->hThread) == (DWORD)-1) { - fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__, - GetLastError()); - exit(1); - } - - /* On multi-core systems, we are not sure that the thread is actually - * suspended until we can get the context. - */ - tcgContext.ContextFlags = CONTEXT_CONTROL; - while (GetThreadContext(cpu->hThread, &tcgContext) != 0) { - continue; - } - - cpu_signal(0); + abort(); +#endif +} - if (ResumeThread(cpu->hThread) == (DWORD)-1) { - fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__, - GetLastError()); - exit(1); - } +static void qemu_cpu_kick_no_halt(void) +{ + CPUState *cpu; + /* Ensure whatever caused the exit has reached the CPU threads before + * writing exit_request. 
+ */ + atomic_mb_set(&exit_request, 1); + cpu = atomic_mb_read(&tcg_current_cpu); + if (cpu) { + cpu_exit(cpu); } -#endif } void qemu_cpu_kick(CPUState *cpu) { qemu_cond_broadcast(cpu->halt_cond); - qemu_cpu_kick_thread(cpu); + if (tcg_enabled()) { + qemu_cpu_kick_no_halt(); + } else { + qemu_cpu_kick_thread(cpu); + } } void qemu_cpu_kick_self(void) { -#ifndef _WIN32 assert(current_cpu); qemu_cpu_kick_thread(current_cpu); -#else - abort(); -#endif } bool qemu_cpu_is_self(CPUState *cpu) @@ -1175,7 +1130,7 @@ void qemu_mutex_lock_iothread(void) atomic_dec(&iothread_requesting_mutex); } else { if (qemu_mutex_trylock(&qemu_global_mutex)) { - qemu_cpu_kick_thread(first_cpu); + qemu_cpu_kick_no_halt(); qemu_mutex_lock(&qemu_global_mutex); } atomic_dec(&iothread_requesting_mutex); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index d5dd48f759..4a09e6c599 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -386,9 +386,9 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr); /* vl.c */ extern int singlestep; -/* cpu-exec.c */ +/* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */ extern CPUState *tcg_current_cpu; -extern volatile sig_atomic_t exit_request; +extern bool exit_request; #if !defined(CONFIG_USER_ONLY) void migration_bitmap_extend(ram_addr_t old, ram_addr_t new); diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 8612655a27..2b4936ad2f 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -268,7 +268,7 @@ struct CPUState { bool created; bool stop; bool stopped; - volatile sig_atomic_t exit_request; + bool exit_request; uint32_t interrupt_request; int singlestep_enabled; int64_t icount_extra; @@ -319,7 +319,7 @@ struct CPUState { offset from AREG0. Leave this field at the end so as to make the (absolute value) offset as small as possible. This reduces code size, especially for hosts without large memory offsets. */ - volatile sig_atomic_t tcg_exit_req; + uint32_t tcg_exit_req; }; QTAILQ_HEAD(CPUTailQ, CPUState); -- cgit 1.4.1 From 677ef6230b603571ae05125db469f7b4c8912a77 Mon Sep 17 00:00:00 2001 From: KONRAD Frederic Date: Mon, 10 Aug 2015 17:27:02 +0200 Subject: replace spinlock by QemuMutex. spinlock is only used in two cases: * cpu-exec.c: to protect TranslationBlock * mem_helper.c: for lock helper in target-i386 (which seems broken). It's a pthread_mutex_t in user-mode, so we can use QemuMutex directly, with an #ifdef. The #ifdef will be removed when multithreaded TCG will need the mutex as well. Signed-off-by: KONRAD Frederic Message-Id: <1439220437-23957-5-git-send-email-fred.konrad@greensocs.com> Signed-off-by: Emilio G. Cota [Merge Emilio G. Cota's patch to remove volatile. 
- Paolo] Signed-off-by: Paolo Bonzini --- cpu-exec.c | 14 +++----------- include/exec/exec-all.h | 4 ++-- linux-user/main.c | 6 +++--- target-i386/cpu.h | 3 +++ target-i386/mem_helper.c | 25 ++++++++++++++++++++++--- target-i386/translate.c | 2 ++ tcg/tcg.h | 4 ++++ translate-all.c | 34 ++++++++++++++++++++++++++++++++++ 8 files changed, 73 insertions(+), 19 deletions(-) (limited to 'include/exec/exec-all.h') diff --git a/cpu-exec.c b/cpu-exec.c index 6a30261a83..2c0a6f642d 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -357,9 +357,6 @@ int cpu_exec(CPUState *cpu) uintptr_t next_tb; SyncClocks sc; - /* This must be volatile so it is not trashed by longjmp() */ - volatile bool have_tb_lock = false; - if (cpu->halted) { if (!cpu_has_work(cpu)) { return EXCP_HALTED; @@ -468,8 +465,7 @@ int cpu_exec(CPUState *cpu) cpu->exception_index = EXCP_INTERRUPT; cpu_loop_exit(cpu); } - spin_lock(&tcg_ctx.tb_ctx.tb_lock); - have_tb_lock = true; + tb_lock(); tb = tb_find_fast(cpu); /* Note: we do it here to avoid a gcc bug on Mac OS X when doing it in tb_find_slow */ @@ -491,8 +487,7 @@ int cpu_exec(CPUState *cpu) tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK), next_tb & TB_EXIT_MASK, tb); } - have_tb_lock = false; - spin_unlock(&tcg_ctx.tb_ctx.tb_lock); + tb_unlock(); if (likely(!cpu->exit_request)) { trace_exec_tb(tb, tb->pc); tc_ptr = tb->tc_ptr; @@ -558,10 +553,7 @@ int cpu_exec(CPUState *cpu) x86_cpu = X86_CPU(cpu); env = &x86_cpu->env; #endif - if (have_tb_lock) { - spin_unlock(&tcg_ctx.tb_ctx.tb_lock); - have_tb_lock = false; - } + tb_lock_reset(); } } /* for(;;) */ diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 4a09e6c599..59544d4d89 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -225,7 +225,7 @@ struct TranslationBlock { struct TranslationBlock *jmp_first; }; -#include "exec/spinlock.h" +#include "qemu/thread.h" typedef struct TBContext TBContext; @@ -235,7 +235,7 @@ struct TBContext { TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; int nb_tbs; /* any access to the tbs or the page table must use this lock */ - spinlock_t tb_lock; + QemuMutex tb_lock; /* statistics */ int tb_flush_count; diff --git a/linux-user/main.c b/linux-user/main.c index 2c9658e90d..1cecc4c850 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -105,7 +105,7 @@ static int pending_cpus; /* Make sure everything is in a consistent state for calling fork(). 
*/ void fork_start(void) { - pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); pthread_mutex_lock(&exclusive_lock); mmap_fork_start(); } @@ -127,11 +127,11 @@ void fork_end(int child) pthread_mutex_init(&cpu_list_mutex, NULL); pthread_cond_init(&exclusive_cond, NULL); pthread_cond_init(&exclusive_resume, NULL); - pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL); + qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); gdbserver_fork(thread_cpu); } else { pthread_mutex_unlock(&exclusive_lock); - pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); } } diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 74b674d524..0337838d55 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -1318,6 +1318,9 @@ static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env) void cpu_set_mxcsr(CPUX86State *env, uint32_t val); void cpu_set_fpuc(CPUX86State *env, uint16_t val); +/* mem_helper.c */ +void helper_lock_init(void); + /* svm_helper.c */ void cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type, uint64_t param); diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c index 1aec8a5f19..8bf0da2433 100644 --- a/target-i386/mem_helper.c +++ b/target-i386/mem_helper.c @@ -23,18 +23,37 @@ /* broken thread support */ -static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED; +#if defined(CONFIG_USER_ONLY) +QemuMutex global_cpu_lock; void helper_lock(void) { - spin_lock(&global_cpu_lock); + qemu_mutex_lock(&global_cpu_lock); } void helper_unlock(void) { - spin_unlock(&global_cpu_lock); + qemu_mutex_unlock(&global_cpu_lock); } +void helper_lock_init(void) +{ + qemu_mutex_init(&global_cpu_lock); +} +#else +void helper_lock(void) +{ +} + +void helper_unlock(void) +{ +} + +void helper_lock_init(void) +{ +} +#endif + void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) { uint64_t d; diff --git a/target-i386/translate.c b/target-i386/translate.c index 82e2245bfd..443bf604d8 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -7899,6 +7899,8 @@ void optimize_flags_init(void) offsetof(CPUX86State, regs[i]), reg_names[i]); } + + helper_lock_init(); } /* generate intermediate code in gen_opc_buf and gen_opparam_buf for diff --git a/tcg/tcg.h b/tcg/tcg.h index f437824ba9..aa295b9554 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -595,6 +595,10 @@ void *tcg_malloc_internal(TCGContext *s, int size); void tcg_pool_reset(TCGContext *s); void tcg_pool_delete(TCGContext *s); +void tb_lock(void); +void tb_unlock(void); +void tb_lock_reset(void); + static inline void *tcg_malloc(int size) { TCGContext *s = &tcg_ctx; diff --git a/translate-all.c b/translate-all.c index a75aeed538..37bb56ca42 100644 --- a/translate-all.c +++ b/translate-all.c @@ -128,6 +128,39 @@ static void *l1_map[V_L1_SIZE]; /* code generation context */ TCGContext tcg_ctx; +/* translation block context */ +#ifdef CONFIG_USER_ONLY +__thread int have_tb_lock; +#endif + +void tb_lock(void) +{ +#ifdef CONFIG_USER_ONLY + assert(!have_tb_lock); + qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); + have_tb_lock++; +#endif +} + +void tb_unlock(void) +{ +#ifdef CONFIG_USER_ONLY + assert(have_tb_lock); + have_tb_lock--; + qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); +#endif +} + +void tb_lock_reset(void) +{ +#ifdef CONFIG_USER_ONLY + if (have_tb_lock) { + qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + have_tb_lock = 0; + } +#endif +} + static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2); static TranslationBlock 
*tb_find_pc(uintptr_t tc_ptr); @@ -675,6 +708,7 @@ static inline void code_gen_alloc(size_t tb_size) CODE_GEN_AVG_BLOCK_SIZE; tcg_ctx.tb_ctx.tbs = g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock)); + qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); } /* Must be called before using the QEMU cpus. 'tb_size' is the size -- cgit 1.4.1 From 8fd19e6cfd5b6cdf028c6ac2ff4157ed831ea3a6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 11 Aug 2015 10:57:52 +0200 Subject: exec: make mmap_lock/mmap_unlock globally available There is some iffy lock hierarchy going on in translate-all.c. To fix it, we need to take the mmap_lock in cpu-exec.c. Make the functions globally available. Reviewed-by: Peter Maydell Signed-off-by: Paolo Bonzini --- bsd-user/qemu.h | 2 -- include/exec/exec-all.h | 6 ++++++ linux-user/qemu.h | 2 -- translate-all.c | 5 ----- 4 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include/exec/exec-all.h') diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h index 21cc6023ee..735cb4042a 100644 --- a/bsd-user/qemu.h +++ b/bsd-user/qemu.h @@ -211,8 +211,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, abi_ulong new_addr); int target_msync(abi_ulong start, abi_ulong len, int flags); extern unsigned long last_brk; -void mmap_lock(void); -void mmap_unlock(void); void cpu_list_lock(void); void cpu_list_unlock(void); #if defined(CONFIG_USE_NPTL) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 59544d4d89..05a5d5c53b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -374,11 +374,17 @@ void tlb_fill(CPUState *cpu, target_ulong addr, int is_write, int mmu_idx, #endif #if defined(CONFIG_USER_ONLY) +void mmap_lock(void); +void mmap_unlock(void); + static inline tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) { return addr; } #else +static inline void mmap_lock(void) {} +static inline void mmap_unlock(void) {} + /* cputlb.c */ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr); #endif diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 8012cc2f5b..e8606b290c 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -261,8 +261,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, int target_msync(abi_ulong start, abi_ulong len, int flags); extern unsigned long last_brk; extern abi_ulong mmap_next_start; -void mmap_lock(void); -void mmap_unlock(void); abi_ulong mmap_find_vma(abi_ulong, abi_ulong); void cpu_list_lock(void); void cpu_list_unlock(void); diff --git a/translate-all.c b/translate-all.c index a12139ba01..e3c5c5e20a 100644 --- a/translate-all.c +++ b/translate-all.c @@ -466,11 +466,6 @@ static inline PageDesc *page_find(tb_page_addr_t index) return page_find_alloc(index, 0); } -#if !defined(CONFIG_USER_ONLY) -#define mmap_lock() do { } while (0) -#define mmap_unlock() do { } while (0) -#endif - #if defined(CONFIG_USER_ONLY) /* Currently it is not recommended to allocate big chunks of data in user mode. It will change when a dedicated libc will be used. */ -- cgit 1.4.1
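
Editor's note: the kick protocol introduced in "tcg: signal-free qemu_cpu_kick" above can be read as the following standalone sketch. It is not QEMU code: plain C11 seq_cst atomics stand in for atomic_mb_set()/atomic_mb_read(), and the names CPUStateSketch, kick_no_halt() and cpu_exec_sketch() are hypothetical, chosen only to mirror qemu_cpu_kick_no_halt() and cpu_exec(). In QEMU the two sides run in different threads; here they are collapsed into one thread purely to show the ordering of the flag writes.

/* Sketch of the signal-free kick: the "iothread" raises a global
 * exit_request flag with full-barrier semantics, then reads the pointer
 * to the CPU currently running translated code and sets its per-CPU
 * flags, instead of delivering SIG_IPI. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

typedef struct CPUStateSketch {
    _Atomic bool exit_request;        /* cf. cpu->exit_request in the patch */
    _Atomic uint32_t tcg_exit_req;    /* polled by generated code */
} CPUStateSketch;

static _Atomic bool exit_request;                 /* global "leave the loop" flag */
static CPUStateSketch * _Atomic tcg_current_cpu;  /* CPU executing TCG code, or NULL */

/* Kicker side: what qemu_cpu_kick_no_halt() does in the patch.
 * seq_cst store/load provide the full barriers that atomic_mb_set()
 * and atomic_mb_read() provide in QEMU. */
static void kick_no_halt(void)
{
    atomic_store(&exit_request, true);
    CPUStateSketch *cpu = atomic_load(&tcg_current_cpu);
    if (cpu) {
        /* stand-in for cpu_exit(cpu) */
        atomic_store(&cpu->exit_request, true);
        atomic_store(&cpu->tcg_exit_req, 1);
    }
}

/* vCPU side: publish itself before entering the execution loop,
 * then poll the flags the kicker sets. */
static void cpu_exec_sketch(CPUStateSketch *cpu)
{
    atomic_store(&tcg_current_cpu, cpu);
    while (!atomic_load(&exit_request) && !atomic_load(&cpu->exit_request)) {
        /* execute one translation block here */
    }
    atomic_store(&tcg_current_cpu, (CPUStateSketch *)NULL);
}

static CPUStateSketch the_cpu;

int main(void)
{
    kick_no_halt();             /* the "iothread" asks the vCPU to stop ...   */
    cpu_exec_sketch(&the_cpu);  /* ... so the "vCPU" loop returns immediately */
    return 0;
}

Because the kicker sets the flag after the vCPU has published itself through tcg_current_cpu (or sees NULL and relies on the global flag being observed before the loop is entered), no signal is needed and the same code path works on Win32, which is exactly the point of the commit.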
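
Editor's note: the tb_lock()/tb_unlock()/tb_lock_reset() trio added in "replace spinlock by QemuMutex" replaces the old "volatile bool have_tb_lock" local in cpu_exec() with a per-thread flag, so the longjmp() recovery path can release the lock exactly once. The sketch below is illustrative only and assumes the CONFIG_USER_ONLY variant: pthread primitives stand in for QemuMutex, and find_and_run_tb() is a hypothetical stand-in for the TB lookup in cpu_exec().

/* Minimal sketch of the recursion-safe lock cleanup after longjmp(). */
#include <assert.h>
#include <pthread.h>
#include <setjmp.h>
#include <stdio.h>

static pthread_mutex_t tb_lock_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int have_tb_lock;   /* does *this* thread hold the lock? */
static jmp_buf exit_jmp;

static void tb_lock(void)
{
    assert(!have_tb_lock);
    pthread_mutex_lock(&tb_lock_mutex);
    have_tb_lock++;
}

static void tb_unlock(void)
{
    assert(have_tb_lock);
    have_tb_lock--;
    pthread_mutex_unlock(&tb_lock_mutex);
}

/* Exception path: release the lock only if this thread actually took it,
 * which is what the old volatile local in cpu_exec() used to track. */
static void tb_lock_reset(void)
{
    if (have_tb_lock) {
        have_tb_lock = 0;
        pthread_mutex_unlock(&tb_lock_mutex);
    }
}

static void find_and_run_tb(int fail)
{
    tb_lock();
    if (fail) {
        /* error path: leave the function via longjmp with the lock held */
        longjmp(exit_jmp, 1);
    }
    /* ... run the translation block ... */
    tb_unlock();
}

int main(void)
{
    if (setjmp(exit_jmp) == 0) {
        find_and_run_tb(1);
    } else {
        tb_lock_reset();         /* cpu_exec()'s recovery path in the patch */
    }
    printf("lock released, have_tb_lock=%d\n", have_tb_lock);
    return 0;
}

Keeping the "do I hold it?" flag in thread-local storage rather than in a stack variable is what lets the lock survive across longjmp() without the volatile qualifier the old code needed, and it is also what the softmmu build can later reuse once multithreaded TCG takes the mutex unconditionally.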