Diffstat (limited to 'softmmu/cpus.c')
-rw-r--r--  softmmu/cpus.c | 1705
1 file changed, 128 insertions(+), 1577 deletions(-)
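This commit guts softmmu/cpus.c: the per-accelerator vCPU thread functions (KVM, HAX, HVF, WHPX, the qtest dummy, and both TCG schedulers) move into their respective accelerators, the guest cycle counter and icount warp machinery move behind the new "sysemu/cpu-timers.h" interface, and what remains dispatches through a small `CpusAccel` vtable that each accelerator registers at startup. Below is a minimal sketch of how an accelerator plugs into the new interface, based only on the hooks visible in this diff; the `my_accel_*` names are hypothetical stand-ins, not part of the patch, and the header location is assumed:

```c
#include "qemu/osdep.h"
#include "sysemu/cpus.h"   /* CpusAccel, cpus_register_accel() (assumed location) */

/*
 * Hypothetical vCPU thread setup. A real implementation spawns a thread
 * that loops on cpu_can_run()/qemu_wait_io_event() and calls
 * cpu_thread_signal_created() once the vCPU is up.
 */
static void my_accel_create_vcpu_thread(CPUState *cpu)
{
    /* qemu_thread_create(...); */
}

static const CpusAccel my_accel_cpus = {
    /* The only mandatory hook; cpus_register_accel() asserts it is set. */
    .create_vcpu_thread = my_accel_create_vcpu_thread,
    /*
     * Optional hooks (synchronize_state, kick_vcpu_thread,
     * handle_interrupt, get_virtual_clock, get_elapsed_ticks, ...) are
     * NULL-checked by their callers in cpus.c; the kick, interrupt and
     * clock hooks fall back to generic implementations when unset.
     */
};

static void my_accel_register(void)   /* hypothetical init-path hook */
{
    cpus_register_accel(&my_accel_cpus);
}
```

With this in place, `qemu_init_vcpu()` no longer needs an if/else chain over `kvm_enabled()`, `hax_enabled()` and friends; it simply asserts that some accelerator has registered and calls its `create_vcpu_thread` hook.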
diff --git a/softmmu/cpus.c b/softmmu/cpus.c index ac8940d52e..9e33416b4d 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -24,45 +24,25 @@ #include "qemu/osdep.h" #include "qemu-common.h" -#include "qemu/config-file.h" -#include "qemu/cutils.h" -#include "migration/vmstate.h" #include "monitor/monitor.h" #include "qapi/error.h" #include "qapi/qapi-commands-misc.h" #include "qapi/qapi-events-run-state.h" #include "qapi/qmp/qerror.h" -#include "qemu/error-report.h" -#include "qemu/qemu-print.h" -#include "sysemu/tcg.h" -#include "sysemu/block-backend.h" #include "exec/gdbstub.h" -#include "sysemu/dma.h" #include "sysemu/hw_accel.h" -#include "sysemu/kvm.h" -#include "sysemu/hax.h" -#include "sysemu/hvf.h" -#include "sysemu/whpx.h" #include "exec/exec-all.h" - #include "qemu/thread.h" #include "qemu/plugin.h" #include "sysemu/cpus.h" -#include "sysemu/qtest.h" -#include "qemu/main-loop.h" -#include "qemu/option.h" -#include "qemu/bitmap.h" -#include "qemu/seqlock.h" #include "qemu/guest-random.h" -#include "tcg/tcg.h" #include "hw/nmi.h" #include "sysemu/replay.h" #include "sysemu/runstate.h" +#include "sysemu/cpu-timers.h" #include "hw/boards.h" #include "hw/hw.h" -#include "sysemu/cpu-throttle.h" - #ifdef CONFIG_LINUX #include <sys/prctl.h> @@ -83,15 +63,12 @@ static QemuMutex qemu_global_mutex; -int64_t max_delay; -int64_t max_advance; - bool cpu_is_stopped(CPUState *cpu) { return cpu->stopped || !runstate_is_running(); } -static inline bool cpu_work_list_empty(CPUState *cpu) +bool cpu_work_list_empty(CPUState *cpu) { bool ret; @@ -101,7 +78,7 @@ static inline bool cpu_work_list_empty(CPUState *cpu) return ret; } -static bool cpu_thread_is_idle(CPUState *cpu) +bool cpu_thread_is_idle(CPUState *cpu) { if (cpu->stop || !cpu_work_list_empty(cpu)) { return false; @@ -116,7 +93,7 @@ static bool cpu_thread_is_idle(CPUState *cpu) return true; } -static bool all_cpu_threads_idle(void) +bool all_cpu_threads_idle(void) { CPUState *cpu; @@ -129,837 +106,140 @@ static bool all_cpu_threads_idle(void) } /***********************************************************/ -/* guest cycle counter */ - -/* Protected by TimersState seqlock */ - -static bool icount_sleep = true; -/* Arbitrarily pick 1MIPS as the minimum allowable speed. */ -#define MAX_ICOUNT_SHIFT 10 - -typedef struct TimersState { - /* Protected by BQL. */ - int64_t cpu_ticks_prev; - int64_t cpu_ticks_offset; - - /* Protect fields that can be respectively read outside the - * BQL, and written from multiple threads. - */ - QemuSeqLock vm_clock_seqlock; - QemuSpin vm_clock_lock; - - int16_t cpu_ticks_enabled; - - /* Conversion factor from emulated instructions to virtual clock ticks. */ - int16_t icount_time_shift; - - /* Compensate for varying guest execution speed. */ - int64_t qemu_icount_bias; - - int64_t vm_clock_warp_start; - int64_t cpu_clock_offset; - - /* Only written by TCG thread */ - int64_t qemu_icount; - - /* for adjusting icount */ - QEMUTimer *icount_rt_timer; - QEMUTimer *icount_vm_timer; - QEMUTimer *icount_warp_timer; -} TimersState; - -static TimersState timers_state; -bool mttcg_enabled; - - -/* The current number of executed instructions is based on what we - * originally budgeted minus the current state of the decrementing - * icount counters in extra/u16.low. - */ -static int64_t cpu_get_icount_executed(CPUState *cpu) -{ - return (cpu->icount_budget - - (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra)); -} - -/* - * Update the global shared timer_state.qemu_icount to take into - * account executed instructions. 
This is done by the TCG vCPU - * thread so the main-loop can see time has moved forward. - */ -static void cpu_update_icount_locked(CPUState *cpu) +void hw_error(const char *fmt, ...) { - int64_t executed = cpu_get_icount_executed(cpu); - cpu->icount_budget -= executed; + va_list ap; + CPUState *cpu; - qatomic_set_i64(&timers_state.qemu_icount, - timers_state.qemu_icount + executed); + va_start(ap, fmt); + fprintf(stderr, "qemu: hardware error: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + CPU_FOREACH(cpu) { + fprintf(stderr, "CPU #%d:\n", cpu->cpu_index); + cpu_dump_state(cpu, stderr, CPU_DUMP_FPU); + } + va_end(ap); + abort(); } /* - * Update the global shared timer_state.qemu_icount to take into - * account executed instructions. This is done by the TCG vCPU - * thread so the main-loop can see time has moved forward. + * The chosen accelerator is supposed to register this. */ -void cpu_update_icount(CPUState *cpu) -{ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - cpu_update_icount_locked(cpu); - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); -} +static const CpusAccel *cpus_accel; -static int64_t cpu_get_icount_raw_locked(void) -{ - CPUState *cpu = current_cpu; - - if (cpu && cpu->running) { - if (!cpu->can_do_io) { - error_report("Bad icount read"); - exit(1); - } - /* Take into account what has run */ - cpu_update_icount_locked(cpu); - } - /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ - return qatomic_read_i64(&timers_state.qemu_icount); -} - -static int64_t cpu_get_icount_locked(void) -{ - int64_t icount = cpu_get_icount_raw_locked(); - return qatomic_read_i64(&timers_state.qemu_icount_bias) + - cpu_icount_to_ns(icount); -} - -int64_t cpu_get_icount_raw(void) -{ - int64_t icount; - unsigned start; - - do { - start = seqlock_read_begin(&timers_state.vm_clock_seqlock); - icount = cpu_get_icount_raw_locked(); - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); - - return icount; -} - -/* Return the virtual CPU time, based on the instruction counter. */ -int64_t cpu_get_icount(void) -{ - int64_t icount; - unsigned start; - - do { - start = seqlock_read_begin(&timers_state.vm_clock_seqlock); - icount = cpu_get_icount_locked(); - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); - - return icount; -} - -int64_t cpu_icount_to_ns(int64_t icount) -{ - return icount << qatomic_read(&timers_state.icount_time_shift); -} - -static int64_t cpu_get_ticks_locked(void) +void cpu_synchronize_all_states(void) { - int64_t ticks = timers_state.cpu_ticks_offset; - if (timers_state.cpu_ticks_enabled) { - ticks += cpu_get_host_ticks(); - } + CPUState *cpu; - if (timers_state.cpu_ticks_prev > ticks) { - /* Non increasing ticks may happen if the host uses software suspend. */ - timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks; - ticks = timers_state.cpu_ticks_prev; + CPU_FOREACH(cpu) { + cpu_synchronize_state(cpu); } - - timers_state.cpu_ticks_prev = ticks; - return ticks; } -/* return the time elapsed in VM between vm_start and vm_stop. Unless - * icount is active, cpu_get_ticks() uses units of the host CPU cycle - * counter. 
- */ -int64_t cpu_get_ticks(void) +void cpu_synchronize_all_post_reset(void) { - int64_t ticks; + CPUState *cpu; - if (use_icount) { - return cpu_get_icount(); + CPU_FOREACH(cpu) { + cpu_synchronize_post_reset(cpu); } - - qemu_spin_lock(&timers_state.vm_clock_lock); - ticks = cpu_get_ticks_locked(); - qemu_spin_unlock(&timers_state.vm_clock_lock); - return ticks; } -static int64_t cpu_get_clock_locked(void) +void cpu_synchronize_all_post_init(void) { - int64_t time; + CPUState *cpu; - time = timers_state.cpu_clock_offset; - if (timers_state.cpu_ticks_enabled) { - time += get_clock(); + CPU_FOREACH(cpu) { + cpu_synchronize_post_init(cpu); } - - return time; } -/* Return the monotonic time elapsed in VM, i.e., - * the time between vm_start and vm_stop - */ -int64_t cpu_get_clock(void) +void cpu_synchronize_all_pre_loadvm(void) { - int64_t ti; - unsigned start; - - do { - start = seqlock_read_begin(&timers_state.vm_clock_seqlock); - ti = cpu_get_clock_locked(); - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); - - return ti; -} + CPUState *cpu; -/* enable cpu_get_ticks() - * Caller must hold BQL which serves as mutex for vm_clock_seqlock. - */ -void cpu_enable_ticks(void) -{ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (!timers_state.cpu_ticks_enabled) { - timers_state.cpu_ticks_offset -= cpu_get_host_ticks(); - timers_state.cpu_clock_offset -= get_clock(); - timers_state.cpu_ticks_enabled = 1; + CPU_FOREACH(cpu) { + cpu_synchronize_pre_loadvm(cpu); } - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); } -/* disable cpu_get_ticks() : the clock is stopped. You must not call - * cpu_get_ticks() after that. - * Caller must hold BQL which serves as mutex for vm_clock_seqlock. - */ -void cpu_disable_ticks(void) +void cpu_synchronize_state(CPUState *cpu) { - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (timers_state.cpu_ticks_enabled) { - timers_state.cpu_ticks_offset += cpu_get_host_ticks(); - timers_state.cpu_clock_offset = cpu_get_clock_locked(); - timers_state.cpu_ticks_enabled = 0; + if (cpus_accel->synchronize_state) { + cpus_accel->synchronize_state(cpu); } - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); } -/* Correlation between real and virtual time is always going to be - fairly approximate, so ignore small variation. - When the guest is idle real and virtual time will be aligned in - the IO wait loop. */ -#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) - -static void icount_adjust(void) +void cpu_synchronize_post_reset(CPUState *cpu) { - int64_t cur_time; - int64_t cur_icount; - int64_t delta; - - /* Protected by TimersState mutex. */ - static int64_t last_delta; - - /* If the VM is not running, then do nothing. */ - if (!runstate_is_running()) { - return; + if (cpus_accel->synchronize_post_reset) { + cpus_accel->synchronize_post_reset(cpu); } - - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, - cpu_get_clock_locked()); - cur_icount = cpu_get_icount_locked(); - - delta = cur_icount - cur_time; - /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ - if (delta > 0 - && last_delta + ICOUNT_WOBBLE < delta * 2 - && timers_state.icount_time_shift > 0) { - /* The guest is getting too far ahead. Slow time down. 
*/ - qatomic_set(&timers_state.icount_time_shift, - timers_state.icount_time_shift - 1); - } - if (delta < 0 - && last_delta - ICOUNT_WOBBLE > delta * 2 - && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { - /* The guest is getting too far behind. Speed time up. */ - qatomic_set(&timers_state.icount_time_shift, - timers_state.icount_time_shift + 1); - } - last_delta = delta; - qatomic_set_i64(&timers_state.qemu_icount_bias, - cur_icount - (timers_state.qemu_icount - << timers_state.icount_time_shift)); - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); -} - -static void icount_adjust_rt(void *opaque) -{ - timer_mod(timers_state.icount_rt_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); - icount_adjust(); } -static void icount_adjust_vm(void *opaque) +void cpu_synchronize_post_init(CPUState *cpu) { - timer_mod(timers_state.icount_vm_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - NANOSECONDS_PER_SECOND / 10); - icount_adjust(); -} - -static int64_t qemu_icount_round(int64_t count) -{ - int shift = qatomic_read(&timers_state.icount_time_shift); - return (count + (1 << shift) - 1) >> shift; -} - -static void icount_warp_rt(void) -{ - unsigned seq; - int64_t warp_start; - - /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start - * changes from -1 to another value, so the race here is okay. - */ - do { - seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); - warp_start = timers_state.vm_clock_warp_start; - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); - - if (warp_start == -1) { - return; + if (cpus_accel->synchronize_post_init) { + cpus_accel->synchronize_post_init(cpu); } - - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (runstate_is_running()) { - int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, - cpu_get_clock_locked()); - int64_t warp_delta; - - warp_delta = clock - timers_state.vm_clock_warp_start; - if (use_icount == 2) { - /* - * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too - * far ahead of real time. - */ - int64_t cur_icount = cpu_get_icount_locked(); - int64_t delta = clock - cur_icount; - warp_delta = MIN(warp_delta, delta); - } - qatomic_set_i64(&timers_state.qemu_icount_bias, - timers_state.qemu_icount_bias + warp_delta); - } - timers_state.vm_clock_warp_start = -1; - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - - if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - } -} - -static void icount_timer_cb(void *opaque) -{ - /* No need for a checkpoint because the timer already synchronizes - * with CHECKPOINT_CLOCK_VIRTUAL_RT. 
- */ - icount_warp_rt(); } -void qtest_clock_warp(int64_t dest) +void cpu_synchronize_pre_loadvm(CPUState *cpu) { - int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - AioContext *aio_context; - assert(qtest_enabled()); - aio_context = qemu_get_aio_context(); - while (clock < dest) { - int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - QEMU_TIMER_ATTR_ALL); - int64_t warp = qemu_soonest_timeout(dest - clock, deadline); - - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - qatomic_set_i64(&timers_state.qemu_icount_bias, - timers_state.qemu_icount_bias + warp); - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - - qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); - timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); - clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - } - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); -} - -void qemu_start_warp_timer(void) -{ - int64_t clock; - int64_t deadline; - - if (!use_icount) { - return; - } - - /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers - * do not fire, so computing the deadline does not make sense. - */ - if (!runstate_is_running()) { - return; - } - - if (replay_mode != REPLAY_MODE_PLAY) { - if (!all_cpu_threads_idle()) { - return; - } - - if (qtest_enabled()) { - /* When testing, qtest commands advance icount. */ - return; - } - - replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); - } else { - /* warp clock deterministically in record/replay mode */ - if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { - /* vCPU is sleeping and warp can't be started. - It is probably a race condition: notification sent - to vCPU was processed in advance and vCPU went to sleep. - Therefore we have to wake it up for doing someting. */ - if (replay_has_checkpoint()) { - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - } - return; - } - } - - /* We want to use the earliest deadline from ALL vm_clocks */ - clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); - deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - ~QEMU_TIMER_ATTR_EXTERNAL); - if (deadline < 0) { - static bool notified; - if (!icount_sleep && !notified) { - warn_report("icount sleep disabled and no active timers"); - notified = true; - } - return; - } - - if (deadline > 0) { - /* - * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to - * sleep. Otherwise, the CPU might be waiting for a future timer - * interrupt to wake it up, but the interrupt never comes because - * the vCPU isn't running any insns and thus doesn't advance the - * QEMU_CLOCK_VIRTUAL. - */ - if (!icount_sleep) { - /* - * We never let VCPUs sleep in no sleep icount mode. - * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance - * to the next QEMU_CLOCK_VIRTUAL event and notify it. - * It is useful when we want a deterministic execution time, - * isolated from host latencies. - */ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - qatomic_set_i64(&timers_state.qemu_icount_bias, - timers_state.qemu_icount_bias + deadline); - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - } else { - /* - * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some - * "real" time, (related to the time left until the next event) has - * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. 
- * This avoids that the warps are visible externally; for example, - * you will not be sending network packets continuously instead of - * every 100ms. - */ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (timers_state.vm_clock_warp_start == -1 - || timers_state.vm_clock_warp_start > clock) { - timers_state.vm_clock_warp_start = clock; - } - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - timer_mod_anticipate(timers_state.icount_warp_timer, - clock + deadline); - } - } else if (deadline == 0) { - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); + if (cpus_accel->synchronize_pre_loadvm) { + cpus_accel->synchronize_pre_loadvm(cpu); } } -static void qemu_account_warp_timer(void) +int64_t cpus_get_virtual_clock(void) { - if (!use_icount || !icount_sleep) { - return; - } - - /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers - * do not fire, so computing the deadline does not make sense. + /* + * XXX + * + * need to check that cpus_accel is not NULL, because qcow2 calls + * qemu_get_clock_ns(CLOCK_VIRTUAL) without any accel initialized and + * with ticks disabled in some io-tests: + * 030 040 041 060 099 120 127 140 156 161 172 181 191 192 195 203 229 249 256 267 + * + * is this expected? + * + * XXX */ - if (!runstate_is_running()) { - return; - } - - /* warp clock deterministically in record/replay mode */ - if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { - return; + if (cpus_accel && cpus_accel->get_virtual_clock) { + return cpus_accel->get_virtual_clock(); } - - timer_del(timers_state.icount_warp_timer); - icount_warp_rt(); -} - -static bool icount_state_needed(void *opaque) -{ - return use_icount; -} - -static bool warp_timer_state_needed(void *opaque) -{ - TimersState *s = opaque; - return s->icount_warp_timer != NULL; -} - -static bool adjust_timers_state_needed(void *opaque) -{ - TimersState *s = opaque; - return s->icount_rt_timer != NULL; -} - -static bool shift_state_needed(void *opaque) -{ - return use_icount == 2; + return cpu_get_clock(); } /* - * Subsection for warp timer migration is optional, because may not be created - */ -static const VMStateDescription icount_vmstate_warp_timer = { - .name = "timer/icount/warp_timer", - .version_id = 1, - .minimum_version_id = 1, - .needed = warp_timer_state_needed, - .fields = (VMStateField[]) { - VMSTATE_INT64(vm_clock_warp_start, TimersState), - VMSTATE_TIMER_PTR(icount_warp_timer, TimersState), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription icount_vmstate_adjust_timers = { - .name = "timer/icount/timers", - .version_id = 1, - .minimum_version_id = 1, - .needed = adjust_timers_state_needed, - .fields = (VMStateField[]) { - VMSTATE_TIMER_PTR(icount_rt_timer, TimersState), - VMSTATE_TIMER_PTR(icount_vm_timer, TimersState), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription icount_vmstate_shift = { - .name = "timer/icount/shift", - .version_id = 1, - .minimum_version_id = 1, - .needed = shift_state_needed, - .fields = (VMStateField[]) { - VMSTATE_INT16(icount_time_shift, TimersState), - VMSTATE_END_OF_LIST() - } -}; - -/* - * This is a subsection for icount migration. 
- */ -static const VMStateDescription icount_vmstate_timers = { - .name = "timer/icount", - .version_id = 1, - .minimum_version_id = 1, - .needed = icount_state_needed, - .fields = (VMStateField[]) { - VMSTATE_INT64(qemu_icount_bias, TimersState), - VMSTATE_INT64(qemu_icount, TimersState), - VMSTATE_END_OF_LIST() - }, - .subsections = (const VMStateDescription*[]) { - &icount_vmstate_warp_timer, - &icount_vmstate_adjust_timers, - &icount_vmstate_shift, - NULL - } -}; - -static const VMStateDescription vmstate_timers = { - .name = "timer", - .version_id = 2, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_INT64(cpu_ticks_offset, TimersState), - VMSTATE_UNUSED(8), - VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2), - VMSTATE_END_OF_LIST() - }, - .subsections = (const VMStateDescription*[]) { - &icount_vmstate_timers, - NULL - } -}; - -void cpu_ticks_init(void) -{ - seqlock_init(&timers_state.vm_clock_seqlock); - qemu_spin_init(&timers_state.vm_clock_lock); - vmstate_register(NULL, 0, &vmstate_timers, &timers_state); - cpu_throttle_init(); -} - -void configure_icount(QemuOpts *opts, Error **errp) -{ - const char *option = qemu_opt_get(opts, "shift"); - bool sleep = qemu_opt_get_bool(opts, "sleep", true); - bool align = qemu_opt_get_bool(opts, "align", false); - long time_shift = -1; - - if (!option) { - if (qemu_opt_get(opts, "align") != NULL) { - error_setg(errp, "Please specify shift option when using align"); - } - return; - } - - if (align && !sleep) { - error_setg(errp, "align=on and sleep=off are incompatible"); - return; - } - - if (strcmp(option, "auto") != 0) { - if (qemu_strtol(option, NULL, 0, &time_shift) < 0 - || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { - error_setg(errp, "icount: Invalid shift value"); - return; - } - } else if (icount_align_option) { - error_setg(errp, "shift=auto and align=on are incompatible"); - return; - } else if (!icount_sleep) { - error_setg(errp, "shift=auto and sleep=off are incompatible"); - return; - } - - icount_sleep = sleep; - if (icount_sleep) { - timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, - icount_timer_cb, NULL); - } - - icount_align_option = align; - - if (time_shift >= 0) { - timers_state.icount_time_shift = time_shift; - use_icount = 1; - return; - } - - use_icount = 2; - - /* 125MIPS seems a reasonable initial guess at the guest speed. - It will be corrected fairly quickly anyway. */ - timers_state.icount_time_shift = 3; - - /* Have both realtime and virtual time triggers for speed adjustment. - The realtime trigger catches emulated time passing too slowly, - the virtual time trigger catches emulated time passing too fast. - Realtime triggers occur even when idle, so use them less frequently - than VM triggers. */ - timers_state.vm_clock_warp_start = -1; - timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, - icount_adjust_rt, NULL); - timer_mod(timers_state.icount_rt_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); - timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - icount_adjust_vm, NULL); - timer_mod(timers_state.icount_vm_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - NANOSECONDS_PER_SECOND / 10); -} - -/***********************************************************/ -/* TCG vCPU kick timer - * - * The kick timer is responsible for moving single threaded vCPU - * emulation on to the next vCPU. If more than one vCPU is running a - * timer event with force a cpu->exit so the next vCPU can get - * scheduled. 
- * - * The timer is removed if all vCPUs are idle and restarted again once - * idleness is complete. + * return the time elapsed in VM between vm_start and vm_stop. Unless + * icount is active, cpus_get_elapsed_ticks() uses units of the host CPU cycle + * counter. */ - -static QEMUTimer *tcg_kick_vcpu_timer; -static CPUState *tcg_current_rr_cpu; - -#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10) - -static inline int64_t qemu_tcg_next_kick(void) -{ - return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; -} - -/* Kick the currently round-robin scheduled vCPU to next */ -static void qemu_cpu_kick_rr_next_cpu(void) -{ - CPUState *cpu; - do { - cpu = qatomic_mb_read(&tcg_current_rr_cpu); - if (cpu) { - cpu_exit(cpu); - } - } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu)); -} - -/* Kick all RR vCPUs */ -static void qemu_cpu_kick_rr_cpus(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_exit(cpu); - }; -} - -static void do_nothing(CPUState *cpu, run_on_cpu_data unused) -{ -} - -void qemu_timer_notify_cb(void *opaque, QEMUClockType type) -{ - if (!use_icount || type != QEMU_CLOCK_VIRTUAL) { - qemu_notify_event(); - return; - } - - if (qemu_in_vcpu_thread()) { - /* A CPU is currently running; kick it back out to the - * tcg_cpu_exec() loop so it will recalculate its - * icount deadline immediately. - */ - qemu_cpu_kick(current_cpu); - } else if (first_cpu) { - /* qemu_cpu_kick is not enough to kick a halted CPU out of - * qemu_tcg_wait_io_event. async_run_on_cpu, instead, - * causes cpu_thread_is_idle to return false. This way, - * handle_icount_deadline can run. - * If we have no CPUs at all for some reason, we don't - * need to do anything. - */ - async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL); - } -} - -static void kick_tcg_thread(void *opaque) -{ - timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); - qemu_cpu_kick_rr_next_cpu(); -} - -static void start_tcg_kick_timer(void) +int64_t cpus_get_elapsed_ticks(void) { - assert(!mttcg_enabled); - if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) { - tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - kick_tcg_thread, NULL); - } - if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) { - timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); + if (cpus_accel->get_elapsed_ticks) { + return cpus_accel->get_elapsed_ticks(); } + return cpu_get_ticks(); } -static void stop_tcg_kick_timer(void) +static void generic_handle_interrupt(CPUState *cpu, int mask) { - assert(!mttcg_enabled); - if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) { - timer_del(tcg_kick_vcpu_timer); - } -} + cpu->interrupt_request |= mask; -/***********************************************************/ -void hw_error(const char *fmt, ...) 
-{ - va_list ap; - CPUState *cpu; - - va_start(ap, fmt); - fprintf(stderr, "qemu: hardware error: "); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - CPU_FOREACH(cpu) { - fprintf(stderr, "CPU #%d:\n", cpu->cpu_index); - cpu_dump_state(cpu, stderr, CPU_DUMP_FPU); - } - va_end(ap); - abort(); -} - -void cpu_synchronize_all_states(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_state(cpu); - } -} - -void cpu_synchronize_all_post_reset(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_post_reset(cpu); - } -} - -void cpu_synchronize_all_post_init(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_post_init(cpu); + if (!qemu_cpu_is_self(cpu)) { + qemu_cpu_kick(cpu); } } -void cpu_synchronize_all_pre_loadvm(void) +void cpu_interrupt(CPUState *cpu, int mask) { - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_pre_loadvm(cpu); + if (cpus_accel->handle_interrupt) { + cpus_accel->handle_interrupt(cpu, mask); + } else { + generic_handle_interrupt(cpu, mask); } } @@ -991,7 +271,7 @@ int vm_shutdown(void) return do_vm_stop(RUN_STATE_SHUTDOWN, false); } -static bool cpu_can_run(CPUState *cpu) +bool cpu_can_run(CPUState *cpu) { if (cpu->stop) { return false; @@ -1002,11 +282,24 @@ static bool cpu_can_run(CPUState *cpu) return true; } -static void cpu_handle_guest_debug(CPUState *cpu) +void cpu_handle_guest_debug(CPUState *cpu) { - gdb_set_stop_cpu(cpu); - qemu_system_debug_request(); - cpu->stopped = true; + if (replay_running_debug()) { + if (!cpu->singlestep_enabled) { + /* + * Report about the breakpoint and + * make a single step to skip it + */ + replay_breakpoint(); + cpu_single_step(cpu, SSTEP_ENABLE); + } else { + cpu_single_step(cpu, 0); + } + } else { + gdb_set_stop_cpu(cpu); + qemu_system_debug_request(); + cpu->stopped = true; + } } #ifdef CONFIG_LINUX @@ -1085,18 +378,6 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) do_run_on_cpu(cpu, func, data, &qemu_global_mutex); } -static void qemu_kvm_destroy_vcpu(CPUState *cpu) -{ - if (kvm_destroy_vcpu(cpu) < 0) { - error_report("kvm_destroy_vcpu failed"); - exit(EXIT_FAILURE); - } -} - -static void qemu_tcg_destroy_vcpu(CPUState *cpu) -{ -} - static void qemu_cpu_stop(CPUState *cpu, bool exit) { g_assert(qemu_cpu_is_self(cpu)); @@ -1108,7 +389,7 @@ static void qemu_cpu_stop(CPUState *cpu, bool exit) qemu_cond_broadcast(&qemu_pause_cond); } -static void qemu_wait_io_event_common(CPUState *cpu) +void qemu_wait_io_event_common(CPUState *cpu) { qatomic_mb_set(&cpu->thread_kicked, false); if (cpu->stop) { @@ -1117,23 +398,7 @@ static void qemu_wait_io_event_common(CPUState *cpu) process_queued_cpu_work(cpu); } -static void qemu_tcg_rr_wait_io_event(void) -{ - CPUState *cpu; - - while (all_cpu_threads_idle()) { - stop_tcg_kick_timer(); - qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex); - } - - start_tcg_kick_timer(); - - CPU_FOREACH(cpu) { - qemu_wait_io_event_common(cpu); - } -} - -static void qemu_wait_io_event(CPUState *cpu) +void qemu_wait_io_event(CPUState *cpu) { bool slept = false; @@ -1149,557 +414,15 @@ static void qemu_wait_io_event(CPUState *cpu) } #ifdef _WIN32 - /* Eat dummy APC queued by qemu_cpu_kick_thread. */ - if (!tcg_enabled()) { + /* Eat dummy APC queued by cpus_kick_thread. 
*/ + if (hax_enabled()) { SleepEx(0, TRUE); } #endif qemu_wait_io_event_common(cpu); } -static void *qemu_kvm_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - int r; - - rcu_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - cpu->thread_id = qemu_get_thread_id(); - cpu->can_do_io = 1; - current_cpu = cpu; - - r = kvm_init_vcpu(cpu); - if (r < 0) { - error_report("kvm_init_vcpu failed: %s", strerror(-r)); - exit(1); - } - - kvm_init_cpu_signals(cpu); - - /* signal CPU creation */ - cpu->created = true; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - if (cpu_can_run(cpu)) { - r = kvm_cpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - } - } - qemu_wait_io_event(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - - qemu_kvm_destroy_vcpu(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -} - -static void *qemu_dummy_cpu_thread_fn(void *arg) -{ -#ifdef _WIN32 - error_report("qtest is not supported under Windows"); - exit(1); -#else - CPUState *cpu = arg; - sigset_t waitset; - int r; - - rcu_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - cpu->thread_id = qemu_get_thread_id(); - cpu->can_do_io = 1; - current_cpu = cpu; - - sigemptyset(&waitset); - sigaddset(&waitset, SIG_IPI); - - /* signal CPU creation */ - cpu->created = true; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - qemu_mutex_unlock_iothread(); - do { - int sig; - r = sigwait(&waitset, &sig); - } while (r == -1 && (errno == EAGAIN || errno == EINTR)); - if (r == -1) { - perror("sigwait"); - exit(1); - } - qemu_mutex_lock_iothread(); - qemu_wait_io_event(cpu); - } while (!cpu->unplug); - - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -#endif -} - -static int64_t tcg_get_icount_limit(void) -{ - int64_t deadline; - - if (replay_mode != REPLAY_MODE_PLAY) { - /* - * Include all the timers, because they may need an attention. - * Too long CPU execution may create unnecessary delay in UI. - */ - deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - QEMU_TIMER_ATTR_ALL); - /* Check realtime timers, because they help with input processing */ - deadline = qemu_soonest_timeout(deadline, - qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME, - QEMU_TIMER_ATTR_ALL)); - - /* Maintain prior (possibly buggy) behaviour where if no deadline - * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than - * INT32_MAX nanoseconds ahead, we still use INT32_MAX - * nanoseconds. - */ - if ((deadline < 0) || (deadline > INT32_MAX)) { - deadline = INT32_MAX; - } - - return qemu_icount_round(deadline); - } else { - return replay_get_instructions(); - } -} - -static void notify_aio_contexts(void) -{ - /* Wake up other AioContexts. */ - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); -} - -static void handle_icount_deadline(void) -{ - assert(qemu_in_vcpu_thread()); - if (use_icount) { - int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - QEMU_TIMER_ATTR_ALL); - - if (deadline == 0) { - notify_aio_contexts(); - } - } -} - -static void prepare_icount_for_run(CPUState *cpu) -{ - if (use_icount) { - int insns_left; - - /* These should always be cleared by process_icount_data after - * each vCPU execution. 
However u16.high can be raised - * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt - */ - g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0); - g_assert(cpu->icount_extra == 0); - - cpu->icount_budget = tcg_get_icount_limit(); - insns_left = MIN(0xffff, cpu->icount_budget); - cpu_neg(cpu)->icount_decr.u16.low = insns_left; - cpu->icount_extra = cpu->icount_budget - insns_left; - - replay_mutex_lock(); - - if (cpu->icount_budget == 0 && replay_has_checkpoint()) { - notify_aio_contexts(); - } - } -} - -static void process_icount_data(CPUState *cpu) -{ - if (use_icount) { - /* Account for executed instructions */ - cpu_update_icount(cpu); - - /* Reset the counters */ - cpu_neg(cpu)->icount_decr.u16.low = 0; - cpu->icount_extra = 0; - cpu->icount_budget = 0; - - replay_account_executed_instructions(); - - replay_mutex_unlock(); - } -} - - -static int tcg_cpu_exec(CPUState *cpu) -{ - int ret; -#ifdef CONFIG_PROFILER - int64_t ti; -#endif - - assert(tcg_enabled()); -#ifdef CONFIG_PROFILER - ti = profile_getclock(); -#endif - cpu_exec_start(cpu); - ret = cpu_exec(cpu); - cpu_exec_end(cpu); -#ifdef CONFIG_PROFILER - qatomic_set(&tcg_ctx->prof.cpu_exec_time, - tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); -#endif - return ret; -} - -/* Destroy any remaining vCPUs which have been unplugged and have - * finished running - */ -static void deal_with_unplugged_cpus(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - if (cpu->unplug && !cpu_can_run(cpu)) { - qemu_tcg_destroy_vcpu(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - break; - } - } -} - -/* Single-threaded TCG - * - * In the single-threaded case each vCPU is simulated in turn. If - * there is more than a single vCPU we create a simple timer to kick - * the vCPU and ensure we don't get stuck in a tight loop in one vCPU. - * This is done explicitly rather than relying on side-effects - * elsewhere. - */ - -static void *qemu_tcg_rr_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - - assert(tcg_enabled()); - rcu_register_thread(); - tcg_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - - cpu->thread_id = qemu_get_thread_id(); - cpu->created = true; - cpu->can_do_io = 1; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - /* wait for initial kick-off after machine start */ - while (first_cpu->stopped) { - qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex); - - /* process any pending work */ - CPU_FOREACH(cpu) { - current_cpu = cpu; - qemu_wait_io_event_common(cpu); - } - } - - start_tcg_kick_timer(); - - cpu = first_cpu; - - /* process any pending work */ - cpu->exit_request = 1; - - while (1) { - qemu_mutex_unlock_iothread(); - replay_mutex_lock(); - qemu_mutex_lock_iothread(); - /* Account partial waits to QEMU_CLOCK_VIRTUAL. */ - qemu_account_warp_timer(); - - /* Run the timers here. This is much more efficient than - * waking up the I/O thread and waiting for completion. 
- */ - handle_icount_deadline(); - - replay_mutex_unlock(); - - if (!cpu) { - cpu = first_cpu; - } - - while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) { - - qatomic_mb_set(&tcg_current_rr_cpu, cpu); - current_cpu = cpu; - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, - (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0); - - if (cpu_can_run(cpu)) { - int r; - - qemu_mutex_unlock_iothread(); - prepare_icount_for_run(cpu); - - r = tcg_cpu_exec(cpu); - - process_icount_data(cpu); - qemu_mutex_lock_iothread(); - - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - break; - } else if (r == EXCP_ATOMIC) { - qemu_mutex_unlock_iothread(); - cpu_exec_step_atomic(cpu); - qemu_mutex_lock_iothread(); - break; - } - } else if (cpu->stop) { - if (cpu->unplug) { - cpu = CPU_NEXT(cpu); - } - break; - } - - cpu = CPU_NEXT(cpu); - } /* while (cpu && !cpu->exit_request).. */ - - /* Does not need qatomic_mb_set because a spurious wakeup is okay. */ - qatomic_set(&tcg_current_rr_cpu, NULL); - - if (cpu && cpu->exit_request) { - qatomic_mb_set(&cpu->exit_request, 0); - } - - if (use_icount && all_cpu_threads_idle()) { - /* - * When all cpus are sleeping (e.g in WFI), to avoid a deadlock - * in the main_loop, wake it up in order to start the warp timer. - */ - qemu_notify_event(); - } - - qemu_tcg_rr_wait_io_event(); - deal_with_unplugged_cpus(); - } - - rcu_unregister_thread(); - return NULL; -} - -static void *qemu_hax_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - int r; - - rcu_register_thread(); - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - - cpu->thread_id = qemu_get_thread_id(); - cpu->created = true; - current_cpu = cpu; - - hax_init_vcpu(cpu); - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - if (cpu_can_run(cpu)) { - r = hax_smp_cpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - } - } - - qemu_wait_io_event(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - rcu_unregister_thread(); - return NULL; -} - -/* The HVF-specific vCPU thread function. This one should only run when the host - * CPU supports the VMX "unrestricted guest" feature. 
*/ -static void *qemu_hvf_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - - int r; - - assert(hvf_enabled()); - - rcu_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - - cpu->thread_id = qemu_get_thread_id(); - cpu->can_do_io = 1; - current_cpu = cpu; - - hvf_init_vcpu(cpu); - - /* signal CPU creation */ - cpu->created = true; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - if (cpu_can_run(cpu)) { - r = hvf_vcpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - } - } - qemu_wait_io_event(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - - hvf_vcpu_destroy(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -} - -static void *qemu_whpx_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - int r; - - rcu_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - cpu->thread_id = qemu_get_thread_id(); - current_cpu = cpu; - - r = whpx_init_vcpu(cpu); - if (r < 0) { - fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r)); - exit(1); - } - - /* signal CPU creation */ - cpu->created = true; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - if (cpu_can_run(cpu)) { - r = whpx_vcpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - } - } - while (cpu_thread_is_idle(cpu)) { - qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); - } - qemu_wait_io_event_common(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - - whpx_destroy_vcpu(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -} - -#ifdef _WIN32 -static void CALLBACK dummy_apc_func(ULONG_PTR unused) -{ -} -#endif - -/* Multi-threaded TCG - * - * In the multi-threaded case each vCPU has its own thread. The TLS - * variable current_cpu can be used deep in the code to find the - * current CPUState for a given thread. - */ - -static void *qemu_tcg_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - - assert(tcg_enabled()); - g_assert(!use_icount); - - rcu_register_thread(); - tcg_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - - cpu->thread_id = qemu_get_thread_id(); - cpu->created = true; - cpu->can_do_io = 1; - current_cpu = cpu; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - /* process any pending work */ - cpu->exit_request = 1; - - do { - if (cpu_can_run(cpu)) { - int r; - qemu_mutex_unlock_iothread(); - r = tcg_cpu_exec(cpu); - qemu_mutex_lock_iothread(); - switch (r) { - case EXCP_DEBUG: - cpu_handle_guest_debug(cpu); - break; - case EXCP_HALTED: - /* during start-up the vCPU is reset and the thread is - * kicked several times. If we don't ensure we go back - * to sleep in the halted state we won't cleanly - * start-up when the vCPU is enabled. - * - * cpu->halted should ensure we sleep in wait_io_event - */ - g_assert(cpu->halted); - break; - case EXCP_ATOMIC: - qemu_mutex_unlock_iothread(); - cpu_exec_step_atomic(cpu); - qemu_mutex_lock_iothread(); - default: - /* Ignore everything else? 
*/ - break; - } - } - - qatomic_mb_set(&cpu->exit_request, 0); - qemu_wait_io_event(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - - qemu_tcg_destroy_vcpu(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -} - -static void qemu_cpu_kick_thread(CPUState *cpu) +void cpus_kick_thread(CPUState *cpu) { #ifndef _WIN32 int err; @@ -1713,44 +436,23 @@ static void qemu_cpu_kick_thread(CPUState *cpu) fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); exit(1); } -#else /* _WIN32 */ - if (!qemu_cpu_is_self(cpu)) { - if (whpx_enabled()) { - whpx_vcpu_kick(cpu); - } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) { - fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n", - __func__, GetLastError()); - exit(1); - } - } #endif } void qemu_cpu_kick(CPUState *cpu) { qemu_cond_broadcast(cpu->halt_cond); - if (tcg_enabled()) { - if (qemu_tcg_mttcg_enabled()) { - cpu_exit(cpu); - } else { - qemu_cpu_kick_rr_cpus(); - } - } else { - if (hax_enabled()) { - /* - * FIXME: race condition with the exit_request check in - * hax_vcpu_hax_exec - */ - cpu->exit_request = 1; - } - qemu_cpu_kick_thread(cpu); + if (cpus_accel->kick_vcpu_thread) { + cpus_accel->kick_vcpu_thread(cpu); + } else { /* default */ + cpus_kick_thread(cpu); } } void qemu_cpu_kick_self(void) { assert(current_cpu); - qemu_cpu_kick_thread(current_cpu); + cpus_kick_thread(current_cpu); } bool qemu_cpu_is_self(CPUState *cpu) @@ -1800,6 +502,21 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms) qemu_cond_timedwait(cond, &qemu_global_mutex, ms); } +/* signal CPU creation */ +void cpu_thread_signal_created(CPUState *cpu) +{ + cpu->created = true; + qemu_cond_signal(&qemu_cpu_cond); +} + +/* signal CPU destruction */ +void cpu_thread_signal_destroyed(CPUState *cpu) +{ + cpu->created = false; + qemu_cond_signal(&qemu_cpu_cond); +} + + static bool all_vcpus_paused(void) { CPUState *cpu; @@ -1875,149 +592,11 @@ void cpu_remove_sync(CPUState *cpu) qemu_mutex_lock_iothread(); } -/* For temporary buffers for forming a name */ -#define VCPU_THREAD_NAME_SIZE 16 - -static void qemu_tcg_init_vcpu(CPUState *cpu) +void cpus_register_accel(const CpusAccel *ca) { - char thread_name[VCPU_THREAD_NAME_SIZE]; - static QemuCond *single_tcg_halt_cond; - static QemuThread *single_tcg_cpu_thread; - static int tcg_region_inited; - - assert(tcg_enabled()); - /* - * Initialize TCG regions--once. Now is a good time, because: - * (1) TCG's init context, prologue and target globals have been set up. - * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the - * -accel flag is processed, so the check doesn't work then). - */ - if (!tcg_region_inited) { - tcg_region_inited = 1; - tcg_region_init(); - /* - * If MTTCG, and we will create multiple cpus, - * then we will have cpus running in parallel. 
- */ - if (qemu_tcg_mttcg_enabled()) { - MachineState *ms = MACHINE(qdev_get_machine()); - if (ms->smp.max_cpus > 1) { - parallel_cpus = true; - } - } - } - - if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) { - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - - if (qemu_tcg_mttcg_enabled()) { - /* create a thread per vCPU with TCG (MTTCG) */ - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG", - cpu->cpu_index); - - qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); - - } else { - /* share a single thread for all cpus with TCG */ - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG"); - qemu_thread_create(cpu->thread, thread_name, - qemu_tcg_rr_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); - - single_tcg_halt_cond = cpu->halt_cond; - single_tcg_cpu_thread = cpu->thread; - } -#ifdef _WIN32 - cpu->hThread = qemu_thread_get_handle(cpu->thread); -#endif - } else { - /* For non-MTTCG cases we share the thread */ - cpu->thread = single_tcg_cpu_thread; - cpu->halt_cond = single_tcg_halt_cond; - cpu->thread_id = first_cpu->thread_id; - cpu->can_do_io = 1; - cpu->created = true; - } -} - -static void qemu_hax_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); -#ifdef _WIN32 - cpu->hThread = qemu_thread_get_handle(cpu->thread); -#endif -} - -static void qemu_kvm_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); -} - -static void qemu_hvf_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - /* HVF currently does not support TCG, and only runs in - * unrestricted-guest mode. 
*/ - assert(hvf_enabled()); - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); -} - -static void qemu_whpx_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); -#ifdef _WIN32 - cpu->hThread = qemu_thread_get_handle(cpu->thread); -#endif -} - -static void qemu_dummy_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu, - QEMU_THREAD_JOINABLE); + assert(ca != NULL); + assert(ca->create_vcpu_thread != NULL); /* mandatory */ + cpus_accel = ca; } void qemu_init_vcpu(CPUState *cpu) @@ -2037,19 +616,9 @@ void qemu_init_vcpu(CPUState *cpu) cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory); } - if (kvm_enabled()) { - qemu_kvm_start_vcpu(cpu); - } else if (hax_enabled()) { - qemu_hax_start_vcpu(cpu); - } else if (hvf_enabled()) { - qemu_hvf_start_vcpu(cpu); - } else if (tcg_enabled()) { - qemu_tcg_init_vcpu(cpu); - } else if (whpx_enabled()) { - qemu_whpx_start_vcpu(cpu); - } else { - qemu_dummy_start_vcpu(cpu); - } + /* accelerators all implement the CpusAccel interface */ + g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL); + cpus_accel->create_vcpu_thread(cpu); while (!cpu->created) { qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); @@ -2227,21 +796,3 @@ void qmp_inject_nmi(Error **errp) nmi_monitor_handle(monitor_get_cpu_index(), errp); } -void dump_drift_info(void) -{ - if (!use_icount) { - return; - } - - qemu_printf("Host - Guest clock %"PRIi64" ms\n", - (cpu_get_clock() - cpu_get_icount())/SCALE_MS); - if (icount_align_option) { - qemu_printf("Max guest delay %"PRIi64" ms\n", - -max_delay / SCALE_MS); - qemu_printf("Max guest advance %"PRIi64" ms\n", - max_advance / SCALE_MS); - } else { - qemu_printf("Max guest delay NA\n"); - qemu_printf("Max guest advance NA\n"); - } -} |
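Among the code moved out above is the icount scaling logic: `cpu_icount_to_ns()` is a plain left shift, so virtual time advances by 2^icount_time_shift nanoseconds per executed instruction, and `icount_adjust()` nudges the shift up or down so virtual time tracks real time. A standalone worked example of that arithmetic (illustration only, not QEMU code):

```c
#include <inttypes.h>
#include <stdio.h>

/* Same formula as the removed cpu_icount_to_ns(): ns = icount << shift. */
static int64_t icount_to_ns(int64_t icount, int shift)
{
    return icount << shift;
}

int main(void)
{
    int shift = 3;   /* initial guess set by configure_icount() */
    int64_t ns_per_insn = icount_to_ns(1, shift);

    printf("ns per instruction: %" PRId64 "\n", ns_per_insn);          /* 8 */
    printf("guest speed: %" PRId64 " MIPS\n",
           (int64_t)(1000000000LL / ns_per_insn / 1000000));           /* 125 */
    return 0;
}
```

A shift of 3 thus means 8 ns per instruction, i.e. the "125MIPS" initial guess in `configure_icount()`; at the other end, MAX_ICOUNT_SHIFT of 10 caps the slowdown at 1024 ns per instruction, matching the "1MIPS as the minimum allowable speed" comment.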
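The removed guest cycle counter code also illustrates QEMU's standard seqlock pattern for fields read outside the BQL: writers take `vm_clock_lock` and bump the `vm_clock_seqlock` sequence, while readers retry until they observe a consistent snapshot. A minimal sketch of the reader side, assuming the "qemu/seqlock.h" and "qemu/atomic.h" APIs as they are used in the diff:

```c
#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/seqlock.h"

/*
 * Reader pattern used by the removed cpu_get_icount()/cpu_get_clock():
 * loop until no writer raced with the read. The 64-bit load itself is
 * atomic (qatomic_read_i64) to avoid undefined behaviour, as the
 * original comment notes.
 */
static int64_t read_consistent_i64(QemuSeqLock *sl, const int64_t *value)
{
    int64_t v;
    unsigned start;

    do {
        start = seqlock_read_begin(sl);
        v = qatomic_read_i64(value);
    } while (seqlock_read_retry(sl, start));

    return v;
}
```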