From c2aa5f819900660f936faadfe92fe5d60a562482 Mon Sep 17 00:00:00 2001 From: Sebastian Tanase Date: Fri, 25 Jul 2014 11:56:31 +0200 Subject: cpu-exec: Add sleeping algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The goal is to sleep qemu whenever the guest clock is in advance compared to the host clock (we use the monotonic clocks). The amount of time to sleep is calculated in the execution loop in cpu_exec. At first, we tried to approximate at each for loop the real time elapsed while searching for a TB (generating or retrieving from cache) and executing it. We would then approximate the virtual time corresponding to the number of virtual instructions executed. The difference between these 2 values would allow us to know if the guest is in advance or delayed. However, the function used for measuring the real time (qemu_clock_get_ns(QEMU_CLOCK_REALTIME)) proved to be very expensive. We had an added overhead of 13% of the total run time. Therefore, we modified the algorithm and only take into account the difference between the 2 clocks at the begining of the cpu_exec function. During the for loop we try to reduce the advance of the guest only by computing the virtual time elapsed and sleeping if necessary. The overhead is thus reduced to 3%. Even though this method still has a noticeable overhead, it no longer is a bottleneck in trying to achieve a better guest frequency for which the guest clock is faster than the host one. As for the the alignement of the 2 clocks, with the first algorithm the guest clock was oscillating between -1 and 1ms compared to the host clock. Using the second algorithm we notice that the guest is 5ms behind the host, which is still acceptable for our use case. The tests where conducted using fio and stress. The host machine in an i5 CPU at 3.10GHz running Debian Jessie (kernel 3.12). The guest machine is an arm versatile-pb built with buildroot. Currently, on our test machine, the lowest icount we can achieve that is suitable for aligning the 2 clocks is 6. However, we observe that the IO tests (using fio) are slower than the cpu tests (using stress). Signed-off-by: Sebastian Tanase Tested-by: Camille Bégué Signed-off-by: Paolo Bonzini --- cpu-exec.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index 38e5f02a30..68f82b631b 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -22,6 +22,72 @@ #include "tcg.h" #include "qemu/atomic.h" #include "sysemu/qtest.h" +#include "qemu/timer.h" + +/* -icount align implementation. */ + +typedef struct SyncClocks { + int64_t diff_clk; + int64_t last_cpu_icount; +} SyncClocks; + +#if !defined(CONFIG_USER_ONLY) +/* Allow the guest to have a max 3ms advance. + * The difference between the 2 clocks could therefore + * oscillate around 0. + */ +#define VM_CLOCK_ADVANCE 3000000 + +static void align_clocks(SyncClocks *sc, const CPUState *cpu) +{ + int64_t cpu_icount; + + if (!icount_align_option) { + return; + } + + cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; + sc->diff_clk += cpu_icount_to_ns(sc->last_cpu_icount - cpu_icount); + sc->last_cpu_icount = cpu_icount; + + if (sc->diff_clk > VM_CLOCK_ADVANCE) { +#ifndef _WIN32 + struct timespec sleep_delay, rem_delay; + sleep_delay.tv_sec = sc->diff_clk / 1000000000LL; + sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL; + if (nanosleep(&sleep_delay, &rem_delay) < 0) { + sc->diff_clk -= (sleep_delay.tv_sec - rem_delay.tv_sec) * 1000000000LL; + sc->diff_clk -= sleep_delay.tv_nsec - rem_delay.tv_nsec; + } else { + sc->diff_clk = 0; + } +#else + Sleep(sc->diff_clk / SCALE_MS); + sc->diff_clk = 0; +#endif + } +} + +static void init_delay_params(SyncClocks *sc, + const CPUState *cpu) +{ + if (!icount_align_option) { + return; + } + sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - + qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + cpu_get_clock_offset(); + sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; +} +#else +static void align_clocks(SyncClocks *sc, const CPUState *cpu) +{ +} + +static void init_delay_params(SyncClocks *sc, const CPUState *cpu) +{ +} +#endif /* CONFIG USER ONLY */ void cpu_loop_exit(CPUState *cpu) { @@ -227,6 +293,8 @@ int cpu_exec(CPUArchState *env) TranslationBlock *tb; uint8_t *tc_ptr; uintptr_t next_tb; + SyncClocks sc; + /* This must be volatile so it is not trashed by longjmp() */ volatile bool have_tb_lock = false; @@ -283,6 +351,13 @@ int cpu_exec(CPUArchState *env) #endif cpu->exception_index = -1; + /* Calculate difference between guest clock and host clock. + * This delay includes the delay of the last cycle, so + * what we have to do is sleep until it is 0. As for the + * advance/delay we gain here, we try to fix it next time. + */ + init_delay_params(&sc, cpu); + /* prepare setjmp context for exception handling */ for(;;) { if (sigsetjmp(cpu->jmp_env, 0) == 0) { @@ -672,6 +747,7 @@ int cpu_exec(CPUArchState *env) if (insns_left > 0) { /* Execute remaining instructions. */ cpu_exec_nocache(env, insns_left, tb); + align_clocks(&sc, cpu); } cpu->exception_index = EXCP_INTERRUPT; next_tb = 0; @@ -684,6 +760,9 @@ int cpu_exec(CPUArchState *env) } } cpu->current_tb = NULL; + /* Try to align the host and virtual clocks + if the guest is in advance */ + align_clocks(&sc, cpu); /* reset soft MMU for next block (it can currently only be set by a memory fault) */ } /* for(;;) */ -- cgit 1.4.1 From 7f7bc144ed653c6026ec956045224666abdec316 Mon Sep 17 00:00:00 2001 From: Sebastian Tanase Date: Fri, 25 Jul 2014 11:56:32 +0200 Subject: cpu-exec: Print to console if the guest is late MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the align option is enabled, we print to the user whenever the guest clock is behind the host clock in order for he/she to have a hint about the actual performance. The maximum print interval is 2s and we limit the number of messages to 100. If desired, this can be changed in cpu-exec.c Signed-off-by: Sebastian Tanase Tested-by: Camille Bégué Signed-off-by: Paolo Bonzini --- cpu-exec.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index 68f82b631b..3c14502329 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -29,6 +29,7 @@ typedef struct SyncClocks { int64_t diff_clk; int64_t last_cpu_icount; + int64_t realtime_clock; } SyncClocks; #if !defined(CONFIG_USER_ONLY) @@ -37,6 +38,9 @@ typedef struct SyncClocks { * oscillate around 0. */ #define VM_CLOCK_ADVANCE 3000000 +#define THRESHOLD_REDUCE 1.5 +#define MAX_DELAY_PRINT_RATE 2000000000LL +#define MAX_NB_PRINTS 100 static void align_clocks(SyncClocks *sc, const CPUState *cpu) { @@ -68,16 +72,43 @@ static void align_clocks(SyncClocks *sc, const CPUState *cpu) } } +static void print_delay(const SyncClocks *sc) +{ + static float threshold_delay; + static int64_t last_realtime_clock; + static int nb_prints; + + if (icount_align_option && + sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE && + nb_prints < MAX_NB_PRINTS) { + if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) || + (-sc->diff_clk / (float)1000000000LL < + (threshold_delay - THRESHOLD_REDUCE))) { + threshold_delay = (-sc->diff_clk / 1000000000LL) + 1; + printf("Warning: The guest is now late by %.1f to %.1f seconds\n", + threshold_delay - 1, + threshold_delay); + nb_prints++; + last_realtime_clock = sc->realtime_clock; + } + } +} + static void init_delay_params(SyncClocks *sc, const CPUState *cpu) { if (!icount_align_option) { return; } + sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - - qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + sc->realtime_clock + cpu_get_clock_offset(); sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; + + /* Print every 2s max if the guest is late. We limit the number + of printed messages to NB_PRINT_MAX(currently 100) */ + print_delay(sc); } #else static void align_clocks(SyncClocks *sc, const CPUState *cpu) -- cgit 1.4.1 From 27498bef357de432a9aa403c5ccf11776773ba58 Mon Sep 17 00:00:00 2001 From: Sebastian Tanase Date: Fri, 25 Jul 2014 11:56:33 +0200 Subject: monitor: Add drift info to 'info jit' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Show in 'info jit' the current delay between the host clock and the guest clock. In addition, print the maximum advance and delay of the guest compared to the host. Signed-off-by: Sebastian Tanase Tested-by: Camille Bégué Signed-off-by: Paolo Bonzini --- cpu-exec.c | 6 ++++++ cpus.c | 19 +++++++++++++++++++ include/qemu-common.h | 4 ++++ monitor.c | 1 + 4 files changed, 30 insertions(+) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index 3c14502329..cbc8067b37 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -105,6 +105,12 @@ static void init_delay_params(SyncClocks *sc, sc->realtime_clock + cpu_get_clock_offset(); sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; + if (sc->diff_clk < max_delay) { + max_delay = sc->diff_clk; + } + if (sc->diff_clk > max_advance) { + max_advance = sc->diff_clk; + } /* Print every 2s max if the guest is late. We limit the number of printed messages to NB_PRINT_MAX(currently 100) */ diff --git a/cpus.c b/cpus.c index 19245e99b9..2b5c0bd7c7 100644 --- a/cpus.c +++ b/cpus.c @@ -64,6 +64,8 @@ #endif /* CONFIG_LINUX */ static CPUState *next_cpu; +int64_t max_delay; +int64_t max_advance; bool cpu_is_stopped(CPUState *cpu) { @@ -1552,3 +1554,20 @@ void qmp_inject_nmi(Error **errp) error_set(errp, QERR_UNSUPPORTED); #endif } + +void dump_drift_info(FILE *f, fprintf_function cpu_fprintf) +{ + if (!use_icount) { + return; + } + + cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n", + (cpu_get_clock() - cpu_get_icount())/SCALE_MS); + if (icount_align_option) { + cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS); + cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS); + } else { + cpu_fprintf(f, "Max guest delay NA\n"); + cpu_fprintf(f, "Max guest advance NA\n"); + } +} diff --git a/include/qemu-common.h b/include/qemu-common.h index 5d10ac27a1..bcf7a6ad43 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -109,6 +109,10 @@ static inline char *realpath(const char *path, char *resolved_path) void configure_icount(QemuOpts *opts, Error **errp); extern int use_icount; extern int icount_align_option; +/* drift information for info jit command */ +extern int64_t max_delay; +extern int64_t max_advance; +void dump_drift_info(FILE *f, fprintf_function cpu_fprintf); #include "qemu/osdep.h" #include "qemu/bswap.h" diff --git a/monitor.c b/monitor.c index 5bc70a642d..cdbaa60f98 100644 --- a/monitor.c +++ b/monitor.c @@ -1047,6 +1047,7 @@ static void do_info_registers(Monitor *mon, const QDict *qdict) static void do_info_jit(Monitor *mon, const QDict *qdict) { dump_exec_info((FILE *)mon, monitor_fprintf); + dump_drift_info((FILE *)mon, monitor_fprintf); } static void do_info_history(Monitor *mon, const QDict *qdict) -- cgit 1.4.1