From 6e3b2bfd6af488a896f7936e99ef160f8f37e6f2 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Tue, 6 Jun 2017 19:12:25 -0400 Subject: tcg: allocate TB structs before the corresponding translated code Allocating an arbitrarily-sized array of tbs results in either (a) a lot of memory wasted or (b) unnecessary flushes of the code cache when we run out of TB structs in the array. An obvious solution would be to just malloc a TB struct when needed, and keep the TB array as an array of pointers (recall that tb_find_pc() needs the TB array to run in O(log n)). Perhaps a better solution, which is implemented in this patch, is to allocate TB's right before the translated code they describe. This results in some memory waste due to padding to have code and TBs in separate cache lines--for instance, I measured 4.7% of padding in the used portion of code_gen_buffer when booting aarch64 Linux on a host with 64-byte cache lines. However, it can allow for optimizations in some host architectures, since TCG backends could safely assume that the TB and the corresponding translated code are very close to each other in memory. See this message by rth for a detailed explanation: https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg05172.html Subject: Re: GSoC 2017 Proposal: TCG performance enhancements Message-ID: <1e67644b-4b30-887e-d329-1848e94c9484@twiddle.net> Suggested-by: Richard Henderson Reviewed-by: Pranith Kumar Signed-off-by: Emilio G. Cota Message-Id: <1496790745-314-3-git-send-email-cota@braap.org> [rth: Simplify the arithmetic in tcg_tb_alloc] Signed-off-by: Richard Henderson --- include/exec/tb-context.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/exec') diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index c7f17f26e0..25c2afe753 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -31,8 +31,9 @@ typedef struct TBContext TBContext; struct TBContext { - TranslationBlock *tbs; + TranslationBlock **tbs; struct qht htable; + size_t tbs_size; int nb_tbs; /* any access to the tbs or the page table must use this lock */ QemuMutex tb_lock; -- cgit 1.4.1 From 3fb53fb4d12f2e7833bd1659e6013237b130ef20 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 5 Jun 2017 19:13:56 -0400 Subject: tcg/arm: Use indirect branch for goto_tb Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 5 +---- tcg/arm/tcg-target.inc.c | 17 ++--------------- 2 files changed, 3 insertions(+), 19 deletions(-) (limited to 'include/exec') diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 87ae10bcc9..724ec73dce 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -301,7 +301,7 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, #define CODE_GEN_AVG_BLOCK_SIZE 150 #endif -#if defined(__arm__) || defined(_ARCH_PPC) \ +#if defined(_ARCH_PPC) \ || defined(__x86_64__) || defined(__i386__) \ || defined(__sparc__) || defined(__aarch64__) \ || defined(__s390x__) || defined(__mips__) \ @@ -401,9 +401,6 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) #elif defined(__aarch64__) void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr); #define tb_set_jmp_target1 aarch64_tb_set_jmp_target -#elif defined(__arm__) -void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr); -#define tb_set_jmp_target1 arm_tb_set_jmp_target #elif defined(__sparc__) || defined(__mips__) void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr); #else diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 9f5cb66718..fce382f219 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -1026,16 +1026,6 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) } } -void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) -{ - tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; - tcg_insn_unit *target = (tcg_insn_unit *)addr; - - /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */ - reloc_pc24_atomic(code_ptr, target); - flush_icache_range(jmp_addr, jmp_addr + 4); -} - static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l) { if (l->has_value) { @@ -1665,11 +1655,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; case INDEX_op_goto_tb: - if (s->tb_jmp_insn_offset) { - /* Direct jump method */ - s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); - tcg_out_b_noaddr(s, COND_AL); - } else { + tcg_debug_assert(s->tb_jmp_insn_offset == 0); + { /* Indirect jump method */ intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]); tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff); -- cgit 1.4.1