From 25d3ec5831e08ed3e72a5db654d7c281feb559c7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 19 Aug 2022 14:20:37 -0700 Subject: accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This structure will shortly contain more than just data for accessing MMIO. Rename the 'addr' member to 'xlat_section' to more clearly indicate its purpose. Reviewed-by: Alex Bennée Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/exec/cpu-defs.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/exec') diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index ba3cd32a1e..f70f54d850 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -108,6 +108,7 @@ typedef uint64_t target_ulong; # endif # endif +/* Minimalized TLB entry for use by TCG fast path. */ typedef struct CPUTLBEntry { /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not @@ -131,14 +132,14 @@ typedef struct CPUTLBEntry { QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS)); -/* The IOTLB is not accessed directly inline by generated TCG code, - * so the CPUIOTLBEntry layout is not as critical as that of the - * CPUTLBEntry. (This is also why we don't want to combine the two - * structs into one.) +/* + * The full TLB entry, which is not accessed by generated TCG code, + * so the layout is not as critical as that of CPUTLBEntry. This is + * also why we don't want to combine the two structs. */ -typedef struct CPUIOTLBEntry { +typedef struct CPUTLBEntryFull { /* - * @addr contains: + * @xlat_section contains: * - in the lower TARGET_PAGE_BITS, a physical section number * - with the lower TARGET_PAGE_BITS masked off, an offset which * must be added to the virtual address to obtain: @@ -146,9 +147,9 @@ typedef struct CPUIOTLBEntry { * number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM) * + the offset within the target MemoryRegion (otherwise) */ - hwaddr addr; + hwaddr xlat_section; MemTxAttrs attrs; -} CPUIOTLBEntry; +} CPUTLBEntryFull; /* * Data elements that are per MMU mode, minus the bits accessed by @@ -172,9 +173,8 @@ typedef struct CPUTLBDesc { size_t vindex; /* The tlb victim table, in two parts. */ CPUTLBEntry vtable[CPU_VTLB_SIZE]; - CPUIOTLBEntry viotlb[CPU_VTLB_SIZE]; - /* The iotlb. */ - CPUIOTLBEntry *iotlb; + CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE]; + CPUTLBEntryFull *fulltlb; } CPUTLBDesc; /* -- cgit 1.4.1 From af803a4fcb1c707a6a885b5736335baf794f7676 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 19 Aug 2022 15:49:41 -0700 Subject: accel/tcg: Introduce probe_access_full MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an interface to return the CPUTLBEntryFull struct that goes with the lookup. The result is not intended to be valid across multiple lookups, so the user must use the results immediately. 
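As a usage illustration (not part of the patch): a caller performing a
non-faulting probe consumes the returned CPUTLBEntryFull before doing
anything else that may touch the TLB. This is a sketch only; env, addr,
mmu_idx and retaddr are assumed to be in scope in the caller:

    CPUTLBEntryFull *full;
    void *host;
    int flags;

    flags = probe_access_full(env, addr, MMU_DATA_LOAD, mmu_idx,
                              true, &host, &full, retaddr);
    if (flags & TLB_INVALID_MASK) {
        /* Non-faulting lookup failed; report the miss to the caller. */
        return false;
    }
    /* @full is transient: copy what is needed before the next lookup. */
    MemTxAttrs attrs = full->attrs;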
Reviewed-by: Alex Bennée Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 47 +++++++++++++++++++++++++++++------------------ include/exec/exec-all.h | 15 +++++++++++++++ include/qemu/typedefs.h | 1 + 3 files changed, 45 insertions(+), 18 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 264f84a248..e3ee4260bd 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1510,7 +1510,8 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, static int probe_access_internal(CPUArchState *env, target_ulong addr, int fault_size, MMUAccessType access_type, int mmu_idx, bool nonfault, - void **phost, uintptr_t retaddr) + void **phost, CPUTLBEntryFull **pfull, + uintptr_t retaddr) { uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); @@ -1543,10 +1544,12 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, mmu_idx, nonfault, retaddr)) { /* Non-faulting page table read failed. */ *phost = NULL; + *pfull = NULL; return TLB_INVALID_MASK; } /* TLB resize via tlb_fill may have moved the entry. */ + index = tlb_index(env, mmu_idx, addr); entry = tlb_entry(env, mmu_idx, addr); /* @@ -1560,6 +1563,8 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, } flags &= tlb_addr; + *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index]; + /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { *phost = NULL; @@ -1571,37 +1576,44 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, return flags; } -int probe_access_flags(CPUArchState *env, target_ulong addr, - MMUAccessType access_type, int mmu_idx, - bool nonfault, void **phost, uintptr_t retaddr) +int probe_access_full(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, CPUTLBEntryFull **pfull, + uintptr_t retaddr) { - int flags; - - flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, - nonfault, phost, retaddr); + int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, + nonfault, phost, pfull, retaddr); /* Handle clean RAM pages. */ if (unlikely(flags & TLB_NOTDIRTY)) { - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; - - notdirty_write(env_cpu(env), addr, 1, full, retaddr); + notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr); flags &= ~TLB_NOTDIRTY; } return flags; } +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr) +{ + CPUTLBEntryFull *full; + + return probe_access_full(env, addr, access_type, mmu_idx, + nonfault, phost, &full, retaddr); +} + void *probe_access(CPUArchState *env, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { + CPUTLBEntryFull *full; void *host; int flags; g_assert(-(addr | TARGET_PAGE_MASK) >= size); flags = probe_access_internal(env, addr, size, access_type, mmu_idx, - false, &host, retaddr); + false, &host, &full, retaddr); /* Per the interface, size == 0 merely faults the access. 
*/ if (size == 0) { @@ -1609,9 +1621,6 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, } if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) { - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; - /* Handle watchpoints. */ if (flags & TLB_WATCHPOINT) { int wp_access = (access_type == MMU_DATA_STORE @@ -1632,11 +1641,12 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, MMUAccessType access_type, int mmu_idx) { + CPUTLBEntryFull *full; void *host; int flags; flags = probe_access_internal(env, addr, 0, access_type, - mmu_idx, true, &host, 0); + mmu_idx, true, &host, &full, 0); /* No combination of flags are expected by the caller. */ return flags ? NULL : host; @@ -1655,10 +1665,11 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, void **hostp) { + CPUTLBEntryFull *full; void *p; (void)probe_access_internal(env, addr, 1, MMU_INST_FETCH, - cpu_mmu_index(env, true), false, &p, 0); + cpu_mmu_index(env, true), false, &p, &full, 0); if (p == NULL) { return -1; } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index bcad607c4e..d255d69bc1 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -434,6 +434,21 @@ int probe_access_flags(CPUArchState *env, target_ulong addr, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, uintptr_t retaddr); +#ifndef CONFIG_USER_ONLY +/** + * probe_access_full: + * Like probe_access_flags, except also return into @pfull. + * + * The CPUTLBEntryFull structure returned via @pfull is transient + * and must be consumed or copied immediately, before any further + * access or changes to TLB @mmu_idx. + */ +int probe_access_full(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, + CPUTLBEntryFull **pfull, uintptr_t retaddr); +#endif + #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */ /* Estimated block size for TB allocation. */ diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 42f4ceb701..a4aee238c7 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -42,6 +42,7 @@ typedef struct ConfidentialGuestSupport ConfidentialGuestSupport; typedef struct CPUAddressSpace CPUAddressSpace; typedef struct CPUArchState CPUArchState; typedef struct CPUState CPUState; +typedef struct CPUTLBEntryFull CPUTLBEntryFull; typedef struct DeviceListener DeviceListener; typedef struct DeviceState DeviceState; typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot; -- cgit 1.4.1 From 4047368938f64e2b79103c6e6358b06570b237e5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 19 Aug 2022 16:33:23 -0700 Subject: accel/tcg: Introduce tlb_set_page_full MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have collected all of the page data into CPUTLBEntryFull, provide an interface to record that all in one go, instead of using 4 arguments. This interface allows CPUTLBEntryFull to be extended without having to change the number of arguments. 
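Illustrative only (not part of the patch): a target's tlb_fill hook,
after a successful page table walk, collects the walk results into a
CPUTLBEntryFull on the stack and installs it in one call. The variable
names and protection bits below are placeholders; xlat_section is the
one field the caller does not fill, as it is computed by the call
itself:

    CPUTLBEntryFull full = {
        .phys_addr = paddr & TARGET_PAGE_MASK,
        .attrs = MEMTXATTRS_UNSPECIFIED,
        .prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC,
        .lg_page_size = TARGET_PAGE_BITS,
    };

    tlb_set_page_full(cs, mmu_idx, vaddr, &full);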
Reviewed-by: Alex Bennée Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 51 ++++++++++++++++++++++++++++++++----------------- include/exec/cpu-defs.h | 14 ++++++++++++++ include/exec/exec-all.h | 22 +++++++++++++++++++++ 3 files changed, 69 insertions(+), 18 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index e3ee4260bd..361078471b 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1095,16 +1095,16 @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx, env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; } -/* Add a new TLB entry. At most one entry for a given virtual address +/* + * Add a new TLB entry. At most one entry for a given virtual address * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the * supplied size is only used by tlb_flush_page. * * Called from TCG-generated code, which is under an RCU read-side * critical section. */ -void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, - hwaddr paddr, MemTxAttrs attrs, int prot, - int mmu_idx, target_ulong size) +void tlb_set_page_full(CPUState *cpu, int mmu_idx, + target_ulong vaddr, CPUTLBEntryFull *full) { CPUArchState *env = cpu->env_ptr; CPUTLB *tlb = env_tlb(env); @@ -1117,35 +1117,36 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, CPUTLBEntry *te, tn; hwaddr iotlb, xlat, sz, paddr_page; target_ulong vaddr_page; - int asidx = cpu_asidx_from_attrs(cpu, attrs); - int wp_flags; + int asidx, wp_flags, prot; bool is_ram, is_romd; assert_cpu_is_self(cpu); - if (size <= TARGET_PAGE_SIZE) { + if (full->lg_page_size <= TARGET_PAGE_BITS) { sz = TARGET_PAGE_SIZE; } else { - tlb_add_large_page(env, mmu_idx, vaddr, size); - sz = size; + sz = (hwaddr)1 << full->lg_page_size; + tlb_add_large_page(env, mmu_idx, vaddr, sz); } vaddr_page = vaddr & TARGET_PAGE_MASK; - paddr_page = paddr & TARGET_PAGE_MASK; + paddr_page = full->phys_addr & TARGET_PAGE_MASK; + prot = full->prot; + asidx = cpu_asidx_from_attrs(cpu, full->attrs); section = address_space_translate_for_iotlb(cpu, asidx, paddr_page, - &xlat, &sz, attrs, &prot); + &xlat, &sz, full->attrs, &prot); assert(sz >= TARGET_PAGE_SIZE); tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx " prot=%x idx=%d\n", - vaddr, paddr, prot, mmu_idx); + vaddr, full->phys_addr, prot, mmu_idx); address = vaddr_page; - if (size < TARGET_PAGE_SIZE) { + if (full->lg_page_size < TARGET_PAGE_BITS) { /* Repeat the MMU check and TLB fill on every access. */ address |= TLB_INVALID_MASK; } - if (attrs.byte_swap) { + if (full->attrs.byte_swap) { address |= TLB_BSWAP; } @@ -1236,8 +1237,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, * subtract here is that of the page base, and not the same as the * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). */ + desc->fulltlb[index] = *full; desc->fulltlb[index].xlat_section = iotlb - vaddr_page; - desc->fulltlb[index].attrs = attrs; + desc->fulltlb[index].phys_addr = paddr_page; + desc->fulltlb[index].prot = prot; /* Now calculate the new entry */ tn.addend = addend - vaddr_page; @@ -1272,9 +1275,21 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, qemu_spin_unlock(&tlb->c.lock); } -/* Add a new TLB entry, but without specifying the memory - * transaction attributes to be used. 
- */ +void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, + hwaddr paddr, MemTxAttrs attrs, int prot, + int mmu_idx, target_ulong size) +{ + CPUTLBEntryFull full = { + .phys_addr = paddr, + .attrs = attrs, + .prot = prot, + .lg_page_size = ctz64(size) + }; + + assert(is_power_of_2(size)); + tlb_set_page_full(cpu, mmu_idx, vaddr, &full); +} + void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size) diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index f70f54d850..5e12cc1854 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -148,7 +148,21 @@ typedef struct CPUTLBEntryFull { * + the offset within the target MemoryRegion (otherwise) */ hwaddr xlat_section; + + /* + * @phys_addr contains the physical address in the address space + * given by cpu_asidx_from_attrs(cpu, @attrs). + */ + hwaddr phys_addr; + + /* @attrs contains the memory transaction attributes for the page. */ MemTxAttrs attrs; + + /* @prot contains the complete protections for the page. */ + uint8_t prot; + + /* @lg_page_size contains the log2 of the page size. */ + uint8_t lg_page_size; } CPUTLBEntryFull; /* diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index d255d69bc1..b1b920a713 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -257,6 +257,28 @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap, unsigned bits); +/** + * tlb_set_page_full: + * @cpu: CPU context + * @mmu_idx: mmu index of the tlb to modify + * @vaddr: virtual address of the entry to add + * @full: the details of the tlb entry + * + * Add an entry to @cpu tlb index @mmu_idx. All of the fields of + * @full must be filled, except for xlat_section, and constitute + * the complete description of the translated page. + * + * This is generally called by the target tlb_fill function after + * having performed a successful page table walk to find the physical + * address and attributes for the translation. + * + * At most one entry for a given virtual address is permitted. Only a + * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only + * used by tlb_flush_page. + */ +void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr, + CPUTLBEntryFull *full); + /** * tlb_set_page_with_attrs: * @cpu: CPU to add this TLB entry for -- cgit 1.4.1 From 8c6953cf034cec0998815961e5ec70c9de15da6e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 19 Aug 2022 17:02:36 -0700 Subject: include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow the target to cache items from the guest page tables. Reviewed-by: Alex Bennée Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/exec/cpu-defs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/exec') diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index 5e12cc1854..67239b4e5e 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -163,6 +163,15 @@ typedef struct CPUTLBEntryFull { /* @lg_page_size contains the log2 of the page size. */ uint8_t lg_page_size; + + /* + * Allow target-specific additions to this structure. + * This may be used to cache items from the guest cpu + * page tables for later use by the implementation. 
+ */ +#ifdef TARGET_PAGE_ENTRY_EXTRA + TARGET_PAGE_ENTRY_EXTRA +#endif } CPUTLBEntryFull; /* -- cgit 1.4.1 From b21af662c15522b83a973bc2ffd51d0117c0e039 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 12 Aug 2022 09:15:15 -0700 Subject: accel/tcg: Use DisasContextBase in plugin_gen_tb_start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the pc coming from db->pc_first rather than the TB. Use the cached host_addr rather than re-computing for the first page. We still need a separate lookup for the second page because it won't be computed for DisasContextBase until the translator actually performs a read from the page. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/plugin-gen.c | 22 +++++++++++----------- accel/tcg/translator.c | 2 +- include/exec/plugin-gen.h | 7 ++++--- 3 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index 3d0b101e34..80dff68934 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -852,7 +852,8 @@ static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb) pr_ops(); } -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_only) +bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db, + bool mem_only) { bool ret = false; @@ -870,9 +871,9 @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_onl ret = true; - ptb->vaddr = tb->pc; + ptb->vaddr = db->pc_first; ptb->vaddr2 = -1; - get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1); + ptb->haddr1 = db->host_addr[0]; ptb->haddr2 = NULL; ptb->mem_only = mem_only; @@ -898,16 +899,15 @@ void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db) * Note that we skip this when haddr1 == NULL, e.g. when we're * fetching instructions from a region not backed by RAM. 
*/ - if (likely(ptb->haddr1 != NULL && ptb->vaddr2 == -1) && - unlikely((db->pc_next & TARGET_PAGE_MASK) != - (db->pc_first & TARGET_PAGE_MASK))) { - get_page_addr_code_hostp(cpu->env_ptr, db->pc_next, - &ptb->haddr2); - ptb->vaddr2 = db->pc_next; - } - if (likely(ptb->vaddr2 == -1)) { + if (ptb->haddr1 == NULL) { + pinsn->haddr = NULL; + } else if (is_same_page(db, db->pc_next)) { pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr; } else { + if (ptb->vaddr2 == -1) { + ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first); + get_page_addr_code_hostp(cpu->env_ptr, ptb->vaddr2, &ptb->haddr2); + } pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2; } } diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index ca8a5f2d83..8e78fd7a9c 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -75,7 +75,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns, ops->tb_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ - plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags & CF_MEMI_ONLY); + plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY); while (true) { db->num_insns++; diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h index f92f169739..5004728c61 100644 --- a/include/exec/plugin-gen.h +++ b/include/exec/plugin-gen.h @@ -19,7 +19,8 @@ struct DisasContextBase; #ifdef CONFIG_PLUGIN -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress); +bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, + bool supress); void plugin_gen_tb_end(CPUState *cpu); void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db); void plugin_gen_insn_end(void); @@ -48,8 +49,8 @@ static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size) #else /* !CONFIG_PLUGIN */ -static inline -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress) +static inline bool +plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, bool sup) { return false; } -- cgit 1.4.1 From a976a99a29755e8c7a275ac269db8a0a20d79e95 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 15 Aug 2022 15:13:05 -0500 Subject: include/hw/core: Create struct CPUJumpCache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wrap the bare TranslationBlock pointer into a structure. 
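The net effect on a lookup is one extra level of naming, restated here
from the hunks below; the wrapper exists so that per-entry state can be
added later without touching every user (the final patch in this series
does exactly that, adding a pc field under TARGET_TB_PCREL):

    /* Before: bare array hanging off CPUState. */
    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);

    /* After: the same lookup through the wrapping structure. */
    tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);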
Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/stubs/tcg-stub.c | 4 ++++ accel/tcg/cpu-exec.c | 10 +++++++--- accel/tcg/cputlb.c | 9 +++++---- accel/tcg/tb-hash.h | 1 + accel/tcg/tb-jmp-cache.h | 24 ++++++++++++++++++++++++ accel/tcg/translate-all.c | 28 +++++++++++++++++++++++++--- hw/core/cpu-common.c | 3 +-- include/exec/cpu-common.h | 1 + include/hw/core/cpu.h | 15 +-------------- include/qemu/typedefs.h | 1 + plugins/core.c | 2 +- trace/control-target.c | 2 +- 12 files changed, 72 insertions(+), 28 deletions(-) create mode 100644 accel/tcg/tb-jmp-cache.h (limited to 'include/exec') diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c index 6ce8a34228..c1b05767c0 100644 --- a/accel/stubs/tcg-stub.c +++ b/accel/stubs/tcg-stub.c @@ -21,6 +21,10 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) { } +void tcg_flush_jmp_cache(CPUState *cpu) +{ +} + int probe_access_flags(CPUArchState *env, target_ulong addr, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, uintptr_t retaddr) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index dd58a144a8..2d7e610ee2 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -42,6 +42,7 @@ #include "sysemu/replay.h" #include "sysemu/tcg.h" #include "exec/helper-proto.h" +#include "tb-jmp-cache.h" #include "tb-hash.h" #include "tb-context.h" #include "internal.h" @@ -252,7 +253,7 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, tcg_debug_assert(!(cflags & CF_INVALID)); hash = tb_jmp_cache_hash_func(pc); - tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); + tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb); if (likely(tb && tb->pc == pc && @@ -266,7 +267,7 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, if (tb == NULL) { return NULL; } - qatomic_set(&cpu->tb_jmp_cache[hash], tb); + qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb); return tb; } @@ -987,6 +988,8 @@ int cpu_exec(CPUState *cpu) tb = tb_lookup(cpu, pc, cs_base, flags, cflags); if (tb == NULL) { + uint32_t h; + mmap_lock(); tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); mmap_unlock(); @@ -994,7 +997,8 @@ int cpu_exec(CPUState *cpu) * We add the TB in the virtual pc hash table * for the fast lookup */ - qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); + h = tb_jmp_cache_hash_func(pc); + qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb); } #ifndef CONFIG_USER_ONLY diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index c7909fb619..6f1c00682b 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -100,10 +100,11 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) { - unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr); + int i, i0 = tb_jmp_cache_hash_page(page_addr); + CPUJumpCache *jc = cpu->tb_jmp_cache; for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { - qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL); + qatomic_set(&jc->array[i0 + i].tb, NULL); } } @@ -356,7 +357,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) qemu_spin_unlock(&env_tlb(env)->c.lock); - cpu_tb_jmp_cache_clear(cpu); + tcg_flush_jmp_cache(cpu); if (to_clean == ALL_MMUIDX_BITS) { qatomic_set(&env_tlb(env)->c.full_flush_count, @@ -785,7 +786,7 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, * longer to clear each entry individually than it will to clear it all. 
*/ if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) { - cpu_tb_jmp_cache_clear(cpu); + tcg_flush_jmp_cache(cpu); return; } diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h index 0a273d9605..83dc610e4c 100644 --- a/accel/tcg/tb-hash.h +++ b/accel/tcg/tb-hash.h @@ -23,6 +23,7 @@ #include "exec/cpu-defs.h" #include "exec/exec-all.h" #include "qemu/xxhash.h" +#include "tb-jmp-cache.h" #ifdef CONFIG_SOFTMMU diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h new file mode 100644 index 0000000000..2d8fbb1bfe --- /dev/null +++ b/accel/tcg/tb-jmp-cache.h @@ -0,0 +1,24 @@ +/* + * The per-CPU TranslationBlock jump cache. + * + * Copyright (c) 2003 Fabrice Bellard + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef ACCEL_TCG_TB_JMP_CACHE_H +#define ACCEL_TCG_TB_JMP_CACHE_H + +#define TB_JMP_CACHE_BITS 12 +#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) + +/* + * Accessed in parallel; all accesses to 'tb' must be atomic. + */ +struct CPUJumpCache { + struct { + TranslationBlock *tb; + } array[TB_JMP_CACHE_SIZE]; +}; + +#endif /* ACCEL_TCG_TB_JMP_CACHE_H */ diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 3a63113c41..63ecc15236 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -58,6 +58,7 @@ #include "sysemu/tcg.h" #include "qapi/error.h" #include "hw/core/tcg-cpu-ops.h" +#include "tb-jmp-cache.h" #include "tb-hash.h" #include "tb-context.h" #include "internal.h" @@ -967,7 +968,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) } CPU_FOREACH(cpu) { - cpu_tb_jmp_cache_clear(cpu); + tcg_flush_jmp_cache(cpu); } qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE); @@ -1187,8 +1188,9 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) /* remove the TB from the hash list */ h = tb_jmp_cache_hash_func(tb->pc); CPU_FOREACH(cpu) { - if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) { - qatomic_set(&cpu->tb_jmp_cache[h], NULL); + CPUJumpCache *jc = cpu->tb_jmp_cache; + if (qatomic_read(&jc->array[h].tb) == tb) { + qatomic_set(&jc->array[h].tb, NULL); } } @@ -2443,6 +2445,26 @@ int page_unprotect(target_ulong address, uintptr_t pc) } #endif /* CONFIG_USER_ONLY */ +/* + * Called by generic code at e.g. cpu reset after cpu creation, + * therefore we must be prepared to allocate the jump cache. + */ +void tcg_flush_jmp_cache(CPUState *cpu) +{ + CPUJumpCache *jc = cpu->tb_jmp_cache; + + if (likely(jc)) { + for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { + qatomic_set(&jc->array[i].tb, NULL); + } + } else { + /* This should happen once during realize, and thus never race. 
*/ + jc = g_new0(CPUJumpCache, 1); + jc = qatomic_xchg(&cpu->tb_jmp_cache, jc); + assert(jc == NULL); + } +} + /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ void tcg_flush_softmmu_tlb(CPUState *cs) { diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c index 9e3241b430..f9fdd46b9d 100644 --- a/hw/core/cpu-common.c +++ b/hw/core/cpu-common.c @@ -137,8 +137,7 @@ static void cpu_common_reset(DeviceState *dev) cpu->cflags_next_tb = -1; if (tcg_enabled()) { - cpu_tb_jmp_cache_clear(cpu); - + tcg_flush_jmp_cache(cpu); tcg_flush_softmmu_tlb(cpu); } } diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index d909429427..c493510ee9 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -38,6 +38,7 @@ void cpu_list_unlock(void); unsigned int cpu_list_generation_id_get(void); void tcg_flush_softmmu_tlb(CPUState *cs); +void tcg_flush_jmp_cache(CPUState *cs); void tcg_iommu_init_notifier_list(CPUState *cpu); void tcg_iommu_free_notifier_list(CPUState *cpu); diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 009dc0d336..18ca701b44 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -236,9 +236,6 @@ struct kvm_run; struct hax_vcpu_state; struct hvf_vcpu_state; -#define TB_JMP_CACHE_BITS 12 -#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) - /* work queue */ /* The union type allows passing of 64 bit target pointers on 32 bit @@ -369,8 +366,7 @@ struct CPUState { CPUArchState *env_ptr; IcountDecr *icount_decr_ptr; - /* Accessed in parallel; all accesses must be atomic */ - TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; + CPUJumpCache *tb_jmp_cache; struct GDBRegisterState *gdb_regs; int gdb_num_regs; @@ -456,15 +452,6 @@ extern CPUTailQ cpus; extern __thread CPUState *current_cpu; -static inline void cpu_tb_jmp_cache_clear(CPUState *cpu) -{ - unsigned int i; - - for (i = 0; i < TB_JMP_CACHE_SIZE; i++) { - qatomic_set(&cpu->tb_jmp_cache[i], NULL); - } -} - /** * qemu_tcg_mttcg_enabled: * Check whether we are running MultiThread TCG or not. 
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index a4aee238c7..5f95169827 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -41,6 +41,7 @@ typedef struct CoMutex CoMutex; typedef struct ConfidentialGuestSupport ConfidentialGuestSupport; typedef struct CPUAddressSpace CPUAddressSpace; typedef struct CPUArchState CPUArchState; +typedef struct CPUJumpCache CPUJumpCache; typedef struct CPUState CPUState; typedef struct CPUTLBEntryFull CPUTLBEntryFull; typedef struct DeviceListener DeviceListener; diff --git a/plugins/core.c b/plugins/core.c index 792262da08..c3ae284994 100644 --- a/plugins/core.c +++ b/plugins/core.c @@ -56,7 +56,7 @@ struct qemu_plugin_ctx *plugin_id_to_ctx_locked(qemu_plugin_id_t id) static void plugin_cpu_update__async(CPUState *cpu, run_on_cpu_data data) { bitmap_copy(cpu->plugin_mask, &data.host_ulong, QEMU_PLUGIN_EV_MAX); - cpu_tb_jmp_cache_clear(cpu); + tcg_flush_jmp_cache(cpu); } static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata) diff --git a/trace/control-target.c b/trace/control-target.c index 8418673c18..232c97a4a1 100644 --- a/trace/control-target.c +++ b/trace/control-target.c @@ -65,7 +65,7 @@ static void trace_event_synchronize_vcpu_state_dynamic( { bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); - cpu_tb_jmp_cache_clear(vcpu); + tcg_flush_jmp_cache(vcpu); } void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, -- cgit 1.4.1 From fbf59aad178d98afe193fa872a2d880266a75269 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 15 Aug 2022 15:16:06 -0500 Subject: accel/tcg: Introduce tb_pc and log_pc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The availability of tb->pc will shortly be conditional. Introduce accessor functions to minimize ifdefs. Pass around a known pc to places like tcg_gen_code, where the caller must already have the value. 
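For reference, the shape of the conversion, taken from the hunks below:
the accessor hides the field read so that a later patch can make the
field conditional in a single place, and call sites switch from the
field to the accessor:

    /* Accessor added to exec-all.h. */
    static inline target_ulong tb_pc(const TranslationBlock *tb)
    {
        return tb->pc;
    }

    /* A representative call-site conversion in a target cpu.c: */
    env->pc = tb_pc(tb);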
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 46 ++++++++++++++++++--------------- accel/tcg/internal.h | 6 +++++ accel/tcg/translate-all.c | 37 ++++++++++++++------------ include/exec/exec-all.h | 6 +++++ include/tcg/tcg.h | 2 +- target/arm/cpu.c | 4 +-- target/avr/cpu.c | 2 +- target/hexagon/cpu.c | 2 +- target/hppa/cpu.c | 4 +-- target/i386/tcg/tcg-cpu.c | 2 +- target/loongarch/cpu.c | 2 +- target/microblaze/cpu.c | 2 +- target/mips/tcg/exception.c | 2 +- target/mips/tcg/sysemu/special_helper.c | 2 +- target/openrisc/cpu.c | 2 +- target/riscv/cpu.c | 4 +-- target/rx/cpu.c | 2 +- target/sh4/cpu.c | 4 +-- target/sparc/cpu.c | 2 +- target/tricore/cpu.c | 2 +- tcg/tcg.c | 8 +++--- 21 files changed, 82 insertions(+), 61 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 2d7e610ee2..8b3f8435fb 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -186,7 +186,7 @@ static bool tb_lookup_cmp(const void *p, const void *d) const TranslationBlock *tb = p; const struct tb_desc *desc = d; - if (tb->pc == desc->pc && + if (tb_pc(tb) == desc->pc && tb->page_addr[0] == desc->page_addr0 && tb->cs_base == desc->cs_base && tb->flags == desc->flags && @@ -271,12 +271,10 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, return tb; } -static inline void log_cpu_exec(target_ulong pc, CPUState *cpu, - const TranslationBlock *tb) +static void log_cpu_exec(target_ulong pc, CPUState *cpu, + const TranslationBlock *tb) { - if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) - && qemu_log_in_addr_range(pc)) { - + if (qemu_log_in_addr_range(pc)) { qemu_log_mask(CPU_LOG_EXEC, "Trace %d: %p [" TARGET_FMT_lx "/" TARGET_FMT_lx "/%08x/%08x] %s\n", @@ -400,7 +398,9 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) return tcg_code_gen_epilogue; } - log_cpu_exec(pc, cpu, tb); + if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) { + log_cpu_exec(pc, cpu, tb); + } return tb->tc.ptr; } @@ -423,7 +423,9 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) TranslationBlock *last_tb; const void *tb_ptr = itb->tc.ptr; - log_cpu_exec(itb->pc, cpu, itb); + if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) { + log_cpu_exec(log_pc(cpu, itb), cpu, itb); + } qemu_thread_jit_execute(); ret = tcg_qemu_tb_exec(env, tb_ptr); @@ -447,16 +449,20 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) * of the start of the TB. 
*/ CPUClass *cc = CPU_GET_CLASS(cpu); - qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc, - "Stopped execution of TB chain before %p [" - TARGET_FMT_lx "] %s\n", - last_tb->tc.ptr, last_tb->pc, - lookup_symbol(last_tb->pc)); + if (cc->tcg_ops->synchronize_from_tb) { cc->tcg_ops->synchronize_from_tb(cpu, last_tb); } else { assert(cc->set_pc); - cc->set_pc(cpu, last_tb->pc); + cc->set_pc(cpu, tb_pc(last_tb)); + } + if (qemu_loglevel_mask(CPU_LOG_EXEC)) { + target_ulong pc = log_pc(cpu, last_tb); + if (qemu_log_in_addr_range(pc)) { + qemu_log("Stopped execution of TB chain before %p [" + TARGET_FMT_lx "] %s\n", + last_tb->tc.ptr, pc, lookup_symbol(pc)); + } } } @@ -598,11 +604,8 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, qemu_spin_unlock(&tb_next->jmp_lock); - qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, - "Linking TBs %p [" TARGET_FMT_lx - "] index %d -> %p [" TARGET_FMT_lx "]\n", - tb->tc.ptr, tb->pc, n, - tb_next->tc.ptr, tb_next->pc); + qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n", + tb->tc.ptr, n, tb_next->tc.ptr); return; out_unlock_next: @@ -848,11 +851,12 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, } static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, + target_ulong pc, TranslationBlock **last_tb, int *tb_exit) { int32_t insns_left; - trace_exec_tb(tb, tb->pc); + trace_exec_tb(tb, pc); tb = cpu_tb_exec(cpu, tb, tb_exit); if (*tb_exit != TB_EXIT_REQUESTED) { *last_tb = tb; @@ -1017,7 +1021,7 @@ int cpu_exec(CPUState *cpu) tb_add_jump(last_tb, tb_exit, tb); } - cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit); + cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit); /* Try to align the host and virtual clocks if the guest is in advance */ diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 3092bfa964..a3875a3b5a 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -18,4 +18,10 @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); void page_init(void); void tb_htable_init(void); +/* Return the current PC from CPU, which may be cached in TB. */ +static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) +{ + return tb_pc(tb); +} + #endif /* ACCEL_TCG_INTERNAL_H */ diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 63ecc15236..13c964dcd8 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -299,7 +299,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { if (i == 0) { - prev = (j == 0 ? tb->pc : 0); + prev = (j == 0 ? 
tb_pc(tb) : 0); } else { prev = tcg_ctx->gen_insn_data[i - 1][j]; } @@ -327,7 +327,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc, bool reset_icount) { - target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc }; + target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) }; uintptr_t host_pc = (uintptr_t)tb->tc.ptr; CPUArchState *env = cpu->env_ptr; const uint8_t *p = tb->tc.ptr + tb->tc.size; @@ -885,7 +885,7 @@ static bool tb_cmp(const void *ap, const void *bp) const TranslationBlock *a = ap; const TranslationBlock *b = bp; - return a->pc == b->pc && + return tb_pc(a) == tb_pc(b) && a->cs_base == b->cs_base && a->flags == b->flags && (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && @@ -1013,9 +1013,10 @@ static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp) TranslationBlock *tb = p; target_ulong addr = *(target_ulong *)userp; - if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) { + if (!(addr + TARGET_PAGE_SIZE <= tb_pc(tb) || + addr >= tb_pc(tb) + tb->size)) { printf("ERROR invalidate: address=" TARGET_FMT_lx - " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size); + " PC=%08lx size=%04x\n", addr, (long)tb_pc(tb), tb->size); } } @@ -1034,11 +1035,11 @@ static void do_tb_page_check(void *p, uint32_t hash, void *userp) TranslationBlock *tb = p; int flags1, flags2; - flags1 = page_get_flags(tb->pc); - flags2 = page_get_flags(tb->pc + tb->size - 1); + flags1 = page_get_flags(tb_pc(tb)); + flags2 = page_get_flags(tb_pc(tb) + tb->size - 1); if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n", - (long)tb->pc, tb->size, flags1, flags2); + (long)tb_pc(tb), tb->size, flags1, flags2); } } @@ -1169,7 +1170,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) /* remove the TB from the hash list */ phys_pc = tb->page_addr[0]; - h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags, + h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags, tb->trace_vcpu_dstate); if (!qht_remove(&tb_ctx.htable, tb, h)) { return; @@ -1301,7 +1302,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, } /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags, + h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags, tb->trace_vcpu_dstate); qht_insert(&tb_ctx.htable, tb, h, &existing_tb); @@ -1401,7 +1402,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_ctx->cpu = NULL; max_insns = tb->icount; - trace_translate_block(tb, tb->pc, tb->tc.ptr); + trace_translate_block(tb, pc, tb->tc.ptr); /* generate machine code */ tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID; @@ -1422,7 +1423,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, ti = profile_getclock(); #endif - gen_code_size = tcg_gen_code(tcg_ctx, tb); + gen_code_size = tcg_gen_code(tcg_ctx, tb, pc); if (unlikely(gen_code_size < 0)) { error_return: switch (gen_code_size) { @@ -1478,7 +1479,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && - qemu_log_in_addr_range(tb->pc)) { + qemu_log_in_addr_range(pc)) { FILE *logfile = qemu_log_trylock(); if (logfile) { int code_size, data_size; @@ -1918,9 +1919,13 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) */ cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; - qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, - 
"cpu_io_recompile: rewound execution of TB to " - TARGET_FMT_lx "\n", tb->pc); + if (qemu_loglevel_mask(CPU_LOG_EXEC)) { + target_ulong pc = log_pc(cpu, tb); + if (qemu_log_in_addr_range(pc)) { + qemu_log("cpu_io_recompile: rewound execution of TB to " + TARGET_FMT_lx "\n", pc); + } + } cpu_loop_exit_noexc(cpu); } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index b1b920a713..7ea6026ba9 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -570,6 +570,12 @@ struct TranslationBlock { uintptr_t jmp_dest[2]; }; +/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */ +static inline target_ulong tb_pc(const TranslationBlock *tb) +{ + return tb->pc; +} + /* Hide the qatomic_read to make code a little easier on the eyes */ static inline uint32_t tb_cflags(const TranslationBlock *tb) { diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 26a70526f1..d84bae6e3f 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -840,7 +840,7 @@ void tcg_register_thread(void); void tcg_prologue_init(TCGContext *s); void tcg_func_start(TCGContext *s); -int tcg_gen_code(TCGContext *s, TranslationBlock *tb); +int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start); void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size); diff --git a/target/arm/cpu.c b/target/arm/cpu.c index fa67ba6647..94ca6f163f 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -84,9 +84,9 @@ void arm_cpu_synchronize_from_tb(CPUState *cs, * never possible for an AArch64 TB to chain to an AArch32 TB. */ if (is_a64(env)) { - env->pc = tb->pc; + env->pc = tb_pc(tb); } else { - env->regs[15] = tb->pc; + env->regs[15] = tb_pc(tb); } } #endif /* CONFIG_TCG */ diff --git a/target/avr/cpu.c b/target/avr/cpu.c index 6900444d03..0d2861179d 100644 --- a/target/avr/cpu.c +++ b/target/avr/cpu.c @@ -54,7 +54,7 @@ static void avr_cpu_synchronize_from_tb(CPUState *cs, AVRCPU *cpu = AVR_CPU(cs); CPUAVRState *env = &cpu->env; - env->pc_w = tb->pc / 2; /* internally PC points to words */ + env->pc_w = tb_pc(tb) / 2; /* internally PC points to words */ } static void avr_cpu_reset(DeviceState *ds) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 04a497db5e..fa6d722555 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -263,7 +263,7 @@ static void hexagon_cpu_synchronize_from_tb(CPUState *cs, { HexagonCPU *cpu = HEXAGON_CPU(cs); CPUHexagonState *env = &cpu->env; - env->gpr[HEX_REG_PC] = tb->pc; + env->gpr[HEX_REG_PC] = tb_pc(tb); } static bool hexagon_cpu_has_work(CPUState *cs) diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c index e25d3db6d5..e677ca09d4 100644 --- a/target/hppa/cpu.c +++ b/target/hppa/cpu.c @@ -49,7 +49,7 @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs, HPPACPU *cpu = HPPA_CPU(cs); #ifdef CONFIG_USER_ONLY - cpu->env.iaoq_f = tb->pc; + cpu->env.iaoq_f = tb_pc(tb); cpu->env.iaoq_b = tb->cs_base; #else /* Recover the IAOQ values from the GVA + PRIV. 
*/ @@ -59,7 +59,7 @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs, int32_t diff = cs_base; cpu->env.iasq_f = iasq_f; - cpu->env.iaoq_f = (tb->pc & ~iasq_f) + priv; + cpu->env.iaoq_f = (tb_pc(tb) & ~iasq_f) + priv; if (diff) { cpu->env.iaoq_b = cpu->env.iaoq_f + diff; } diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index d3c2b8fb49..6cf14c83ff 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -51,7 +51,7 @@ static void x86_cpu_synchronize_from_tb(CPUState *cs, { X86CPU *cpu = X86_CPU(cs); - cpu->env.eip = tb->pc - tb->cs_base; + cpu->env.eip = tb_pc(tb) - tb->cs_base; } #ifndef CONFIG_USER_ONLY diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c index 20a92ea56c..1722ed2a4d 100644 --- a/target/loongarch/cpu.c +++ b/target/loongarch/cpu.c @@ -317,7 +317,7 @@ static void loongarch_cpu_synchronize_from_tb(CPUState *cs, LoongArchCPU *cpu = LOONGARCH_CPU(cs); CPULoongArchState *env = &cpu->env; - env->pc = tb->pc; + env->pc = tb_pc(tb); } #endif /* CONFIG_TCG */ diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 73af51769e..c10b8ac029 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -96,7 +96,7 @@ static void mb_cpu_synchronize_from_tb(CPUState *cs, { MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - cpu->env.pc = tb->pc; + cpu->env.pc = tb_pc(tb); cpu->env.iflags = tb->flags & IFLAGS_TB_MASK; } diff --git a/target/mips/tcg/exception.c b/target/mips/tcg/exception.c index 2bd77a61de..96e61170e6 100644 --- a/target/mips/tcg/exception.c +++ b/target/mips/tcg/exception.c @@ -82,7 +82,7 @@ void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) MIPSCPU *cpu = MIPS_CPU(cs); CPUMIPSState *env = &cpu->env; - env->active_tc.PC = tb->pc; + env->active_tc.PC = tb_pc(tb); env->hflags &= ~MIPS_HFLAG_BMASK; env->hflags |= tb->flags & MIPS_HFLAG_BMASK; } diff --git a/target/mips/tcg/sysemu/special_helper.c b/target/mips/tcg/sysemu/special_helper.c index f4f8fe8afc..3c5f35c759 100644 --- a/target/mips/tcg/sysemu/special_helper.c +++ b/target/mips/tcg/sysemu/special_helper.c @@ -94,7 +94,7 @@ bool mips_io_recompile_replay_branch(CPUState *cs, const TranslationBlock *tb) CPUMIPSState *env = &cpu->env; if ((env->hflags & MIPS_HFLAG_BMASK) != 0 - && env->active_tc.PC != tb->pc) { + && env->active_tc.PC != tb_pc(tb)) { env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 
2 : 4); env->hflags &= ~MIPS_HFLAG_BMASK; return true; diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index 33cf717210..f6fd437785 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -43,7 +43,7 @@ static void openrisc_cpu_synchronize_from_tb(CPUState *cs, { OpenRISCCPU *cpu = OPENRISC_CPU(cs); - cpu->env.pc = tb->pc; + cpu->env.pc = tb_pc(tb); } diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 6ca05c6eaf..e6d9c706bb 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -482,9 +482,9 @@ static void riscv_cpu_synchronize_from_tb(CPUState *cs, RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); if (xl == MXL_RV32) { - env->pc = (int32_t)tb->pc; + env->pc = (int32_t)tb_pc(tb); } else { - env->pc = tb->pc; + env->pc = tb_pc(tb); } } diff --git a/target/rx/cpu.c b/target/rx/cpu.c index 134b4b6bb6..2f28099723 100644 --- a/target/rx/cpu.c +++ b/target/rx/cpu.c @@ -44,7 +44,7 @@ static void rx_cpu_synchronize_from_tb(CPUState *cs, { RXCPU *cpu = RX_CPU(cs); - cpu->env.pc = tb->pc; + cpu->env.pc = tb_pc(tb); } static bool rx_cpu_has_work(CPUState *cs) diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index 4bafbf8596..a65a66de43 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -46,7 +46,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs, { SuperHCPU *cpu = SUPERH_CPU(cs); - cpu->env.pc = tb->pc; + cpu->env.pc = tb_pc(tb); cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK; } @@ -58,7 +58,7 @@ static bool superh_io_recompile_replay_branch(CPUState *cs, CPUSH4State *env = &cpu->env; if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0 - && env->pc != tb->pc) { + && env->pc != tb_pc(tb)) { env->pc -= 2; env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); return true; diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 1b2afb0cb8..1f9ef7afd8 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -705,7 +705,7 @@ static void sparc_cpu_synchronize_from_tb(CPUState *cs, { SPARCCPU *cpu = SPARC_CPU(cs); - cpu->env.pc = tb->pc; + cpu->env.pc = tb_pc(tb); cpu->env.npc = tb->cs_base; } diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c index 91b16bdefc..ab7a1e3a6d 100644 --- a/target/tricore/cpu.c +++ b/target/tricore/cpu.c @@ -55,7 +55,7 @@ static void tricore_cpu_synchronize_from_tb(CPUState *cs, TriCoreCPU *cpu = TRICORE_CPU(cs); CPUTriCoreState *env = &cpu->env; - env->PC = tb->pc; + env->PC = tb_pc(tb); } static void tricore_cpu_reset(DeviceState *dev) diff --git a/tcg/tcg.c b/tcg/tcg.c index 0f9cfe96f2..612a12f58f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -4188,7 +4188,7 @@ int64_t tcg_cpu_exec_time(void) #endif -int tcg_gen_code(TCGContext *s, TranslationBlock *tb) +int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) { #ifdef CONFIG_PROFILER TCGProfile *prof = &s->prof; @@ -4218,7 +4218,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) #ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) - && qemu_log_in_addr_range(tb->pc))) { + && qemu_log_in_addr_range(pc_start))) { FILE *logfile = qemu_log_trylock(); if (logfile) { fprintf(logfile, "OP:\n"); @@ -4265,7 +4265,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) if (s->nb_indirects > 0) { #ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) - && qemu_log_in_addr_range(tb->pc))) { + && qemu_log_in_addr_range(pc_start))) { FILE *logfile = qemu_log_trylock(); if (logfile) { fprintf(logfile, "OP before indirect lowering:\n"); @@ -4288,7 +4288,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 
#ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) - && qemu_log_in_addr_range(tb->pc))) { + && qemu_log_in_addr_range(pc_start))) { FILE *logfile = qemu_log_trylock(); if (logfile) { fprintf(logfile, "OP after optimization and liveness analysis:\n"); -- cgit 1.4.1 From 8ed558ec0cbcc29ecf490e93c54dd65d276e8e69 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 12 Aug 2022 09:53:53 -0700 Subject: accel/tcg: Introduce TARGET_TB_PCREL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare for targets to be able to produce TBs that can run in more than one virtual context. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 16 +++++++----- accel/tcg/internal.h | 4 +++ accel/tcg/tb-jmp-cache.h | 41 ++++++++++++++++++++++++++++++ accel/tcg/translate-all.c | 64 +++++++++++++++++++++++++++++++---------------- include/exec/cpu-defs.h | 3 +++ include/exec/exec-all.h | 32 ++++++++++++++++++++++-- 6 files changed, 131 insertions(+), 29 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 8b3f8435fb..f9e5cc9ba0 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -186,7 +186,7 @@ static bool tb_lookup_cmp(const void *p, const void *d) const TranslationBlock *tb = p; const struct tb_desc *desc = d; - if (tb_pc(tb) == desc->pc && + if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) && tb->page_addr[0] == desc->page_addr0 && tb->cs_base == desc->cs_base && tb->flags == desc->flags && @@ -237,7 +237,8 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, return NULL; } desc.page_addr0 = phys_pc; - h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate); + h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : pc), + flags, cflags, *cpu->trace_dstate); return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp); } @@ -247,16 +248,18 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, uint32_t flags, uint32_t cflags) { TranslationBlock *tb; + CPUJumpCache *jc; uint32_t hash; /* we should never be trying to look up an INVALID tb */ tcg_debug_assert(!(cflags & CF_INVALID)); hash = tb_jmp_cache_hash_func(pc); - tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb); + jc = cpu->tb_jmp_cache; + tb = tb_jmp_cache_get_tb(jc, hash); if (likely(tb && - tb->pc == pc && + tb_jmp_cache_get_pc(jc, hash, tb) == pc && tb->cs_base == cs_base && tb->flags == flags && tb->trace_vcpu_dstate == *cpu->trace_dstate && @@ -267,7 +270,7 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, if (tb == NULL) { return NULL; } - qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb); + tb_jmp_cache_set(jc, hash, tb, pc); return tb; } @@ -453,6 +456,7 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) if (cc->tcg_ops->synchronize_from_tb) { cc->tcg_ops->synchronize_from_tb(cpu, last_tb); } else { + assert(!TARGET_TB_PCREL); assert(cc->set_pc); cc->set_pc(cpu, tb_pc(last_tb)); } @@ -1002,7 +1006,7 @@ int cpu_exec(CPUState *cpu) * for the fast lookup */ h = tb_jmp_cache_hash_func(pc); - qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb); + tb_jmp_cache_set(cpu->tb_jmp_cache, h, tb, pc); } #ifndef CONFIG_USER_ONLY diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index a3875a3b5a..dc800fd485 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -21,7 +21,11 @@ void tb_htable_init(void); /* Return the current PC from CPU, which may be cached in TB. 
*/ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) { +#if TARGET_TB_PCREL + return cpu->cc->get_pc(cpu); +#else return tb_pc(tb); +#endif } #endif /* ACCEL_TCG_INTERNAL_H */ diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h index 2d8fbb1bfe..ff5ffc8fc2 100644 --- a/accel/tcg/tb-jmp-cache.h +++ b/accel/tcg/tb-jmp-cache.h @@ -14,11 +14,52 @@ /* * Accessed in parallel; all accesses to 'tb' must be atomic. + * For TARGET_TB_PCREL, accesses to 'pc' must be protected by + * a load_acquire/store_release to 'tb'. */ struct CPUJumpCache { struct { TranslationBlock *tb; +#if TARGET_TB_PCREL + target_ulong pc; +#endif } array[TB_JMP_CACHE_SIZE]; }; +static inline TranslationBlock * +tb_jmp_cache_get_tb(CPUJumpCache *jc, uint32_t hash) +{ +#if TARGET_TB_PCREL + /* Use acquire to ensure current load of pc from jc. */ + return qatomic_load_acquire(&jc->array[hash].tb); +#else + /* Use rcu_read to ensure current load of pc from *tb. */ + return qatomic_rcu_read(&jc->array[hash].tb); +#endif +} + +static inline target_ulong +tb_jmp_cache_get_pc(CPUJumpCache *jc, uint32_t hash, TranslationBlock *tb) +{ +#if TARGET_TB_PCREL + return jc->array[hash].pc; +#else + return tb_pc(tb); +#endif +} + +static inline void +tb_jmp_cache_set(CPUJumpCache *jc, uint32_t hash, + TranslationBlock *tb, target_ulong pc) +{ +#if TARGET_TB_PCREL + jc->array[hash].pc = pc; + /* Use store_release on tb to ensure pc is written first. */ + qatomic_store_release(&jc->array[hash].tb, tb); +#else + /* Use the pc value already stored in tb->pc. */ + qatomic_set(&jc->array[hash].tb, tb); +#endif +} + #endif /* ACCEL_TCG_TB_JMP_CACHE_H */ diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 13c964dcd8..4ed75a13e1 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -299,7 +299,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { if (i == 0) { - prev = (j == 0 ? tb_pc(tb) : 0); + prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0); } else { prev = tcg_ctx->gen_insn_data[i - 1][j]; } @@ -327,7 +327,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc, bool reset_icount) { - target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) }; + target_ulong data[TARGET_INSN_START_WORDS]; uintptr_t host_pc = (uintptr_t)tb->tc.ptr; CPUArchState *env = cpu->env_ptr; const uint8_t *p = tb->tc.ptr + tb->tc.size; @@ -343,6 +343,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, return -1; } + memset(data, 0, sizeof(data)); + if (!TARGET_TB_PCREL) { + data[0] = tb_pc(tb); + } + /* Reconstruct the stored insn data while looking for the point at which the end of the insn exceeds the searched_pc. 
*/ for (i = 0; i < num_insns; ++i) { @@ -885,13 +890,13 @@ static bool tb_cmp(const void *ap, const void *bp) const TranslationBlock *a = ap; const TranslationBlock *b = bp; - return tb_pc(a) == tb_pc(b) && - a->cs_base == b->cs_base && - a->flags == b->flags && - (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && - a->trace_vcpu_dstate == b->trace_vcpu_dstate && - a->page_addr[0] == b->page_addr[0] && - a->page_addr[1] == b->page_addr[1]; + return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) && + a->cs_base == b->cs_base && + a->flags == b->flags && + (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && + a->trace_vcpu_dstate == b->trace_vcpu_dstate && + a->page_addr[0] == b->page_addr[0] && + a->page_addr[1] == b->page_addr[1]); } void tb_htable_init(void) @@ -1148,6 +1153,28 @@ static inline void tb_jmp_unlink(TranslationBlock *dest) qemu_spin_unlock(&dest->jmp_lock); } +static void tb_jmp_cache_inval_tb(TranslationBlock *tb) +{ + CPUState *cpu; + + if (TARGET_TB_PCREL) { + /* A TB may be at any virtual address */ + CPU_FOREACH(cpu) { + tcg_flush_jmp_cache(cpu); + } + } else { + uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb)); + + CPU_FOREACH(cpu) { + CPUJumpCache *jc = cpu->tb_jmp_cache; + + if (qatomic_read(&jc->array[h].tb) == tb) { + qatomic_set(&jc->array[h].tb, NULL); + } + } + } +} + /* * In user-mode, call with mmap_lock held. * In !user-mode, if @rm_from_page_list is set, call with the TB's pages' @@ -1155,7 +1182,6 @@ static inline void tb_jmp_unlink(TranslationBlock *dest) */ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) { - CPUState *cpu; PageDesc *p; uint32_t h; tb_page_addr_t phys_pc; @@ -1170,8 +1196,8 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) /* remove the TB from the hash list */ phys_pc = tb->page_addr[0]; - h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags, - tb->trace_vcpu_dstate); + h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)), + tb->flags, orig_cflags, tb->trace_vcpu_dstate); if (!qht_remove(&tb_ctx.htable, tb, h)) { return; } @@ -1187,13 +1213,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) } /* remove the TB from the hash list */ - h = tb_jmp_cache_hash_func(tb->pc); - CPU_FOREACH(cpu) { - CPUJumpCache *jc = cpu->tb_jmp_cache; - if (qatomic_read(&jc->array[h].tb) == tb) { - qatomic_set(&jc->array[h].tb, NULL); - } - } + tb_jmp_cache_inval_tb(tb); /* suppress this TB from the two jump lists */ tb_remove_from_jmp_list(tb, 0); @@ -1302,8 +1322,8 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, } /* add in the hash table */ - h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags, - tb->trace_vcpu_dstate); + h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 
0 : tb_pc(tb)), + tb->flags, tb->cflags, tb->trace_vcpu_dstate); qht_insert(&tb_ctx.htable, tb, h, &existing_tb); /* remove TB from the page(s) if we couldn't insert it */ @@ -1373,7 +1393,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, gen_code_buf = tcg_ctx->code_gen_ptr; tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); +#if !TARGET_TB_PCREL tb->pc = pc; +#endif tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index 67239b4e5e..21309cf567 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -54,6 +54,9 @@ # error TARGET_PAGE_BITS must be defined in cpu-param.h # endif #endif +#ifndef TARGET_TB_PCREL +# define TARGET_TB_PCREL 0 +#endif #define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 7ea6026ba9..e5f8b224a5 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -496,8 +496,32 @@ struct tb_tc { }; struct TranslationBlock { - target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ - target_ulong cs_base; /* CS base for this block */ +#if !TARGET_TB_PCREL + /* + * Guest PC corresponding to this block. This must be the true + * virtual address. Therefore e.g. x86 stores EIP + CS_BASE, and + * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or + * privilege, must store those bits elsewhere. + * + * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are + * written such that the TB is associated only with the physical + * page and may be run in any virtual address context. In this case, + * PC must always be taken from ENV in a target-specific manner. + * Unwind information is taken as offsets from the page, to be + * deposited into the "current" PC. + */ + target_ulong pc; +#endif + + /* + * Target-specific data associated with the TranslationBlock, e.g.: + * x86: the original user, the Code Segment virtual base, + * arm: an extension of tb->flags, + * s390x: instruction data for EXECUTE, + * sparc: the next pc of the instruction queue (for delay slots). + */ + target_ulong cs_base; + uint32_t flags; /* flags defining in which context the code was generated */ uint32_t cflags; /* compile flags */ @@ -573,7 +597,11 @@ struct TranslationBlock { /* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */ static inline target_ulong tb_pc(const TranslationBlock *tb) { +#if TARGET_TB_PCREL + qemu_build_not_reached(); +#else return tb->pc; +#endif } /* Hide the qatomic_read to make code a little easier on the eyes */ -- cgit 1.4.1
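To summarize the opt-in mechanism (a sketch; no target enables the flag
in this series): cpu-defs.h defaults TARGET_TB_PCREL to 0, so a target
that wants position-independent TBs defines it in its cpu-param.h.
With the flag set, tb->pc is compiled out, any remaining use of tb_pc()
is a build error, and the current PC is always recovered from CPU
state, as in log_pc() above:

    /* Hypothetical target/foo/cpu-param.h addition: */
    #define TARGET_TB_PCREL 1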