From 403f290c0603f35f2d09c982bf5549b6d0803ec1 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Tue, 16 Oct 2018 11:38:40 -0400 Subject: cputlb: read CPUTLBEntry.addr_write atomically Updates can come from other threads, so readers that do not take tlb_lock must use atomic_read to avoid undefined behaviour (UB). This completes the conversion to tlb_lock. This conversion results on average in no performance loss, as the following experiments (run on an Intel i7-6700K CPU @ 4.00GHz) show. 1. aarch64 bootup+shutdown test: - Before: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% ) 31,574,905,303 cycles # 4.217 GHz ( +- 0.12% ) 57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% ) 10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% ) 173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% ) 7.504481349 seconds time elapsed ( +- 0.14% ) - After: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% ) 31,478,476,520 cycles # 4.218 GHz ( +- 0.07% ) 57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% ) 10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% ) 173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% ) 7.474970463 seconds time elapsed ( +- 0.07% ) 2. SPEC06int: SPEC06int (test set) [Y axis: Speedup over master] 1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+ | | 1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+ | +++ | +++ tlb-lock-v3 (spinl|ck) | | +++ | | +++ +++ | | | 1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+ | ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### | 1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+ | *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # | 0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+ | * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # | | * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # | 0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+ | * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # | 0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+ | * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | | * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | 0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+ | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+ 400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean png: https://imgur.com/a/BHzpPTW Notes: - tlb-lock-v2 corresponds to an implementation with a mutex. - tlb-lock-v3 corresponds to the current implementation, i.e. a spinlock and a single lock acquisition in tlb_set_page_with_attrs. Signed-off-by: Emilio G. Cota Message-Id: <20181016153840.25877-1-cota@braap.org> Signed-off-by: Richard Henderson --- include/exec/cpu_ldst.h | 11 ++++++++++- include/exec/cpu_ldst_template.h | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/exec') diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index f54d91ff68..959068495a 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -126,6 +126,15 @@ extern __thread uintptr_t helper_retaddr; /* The memory helpers for tcg-generated code need tcg_target_long etc. */ #include "tcg.h" +static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) +{ +#if TCG_OVERSIZED_GUEST + return entry->addr_write; +#else + return atomic_read(&entry->addr_write); +#endif +} + /* Find the TLB index corresponding to the mmu_idx + address pair. */ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, target_ulong addr) @@ -439,7 +448,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, tlb_addr = tlbentry->addr_read; break; case 1: - tlb_addr = tlbentry->addr_write; + tlb_addr = tlb_addr_write(tlbentry); break; case 2: tlb_addr = tlbentry->addr_code; diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h index d21a0b59bf..0f061d47ef 100644 --- a/include/exec/cpu_ldst_template.h +++ b/include/exec/cpu_ldst_template.h @@ -177,7 +177,7 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, addr = ptr; mmu_idx = CPU_MMU_INDEX; entry = tlb_entry(env, mmu_idx, addr); - if (unlikely(entry->addr_write != + if (unlikely(tlb_addr_write(entry) != (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { oi = make_memop_idx(SHIFT, mmu_idx); glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi, -- cgit 1.4.1