summary refs log tree commit diff stats
path: root/accel/tcg/cpu-exec.c
diff options
context:
space:
mode:
authorEmilio G. Cota <cota@braap.org>2017-08-02 20:34:06 -0400
committerRichard Henderson <richard.henderson@linaro.org>2018-06-15 08:18:48 -1000
commit194125e3ebd553acb02aaf3797a4f0387493fe94 (patch)
tree52dbde720bee78205de3d475cabf1e80bfc24388 /accel/tcg/cpu-exec.c
parent95590e24af11236ef334f6bc3e2b71404a790ddb (diff)
downloadfocaccia-qemu-194125e3ebd553acb02aaf3797a4f0387493fe94.tar.gz
focaccia-qemu-194125e3ebd553acb02aaf3797a4f0387493fe94.zip
translate-all: protect TB jumps with a per-destination-TB lock
This applies to both user-mode and !user-mode emulation.

Instead of relying on a global lock, protect the list of incoming
jumps with tb->jmp_lock. This lock also protects tb->cflags,
so update all tb->cflags readers outside tb->jmp_lock to use
atomic reads via tb_cflags().

In order to find the destination TB (and therefore its jmp_lock)
from the origin TB, we introduce tb->jmp_dest[].

I considered not using a linked list of jumps, which simplifies
code and makes the struct smaller. However, it unnecessarily increases
memory usage, which results in a performance decrease. See for
instance these numbers booting+shutting down debian-arm:
                      Time (s)  Rel. err (%)  Abs. err (s)  Rel. slowdown (%)
------------------------------------------------------------------------------
 before                  20.88          0.74      0.154512                 0.
 after                   20.81          0.38      0.079078        -0.33524904
 GTree                   21.02          0.28      0.058856         0.67049808
 GHashTable + xxhash     21.63          1.08      0.233604          3.5919540

Using a hash table or a binary tree to keep track of the jumps
doesn't really pay off, not only due to the increased memory usage,
but also because most TBs have only 0 or 1 jumps to them. The maximum
number of jumps when booting debian-arm that I measured is 35, but
as we can see in the histogram below a TB with that many incoming jumps
is extremely rare; the average TB has 0.80 incoming jumps.

n_jumps: 379208; avg jumps/tb: 0.801099
dist: [0.0,1.0)|▄█▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁ ▁▁▁  ▁▁▁     ▁|[34.0,35.0]

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'accel/tcg/cpu-exec.c')
-rw-r--r--accel/tcg/cpu-exec.c41
1 files changed, 27 insertions, 14 deletions
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 45f6ebc65e..c482008bc7 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -352,28 +352,43 @@ void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
     }
 }
 
-/* Called with tb_lock held.  */
 static inline void tb_add_jump(TranslationBlock *tb, int n,
                                TranslationBlock *tb_next)
 {
+    uintptr_t old;
+
     assert(n < ARRAY_SIZE(tb->jmp_list_next));
-    if (tb->jmp_list_next[n]) {
-        /* Another thread has already done this while we were
-         * outside of the lock; nothing to do in this case */
-        return;
+    qemu_spin_lock(&tb_next->jmp_lock);
+
+    /* make sure the destination TB is valid */
+    if (tb_next->cflags & CF_INVALID) {
+        goto out_unlock_next;
+    }
+    /* Atomically claim the jump destination slot only if it was NULL */
+    old = atomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL, (uintptr_t)tb_next);
+    if (old) {
+        goto out_unlock_next;
     }
+
+    /* patch the native jump address */
+    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
+
+    /* add in TB jmp list */
+    tb->jmp_list_next[n] = tb_next->jmp_list_head;
+    tb_next->jmp_list_head = (uintptr_t)tb | n;
+
+    qemu_spin_unlock(&tb_next->jmp_lock);
+
     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                            "Linking TBs %p [" TARGET_FMT_lx
                            "] index %d -> %p [" TARGET_FMT_lx "]\n",
                            tb->tc.ptr, tb->pc, n,
                            tb_next->tc.ptr, tb_next->pc);
+    return;
 
-    /* patch the native jump address */
-    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
-
-    /* add in TB jmp circular list */
-    tb->jmp_list_next[n] = tb_next->jmp_list_first;
-    tb_next->jmp_list_first = (uintptr_t)tb | n;
+ out_unlock_next:
+    qemu_spin_unlock(&tb_next->jmp_lock);
+    return;
 }
 
 static inline TranslationBlock *tb_find(CPUState *cpu,
@@ -416,9 +431,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
             tb_lock();
             acquired_tb_lock = true;
         }
-        if (!(tb->cflags & CF_INVALID)) {
-            tb_add_jump(last_tb, tb_exit, tb);
-        }
+        tb_add_jump(last_tb, tb_exit, tb);
     }
     if (acquired_tb_lock) {
         tb_unlock();