Diffstat (limited to 'accel/tcg'):

 -rw-r--r--  accel/tcg/Makefile.objs        4
 -rw-r--r--  accel/tcg/atomic_template.h  215
 -rw-r--r--  accel/tcg/softmmu_template.h 433
 -rw-r--r--  accel/tcg/tcg-runtime.c      180
 -rw-r--r--  accel/tcg/tcg-runtime.h      136
 -rw-r--r--  accel/tcg/user-exec-stub.c    34
 -rw-r--r--  accel/tcg/user-exec.c        627

 7 files changed, 1629 insertions(+), 0 deletions(-)
diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
index 22642e6f75..228cd84fa4 100644
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -1,4 +1,8 @@
 obj-$(CONFIG_SOFTMMU) += tcg-all.o
 obj-$(CONFIG_SOFTMMU) += cputlb.o
+obj-y += tcg-runtime.o
 obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
 obj-y += translator.o
+
+obj-$(CONFIG_USER_ONLY) += user-exec.o
+obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
new file mode 100644
index 0000000000..b400b2a3d3
--- /dev/null
+++ b/accel/tcg/atomic_template.h
@@ -0,0 +1,215 @@
+/*
+ * Atomic helper templates
+ * Included from tcg-runtime.c and cputlb.c.
+ *
+ * Copyright (c) 2016 Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if DATA_SIZE == 16
+# define SUFFIX     o
+# define DATA_TYPE  Int128
+# define BSWAP      bswap128
+#elif DATA_SIZE == 8
+# define SUFFIX     q
+# define DATA_TYPE  uint64_t
+# define BSWAP      bswap64
+#elif DATA_SIZE == 4
+# define SUFFIX     l
+# define DATA_TYPE  uint32_t
+# define BSWAP      bswap32
+#elif DATA_SIZE == 2
+# define SUFFIX     w
+# define DATA_TYPE  uint16_t
+# define BSWAP      bswap16
+#elif DATA_SIZE == 1
+# define SUFFIX     b
+# define DATA_TYPE  uint8_t
+# define BSWAP
+#else
+# error unsupported data size
+#endif
+
+#if DATA_SIZE >= 4
+# define ABI_TYPE  DATA_TYPE
+#else
+# define ABI_TYPE  uint32_t
+#endif
+
+/* Define host-endian atomic operations.  Note that END is used within
+   the ATOMIC_NAME macro, and redefined below.  */
+#if DATA_SIZE == 1
+# define END
+#elif defined(HOST_WORDS_BIGENDIAN)
+# define END  _be
+#else
+# define END  _le
+#endif
+
+ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
+                              ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    return atomic_cmpxchg__nocheck(haddr, cmpv, newv);
+}
+
+#if DATA_SIZE >= 16
+ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
+{
+    DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
+    __atomic_load(haddr, &val, __ATOMIC_RELAXED);
+    return val;
+}
+
+void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
+                     ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+}
+#else
+ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
+                           ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    return atomic_xchg__nocheck(haddr, val);
+}
+
+#define GEN_ATOMIC_HELPER(X)                                        \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
+                 ABI_TYPE val EXTRA_ARGS)                           \
+{                                                                   \
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
+    return atomic_##X(haddr, val);                                  \
+}                                                                   \
+
+GEN_ATOMIC_HELPER(fetch_add)
+GEN_ATOMIC_HELPER(fetch_and)
+GEN_ATOMIC_HELPER(fetch_or)
+GEN_ATOMIC_HELPER(fetch_xor)
+GEN_ATOMIC_HELPER(add_fetch)
+GEN_ATOMIC_HELPER(and_fetch)
+GEN_ATOMIC_HELPER(or_fetch)
+GEN_ATOMIC_HELPER(xor_fetch)
+
+#undef GEN_ATOMIC_HELPER
+#endif /* DATA_SIZE >= 16 */
+
+#undef END
+
+#if DATA_SIZE > 1
+
+/* Define reverse-host-endian atomic operations.  Note that END is used
+   within the ATOMIC_NAME macro.  */
+#ifdef HOST_WORDS_BIGENDIAN
+# define END  _le
+#else
+# define END  _be
+#endif
+
+ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
+                              ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)));
+}
+
+#if DATA_SIZE >= 16
+ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
+{
+    DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
+    __atomic_load(haddr, &val, __ATOMIC_RELAXED);
+    return BSWAP(val);
+}
+
+void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
+                     ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    val = BSWAP(val);
+    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+}
+#else
+ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
+                           ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    return BSWAP(atomic_xchg__nocheck(haddr, BSWAP(val)));
+}
+
+#define GEN_ATOMIC_HELPER(X)                                        \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
+                 ABI_TYPE val EXTRA_ARGS)                           \
+{                                                                   \
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
+    return BSWAP(atomic_##X(haddr, BSWAP(val)));                    \
+}
+
+GEN_ATOMIC_HELPER(fetch_and)
+GEN_ATOMIC_HELPER(fetch_or)
+GEN_ATOMIC_HELPER(fetch_xor)
+GEN_ATOMIC_HELPER(and_fetch)
+GEN_ATOMIC_HELPER(or_fetch)
+GEN_ATOMIC_HELPER(xor_fetch)
+
+#undef GEN_ATOMIC_HELPER
+
+/* Note that for addition, we need to use a separate cmpxchg loop instead
+   of bswaps for the reverse-host-endian helpers.  */
+ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
+                         ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE ldo, ldn, ret, sto;
+
+    ldo = atomic_read__nocheck(haddr);
+    while (1) {
+        ret = BSWAP(ldo);
+        sto = BSWAP(ret + val);
+        ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
+        if (ldn == ldo) {
+            return ret;
+        }
+        ldo = ldn;
+    }
+}
+
+ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
+                         ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE ldo, ldn, ret, sto;
+
+    ldo = atomic_read__nocheck(haddr);
+    while (1) {
+        ret = BSWAP(ldo) + val;
+        sto = BSWAP(ret);
+        ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
+        if (ldn == ldo) {
+            return ret;
+        }
+        ldo = ldn;
+    }
+}
+#endif /* DATA_SIZE >= 16 */
+
+#undef END
+#endif /* DATA_SIZE > 1 */
+
+#undef BSWAP
+#undef ABI_TYPE
+#undef DATA_TYPE
+#undef SUFFIX
+#undef DATA_SIZE
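
The trickiest part of this template is the reverse-host-endian fetch_add/add_fetch pair: byte-swapping the addend alone would be wrong, because carries propagate across byte boundaries, so a cmpxchg loop is required. Below is a minimal, self-contained sketch of the same technique in plain C, with GCC/Clang builtins standing in for QEMU's atomic_cmpxchg__nocheck and bswap32; the function names are hypothetical and not part of this patch.

    #include <stdint.h>
    #include <stdio.h>

    /* Atomically add 'val' to a 32-bit value stored in the opposite byte
       order (e.g. a big-endian guest value on a little-endian host),
       mirroring the ATOMIC_NAME(fetch_add) loop above. */
    static uint32_t swapped_fetch_add(uint32_t *haddr, uint32_t val)
    {
        uint32_t ldo = __atomic_load_n(haddr, __ATOMIC_RELAXED);
        while (1) {
            uint32_t ret = __builtin_bswap32(ldo);        /* host-endian view */
            uint32_t sto = __builtin_bswap32(ret + val);  /* back to memory order */
            if (__atomic_compare_exchange_n(haddr, &ldo, sto, false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) {
                return ret;  /* old value, host-endian */
            }
            /* ldo was refreshed by the failed compare-exchange; retry. */
        }
    }

    int main(void)
    {
        uint32_t mem = __builtin_bswap32(41);  /* 41 in the swapped order */
        uint32_t old = swapped_fetch_add(&mem, 1);
        printf("old=%u new=%u\n", old, __builtin_bswap32(mem));  /* 41, 42 */
        return 0;
    }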
diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h
new file mode 100644
index 0000000000..d7563292a5
--- /dev/null
+++ b/accel/tcg/softmmu_template.h
@@ -0,0 +1,433 @@
+/*
+ *  Software MMU support
+ *
+ * Generate helpers used by TCG for qemu_ld/st ops and code load
+ * functions.
+ *
+ * Included from target op helpers and exec.c.
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#if DATA_SIZE == 8
+#define SUFFIX q
+#define LSUFFIX q
+#define SDATA_TYPE  int64_t
+#define DATA_TYPE  uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX l
+#define LSUFFIX l
+#define SDATA_TYPE  int32_t
+#define DATA_TYPE  uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX w
+#define LSUFFIX uw
+#define SDATA_TYPE  int16_t
+#define DATA_TYPE  uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX b
+#define LSUFFIX ub
+#define SDATA_TYPE  int8_t
+#define DATA_TYPE  uint8_t
+#else
+#error unsupported data size
+#endif
+
+
+/* For the benefit of TCG generated code, we want to avoid the complication
+   of ABI-specific return type promotion and always return a value extended
+   to the register size of the host.  This is tcg_target_long, except in the
+   case of a 32-bit host and 64-bit data, and for that we always have
+   uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.  */
+#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
+# define WORD_TYPE  DATA_TYPE
+# define USUFFIX    SUFFIX
+#else
+# define WORD_TYPE  tcg_target_ulong
+# define USUFFIX    glue(u, SUFFIX)
+# define SSUFFIX    glue(s, SUFFIX)
+#endif
+
+#ifdef SOFTMMU_CODE_ACCESS
+#define READ_ACCESS_TYPE MMU_INST_FETCH
+#define ADDR_READ addr_code
+#else
+#define READ_ACCESS_TYPE MMU_DATA_LOAD
+#define ADDR_READ addr_read
+#endif
+
+#if DATA_SIZE == 8
+# define BSWAP(X)  bswap64(X)
+#elif DATA_SIZE == 4
+# define BSWAP(X)  bswap32(X)
+#elif DATA_SIZE == 2
+# define BSWAP(X)  bswap16(X)
+#else
+# define BSWAP(X)  (X)
+#endif
+
+#if DATA_SIZE == 1
+# define helper_le_ld_name  glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
+# define helper_be_ld_name  helper_le_ld_name
+# define helper_le_lds_name glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX)
+# define helper_be_lds_name helper_le_lds_name
+# define helper_le_st_name  glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
+# define helper_be_st_name  helper_le_st_name
+#else
+# define helper_le_ld_name  glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
+# define helper_be_ld_name  glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
+# define helper_le_lds_name glue(glue(helper_le_ld, SSUFFIX), MMUSUFFIX)
+# define helper_be_lds_name glue(glue(helper_be_ld, SSUFFIX), MMUSUFFIX)
+# define helper_le_st_name  glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
+# define helper_be_st_name  glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
+#endif
+
+#ifndef SOFTMMU_CODE_ACCESS
+static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
+                                              size_t mmu_idx, size_t index,
+                                              target_ulong addr,
+                                              uintptr_t retaddr)
+{
+    CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+    return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, DATA_SIZE);
+}
+#endif
+
+WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
+                            TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    unsigned mmu_idx = get_mmuidx(oi);
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    unsigned a_bits = get_alignment_bits(get_memop(oi));
+    uintptr_t haddr;
+    DATA_TYPE res;
+
+    if (addr & ((1 << a_bits) - 1)) {
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                             mmu_idx, retaddr);
+    }
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        if (!VICTIM_TLB_HIT(ADDR_READ, addr)) {
+            tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                     mmu_idx, retaddr);
+        }
+        tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr);
+        res = TGT_LE(res);
+        return res;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                    >= TARGET_PAGE_SIZE)) {
+        target_ulong addr1, addr2;
+        DATA_TYPE res1, res2;
+        unsigned shift;
+    do_unaligned_access:
+        addr1 = addr & ~(DATA_SIZE - 1);
+        addr2 = addr1 + DATA_SIZE;
+        res1 = helper_le_ld_name(env, addr1, oi, retaddr);
+        res2 = helper_le_ld_name(env, addr2, oi, retaddr);
+        shift = (addr & (DATA_SIZE - 1)) * 8;
+
+        /* Little-endian combine.  */
+        res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
+        return res;
+    }
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+#if DATA_SIZE == 1
+    res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr);
+#else
+    res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr);
+#endif
+    return res;
+}
+
+#if DATA_SIZE > 1
+WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
+                            TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    unsigned mmu_idx = get_mmuidx(oi);
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    unsigned a_bits = get_alignment_bits(get_memop(oi));
+    uintptr_t haddr;
+    DATA_TYPE res;
+
+    if (addr & ((1 << a_bits) - 1)) {
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                             mmu_idx, retaddr);
+    }
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        if (!VICTIM_TLB_HIT(ADDR_READ, addr)) {
+            tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
+                     mmu_idx, retaddr);
+        }
+        tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr);
+        res = TGT_BE(res);
+        return res;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                    >= TARGET_PAGE_SIZE)) {
+        target_ulong addr1, addr2;
+        DATA_TYPE res1, res2;
+        unsigned shift;
+    do_unaligned_access:
+        addr1 = addr & ~(DATA_SIZE - 1);
+        addr2 = addr1 + DATA_SIZE;
+        res1 = helper_be_ld_name(env, addr1, oi, retaddr);
+        res2 = helper_be_ld_name(env, addr2, oi, retaddr);
+        shift = (addr & (DATA_SIZE - 1)) * 8;
+
+        /* Big-endian combine.  */
+        res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
+        return res;
+    }
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr);
+    return res;
+}
+#endif /* DATA_SIZE > 1 */
+
+#ifndef SOFTMMU_CODE_ACCESS
+
+/* Provide signed versions of the load routines as well.  We can of course
+   avoid this for 64-bit data, or for 32-bit data on 32-bit host.  */
+#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
+WORD_TYPE helper_le_lds_name(CPUArchState *env, target_ulong addr,
+                             TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    return (SDATA_TYPE)helper_le_ld_name(env, addr, oi, retaddr);
+}
+
+# if DATA_SIZE > 1
+WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr,
+                             TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    return (SDATA_TYPE)helper_be_ld_name(env, addr, oi, retaddr);
+}
+# endif
+#endif
+
+static inline void glue(io_write, SUFFIX)(CPUArchState *env,
+                                          size_t mmu_idx, size_t index,
+                                          DATA_TYPE val,
+                                          target_ulong addr,
+                                          uintptr_t retaddr)
+{
+    CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+    return io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, DATA_SIZE);
+}
+
+void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
+                       TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    unsigned mmu_idx = get_mmuidx(oi);
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    unsigned a_bits = get_alignment_bits(get_memop(oi));
+    uintptr_t haddr;
+
+    if (addr & ((1 << a_bits) - 1)) {
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
+                             mmu_idx, retaddr);
+    }
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        if (!VICTIM_TLB_HIT(addr_write, addr)) {
+            tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
+        }
+        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+
+        /* ??? Note that the io helpers always write data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        val = TGT_LE(val);
+        glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr);
+        return;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                     >= TARGET_PAGE_SIZE)) {
+        int i, index2;
+        target_ulong page2, tlb_addr2;
+    do_unaligned_access:
+        /* Ensure the second page is in the TLB.  Note that the first page
+           is already guaranteed to be filled, and that the second page
+           cannot evict the first.  */
+        page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
+        index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+        tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
+        if (page2 != (tlb_addr2 & (TARGET_PAGE_MASK | TLB_INVALID_MASK))
+            && !VICTIM_TLB_HIT(addr_write, page2)) {
+            tlb_fill(ENV_GET_CPU(env), page2, MMU_DATA_STORE,
+                     mmu_idx, retaddr);
+        }
+
+        /* XXX: not efficient, but simple.  */
+        /* This loop must go in the forward direction to avoid issues
+           with self-modifying code in Windows 64-bit.  */
+        for (i = 0; i < DATA_SIZE; ++i) {
+            /* Little-endian extract.  */
+            uint8_t val8 = val >> (i * 8);
+            glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+                                            oi, retaddr);
+        }
+        return;
+    }
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+#if DATA_SIZE == 1
+    glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
+#else
+    glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
+#endif
+}
+
+#if DATA_SIZE > 1
+void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
+                       TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    unsigned mmu_idx = get_mmuidx(oi);
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    unsigned a_bits = get_alignment_bits(get_memop(oi));
+    uintptr_t haddr;
+
+    if (addr & ((1 << a_bits) - 1)) {
+        cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
+                             mmu_idx, retaddr);
+    }
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        if (!VICTIM_TLB_HIT(addr_write, addr)) {
+            tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
+        }
+        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+
+        /* ??? Note that the io helpers always write data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        val = TGT_BE(val);
+        glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr);
+        return;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                     >= TARGET_PAGE_SIZE)) {
+        int i, index2;
+        target_ulong page2, tlb_addr2;
+    do_unaligned_access:
+        /* Ensure the second page is in the TLB.  Note that the first page
+           is already guaranteed to be filled, and that the second page
+           cannot evict the first.  */
+        page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
+        index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+        tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
+        if (page2 != (tlb_addr2 & (TARGET_PAGE_MASK | TLB_INVALID_MASK))
+            && !VICTIM_TLB_HIT(addr_write, page2)) {
+            tlb_fill(ENV_GET_CPU(env), page2, MMU_DATA_STORE,
+                     mmu_idx, retaddr);
+        }
+
+        /* XXX: not efficient, but simple */
+        /* This loop must go in the forward direction to avoid issues
+           with self-modifying code.  */
+        for (i = 0; i < DATA_SIZE; ++i) {
+            /* Big-endian extract.  */
+            uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
+            glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+                                            oi, retaddr);
+        }
+        return;
+    }
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
+}
+#endif /* DATA_SIZE > 1 */
+#endif /* !defined(SOFTMMU_CODE_ACCESS) */
+
+#undef READ_ACCESS_TYPE
+#undef DATA_TYPE
+#undef SUFFIX
+#undef LSUFFIX
+#undef DATA_SIZE
+#undef ADDR_READ
+#undef WORD_TYPE
+#undef SDATA_TYPE
+#undef USUFFIX
+#undef SSUFFIX
+#undef BSWAP
+#undef helper_le_ld_name
+#undef helper_be_ld_name
+#undef helper_le_lds_name
+#undef helper_be_lds_name
+#undef helper_le_st_name
+#undef helper_be_st_name
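
To make the unaligned slow path concrete: when a little-endian load straddles two aligned words, helper_le_ld_name performs two aligned loads and stitches them together with the "little-endian combine" above. A small self-contained sketch in plain C (the helper name is hypothetical); note that shift is never zero on this path, since a naturally aligned access cannot span a page:

    #include <assert.h>
    #include <stdint.h>

    /* res1/res2 are the two aligned 4-byte little-endian loads;
       shift = (addr & 3) * 8 is where the access starts in the first word. */
    static uint32_t le_combine(uint32_t res1, uint32_t res2, unsigned shift)
    {
        return (res1 >> shift) | (res2 << (32 - shift));
    }

    int main(void)
    {
        /* Memory bytes 0x11 0x22 ... 0x88: a 4-byte load at offset 2 sees
           bytes 0x33 0x44 0x55 0x66, i.e. 0x66554433 little-endian. */
        assert(le_combine(0x44332211, 0x88776655, 2 * 8) == 0x66554433);
        return 0;
    }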
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
new file mode 100644
index 0000000000..aafb171294
--- /dev/null
+++ b/accel/tcg/tcg-runtime.c
@@ -0,0 +1,180 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
+#include "exec/tb-hash.h"
+#include "disas/disas.h"
+#include "exec/log.h"
+
+/* 32-bit helpers */
+
+int32_t HELPER(div_i32)(int32_t arg1, int32_t arg2)
+{
+    return arg1 / arg2;
+}
+
+int32_t HELPER(rem_i32)(int32_t arg1, int32_t arg2)
+{
+    return arg1 % arg2;
+}
+
+uint32_t HELPER(divu_i32)(uint32_t arg1, uint32_t arg2)
+{
+    return arg1 / arg2;
+}
+
+uint32_t HELPER(remu_i32)(uint32_t arg1, uint32_t arg2)
+{
+    return arg1 % arg2;
+}
+
+/* 64-bit helpers */
+
+uint64_t HELPER(shl_i64)(uint64_t arg1, uint64_t arg2)
+{
+    return arg1 << arg2;
+}
+
+uint64_t HELPER(shr_i64)(uint64_t arg1, uint64_t arg2)
+{
+    return arg1 >> arg2;
+}
+
+int64_t HELPER(sar_i64)(int64_t arg1, int64_t arg2)
+{
+    return arg1 >> arg2;
+}
+
+int64_t HELPER(div_i64)(int64_t arg1, int64_t arg2)
+{
+    return arg1 / arg2;
+}
+
+int64_t HELPER(rem_i64)(int64_t arg1, int64_t arg2)
+{
+    return arg1 % arg2;
+}
+
+uint64_t HELPER(divu_i64)(uint64_t arg1, uint64_t arg2)
+{
+    return arg1 / arg2;
+}
+
+uint64_t HELPER(remu_i64)(uint64_t arg1, uint64_t arg2)
+{
+    return arg1 % arg2;
+}
+
+uint64_t HELPER(muluh_i64)(uint64_t arg1, uint64_t arg2)
+{
+    uint64_t l, h;
+    mulu64(&l, &h, arg1, arg2);
+    return h;
+}
+
+int64_t HELPER(mulsh_i64)(int64_t arg1, int64_t arg2)
+{
+    uint64_t l, h;
+    muls64(&l, &h, arg1, arg2);
+    return h;
+}
+
+uint32_t HELPER(clz_i32)(uint32_t arg, uint32_t zero_val)
+{
+    return arg ? clz32(arg) : zero_val;
+}
+
+uint32_t HELPER(ctz_i32)(uint32_t arg, uint32_t zero_val)
+{
+    return arg ? ctz32(arg) : zero_val;
+}
+
+uint64_t HELPER(clz_i64)(uint64_t arg, uint64_t zero_val)
+{
+    return arg ? clz64(arg) : zero_val;
+}
+
+uint64_t HELPER(ctz_i64)(uint64_t arg, uint64_t zero_val)
+{
+    return arg ? ctz64(arg) : zero_val;
+}
+
+uint32_t HELPER(clrsb_i32)(uint32_t arg)
+{
+    return clrsb32(arg);
+}
+
+uint64_t HELPER(clrsb_i64)(uint64_t arg)
+{
+    return clrsb64(arg);
+}
+
+uint32_t HELPER(ctpop_i32)(uint32_t arg)
+{
+    return ctpop32(arg);
+}
+
+uint64_t HELPER(ctpop_i64)(uint64_t arg)
+{
+    return ctpop64(arg);
+}
+
+void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    TranslationBlock *tb;
+    target_ulong cs_base, pc;
+    uint32_t flags, addr_hash;
+
+    addr_hash = tb_jmp_cache_hash_func(addr);
+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[addr_hash]);
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    if (unlikely(!(tb
+                   && tb->pc == addr
+                   && tb->cs_base == cs_base
+                   && tb->flags == flags
+                   && tb->trace_vcpu_dstate == *cpu->trace_dstate))) {
+        tb = tb_htable_lookup(cpu, addr, cs_base, flags);
+        if (!tb) {
+            return tcg_ctx.code_gen_epilogue;
+        }
+        atomic_set(&cpu->tb_jmp_cache[addr_hash], tb);
+    }
+
+    qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
+                           "Chain %p [%d: " TARGET_FMT_lx "] %s\n",
+                           tb->tc_ptr, cpu->cpu_index, addr,
+                           lookup_symbol(addr));
+    return tb->tc_ptr;
+}
+
+void HELPER(exit_atomic)(CPUArchState *env)
+{
+    cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
+}
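
A usage note on the clz/ctz helpers above: the second argument supplies the result for a zero input, since host builtins such as __builtin_clz are undefined there; the front end can thus request 32/64 for a true count, or whatever its architecture specifies. A self-contained sketch mirroring HELPER(clz_i32), assuming a host where int is 32 bits (the local name stands in for clz32 from qemu/host-utils.h):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t clz_i32(uint32_t arg, uint32_t zero_val)
    {
        /* __builtin_clz(0) is undefined, so route zero through zero_val. */
        return arg ? (uint32_t)__builtin_clz(arg) : zero_val;
    }

    int main(void)
    {
        assert(clz_i32(0x00ff0000u, 32) == 8);
        assert(clz_i32(0, 32) == 32);
        return 0;
    }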
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
new file mode 100644
index 0000000000..c41d38a557
--- /dev/null
+++ b/accel/tcg/tcg-runtime.h
@@ -0,0 +1,136 @@
+DEF_HELPER_FLAGS_2(div_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32)
+DEF_HELPER_FLAGS_2(rem_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32)
+DEF_HELPER_FLAGS_2(divu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(remu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+
+DEF_HELPER_FLAGS_2(div_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
+DEF_HELPER_FLAGS_2(rem_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
+DEF_HELPER_FLAGS_2(divu_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(remu_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(shl_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(shr_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
+
+DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
+DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(clz_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(ctz_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(clz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(ctz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(clrsb_i32, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
+
+DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl)
+
+DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
+
+#ifdef CONFIG_SOFTMMU
+
+DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
+                   i32, env, tl, i32, i32, i32)
+DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
+                   i32, env, tl, i32, i32, i32)
+DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
+                   i32, env, tl, i32, i32, i32)
+DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
+                   i32, env, tl, i32, i32, i32)
+DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
+                   i32, env, tl, i32, i32, i32)
+#ifdef CONFIG_ATOMIC64
+DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
+                   i64, env, tl, i64, i64, i32)
+DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
+                   i64, env, tl, i64, i64, i32)
+#endif
+
+#ifdef CONFIG_ATOMIC64
+#define GEN_ATOMIC_HELPERS(NAME)                                  \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b),              \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le),           \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be),           \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le),           \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be),           \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le),           \
+                       TCG_CALL_NO_WG, i64, env, tl, i64, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be),           \
+                       TCG_CALL_NO_WG, i64, env, tl, i64, i32)
+#else
+#define GEN_ATOMIC_HELPERS(NAME)                                  \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b),              \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le),           \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be),           \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le),           \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
+    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be),           \
+                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)
+#endif /* CONFIG_ATOMIC64 */
+
+#else
+
+DEF_HELPER_FLAGS_4(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
+DEF_HELPER_FLAGS_4(atomic_cmpxchgw_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
+DEF_HELPER_FLAGS_4(atomic_cmpxchgw_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
+DEF_HELPER_FLAGS_4(atomic_cmpxchgl_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
+DEF_HELPER_FLAGS_4(atomic_cmpxchgl_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
+#ifdef CONFIG_ATOMIC64
+DEF_HELPER_FLAGS_4(atomic_cmpxchgq_be, TCG_CALL_NO_WG, i64, env, tl, i64, i64)
+DEF_HELPER_FLAGS_4(atomic_cmpxchgq_le, TCG_CALL_NO_WG, i64, env, tl, i64, i64)
+#endif
+
+#ifdef CONFIG_ATOMIC64
+#define GEN_ATOMIC_HELPERS(NAME)                             \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b),         \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le),      \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be),      \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le),      \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be),      \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_le),      \
+                       TCG_CALL_NO_WG, i64, env, tl, i64)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_be),      \
+                       TCG_CALL_NO_WG, i64, env, tl, i64)
+#else
+#define GEN_ATOMIC_HELPERS(NAME)                             \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b),         \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le),      \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be),      \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le),      \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
+    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be),      \
+                       TCG_CALL_NO_WG, i32, env, tl, i32)
+#endif /* CONFIG_ATOMIC64 */
+
+#endif /* CONFIG_SOFTMMU */
+
+GEN_ATOMIC_HELPERS(fetch_add)
+GEN_ATOMIC_HELPERS(fetch_and)
+GEN_ATOMIC_HELPERS(fetch_or)
+GEN_ATOMIC_HELPERS(fetch_xor)
+
+GEN_ATOMIC_HELPERS(add_fetch)
+GEN_ATOMIC_HELPERS(and_fetch)
+GEN_ATOMIC_HELPERS(or_fetch)
+GEN_ATOMIC_HELPERS(xor_fetch)
+
+GEN_ATOMIC_HELPERS(xchg)
+
+#undef GEN_ATOMIC_HELPERS
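
To see what these macros produce, here is a hedged expansion sketch: under CONFIG_SOFTMMU with CONFIG_ATOMIC64, GEN_ATOMIC_HELPERS(fetch_add) emits one DEF_HELPER_FLAGS_4 line per size/endianness combination, for example

    DEF_HELPER_FLAGS_4(atomic_fetch_addw_le, TCG_CALL_NO_WG,
                       i32, env, tl, i32, i32)

which exec/helper-proto.h turns into a prototype of roughly this shape (the trailing i32 carries the TCGMemOpIdx):

    uint32_t helper_atomic_fetch_addw_le(CPUArchState *env, target_ulong addr,
                                         uint32_t val, uint32_t oi);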
diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c
new file mode 100644
index 0000000000..dbcf1ade9c
--- /dev/null
+++ b/accel/tcg/user-exec-stub.c
@@ -0,0 +1,34 @@
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qom/cpu.h"
+#include "sysemu/replay.h"
+
+void cpu_resume(CPUState *cpu)
+{
+}
+
+void qemu_init_vcpu(CPUState *cpu)
+{
+}
+
+/* User mode emulation does not support record/replay yet.  */
+
+bool replay_exception(void)
+{
+    return true;
+}
+
+bool replay_has_exception(void)
+{
+    return false;
+}
+
+bool replay_interrupt(void)
+{
+    return true;
+}
+
+bool replay_has_interrupt(void)
+{
+    return false;
+}
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
new file mode 100644
index 0000000000..492ea0826c
--- /dev/null
+++ b/accel/tcg/user-exec.c
@@ -0,0 +1,627 @@
+/*
+ *  User emulator execution
+ *
+ *  Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg.h"
+#include "qemu/bitops.h"
+#include "exec/cpu_ldst.h"
+#include "translate-all.h"
+#include "exec/helper-proto.h"
+
+#undef EAX
+#undef ECX
+#undef EDX
+#undef EBX
+#undef ESP
+#undef EBP
+#undef ESI
+#undef EDI
+#undef EIP
+#ifdef __linux__
+#include <sys/ucontext.h>
+#endif
+
+//#define DEBUG_SIGNAL
+
+/* Exit the current TB from a signal handler.  The host registers are
+   restored in a state compatible with the CPU emulator.
+ */
+static void cpu_exit_tb_from_sighandler(CPUState *cpu, sigset_t *old_set)
+{
+    /* XXX: use siglongjmp ? */
+    sigprocmask(SIG_SETMASK, old_set, NULL);
+    cpu_loop_exit_noexc(cpu);
+}
+
+/* 'pc' is the host PC at which the exception was raised. 'address' is
+   the effective address of the memory exception. 'is_write' is 1 if a
+   write caused the exception, and 0 otherwise. 'old_set' is the
+   signal set which should be restored.  */
+static inline int handle_cpu_signal(uintptr_t pc, unsigned long address,
+                                    int is_write, sigset_t *old_set)
+{
+    CPUState *cpu = current_cpu;
+    CPUClass *cc;
+    int ret;
+
+    /* For synchronous signals we expect to be coming from the vCPU
+     * thread (so current_cpu should be valid) and either from running
+     * code or during translation which can fault as we cross pages.
+     *
+     * If neither is true then something has gone wrong and we should
+     * abort rather than try and restart the vCPU execution.
+     */
+    if (!cpu || !cpu->running) {
+        printf("qemu:%s received signal outside vCPU context @ pc=0x%"
+               PRIxPTR "\n",  __func__, pc);
+        abort();
+    }
+
+#if defined(DEBUG_SIGNAL)
+    printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
+           pc, address, is_write, *(unsigned long *)old_set);
+#endif
+    /* XXX: locking issue */
+    if (is_write && h2g_valid(address)) {
+        switch (page_unprotect(h2g(address), pc)) {
+        case 0:
+            /* Fault not caused by a page marked unwritable to protect
+             * cached translations, must be the guest binary's problem
+             */
+            break;
+        case 1:
+            /* Fault caused by protection of cached translation; TBs
+             * invalidated, so resume execution
+             */
+            return 1;
+        case 2:
+            /* Fault caused by protection of cached translation, and the
+             * currently executing TB was modified and must be exited
+             * immediately.
+             */
+            cpu_exit_tb_from_sighandler(cpu, old_set);
+            g_assert_not_reached();
+        default:
+            g_assert_not_reached();
+        }
+    }
+
+    /* Convert forcefully to guest address space; invalid addresses
+       are still valid segv ones.  */
+    address = h2g_nocheck(address);
+
+    cc = CPU_GET_CLASS(cpu);
+    /* see if it is an MMU fault */
+    g_assert(cc->handle_mmu_fault);
+    ret = cc->handle_mmu_fault(cpu, address, is_write, MMU_USER_IDX);
+    if (ret < 0) {
+        return 0; /* not an MMU fault */
+    }
+    if (ret == 0) {
+        return 1; /* the MMU fault was handled without a real CPU fault */
+    }
+
+    /* Now we have a real cpu fault.  Since this is the exact location of
+     * the exception, we must undo the adjustment done by cpu_restore_state
+     * for handling call return addresses.  */
+    cpu_restore_state(cpu, pc + GETPC_ADJ);
+
+    sigprocmask(SIG_SETMASK, old_set, NULL);
+    cpu_loop_exit(cpu);
+
+    /* never comes here */
+    return 1;
+}
+
+#if defined(__i386__)
+
+#if defined(__NetBSD__)
+#include <ucontext.h>
+
+#define EIP_sig(context)     ((context)->uc_mcontext.__gregs[_REG_EIP])
+#define TRAP_sig(context)    ((context)->uc_mcontext.__gregs[_REG_TRAPNO])
+#define ERROR_sig(context)   ((context)->uc_mcontext.__gregs[_REG_ERR])
+#define MASK_sig(context)    ((context)->uc_sigmask)
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+#include <ucontext.h>
+
+#define EIP_sig(context)  (*((unsigned long *)&(context)->uc_mcontext.mc_eip))
+#define TRAP_sig(context)    ((context)->uc_mcontext.mc_trapno)
+#define ERROR_sig(context)   ((context)->uc_mcontext.mc_err)
+#define MASK_sig(context)    ((context)->uc_sigmask)
+#elif defined(__OpenBSD__)
+#define EIP_sig(context)     ((context)->sc_eip)
+#define TRAP_sig(context)    ((context)->sc_trapno)
+#define ERROR_sig(context)   ((context)->sc_err)
+#define MASK_sig(context)    ((context)->sc_mask)
+#else
+#define EIP_sig(context)     ((context)->uc_mcontext.gregs[REG_EIP])
+#define TRAP_sig(context)    ((context)->uc_mcontext.gregs[REG_TRAPNO])
+#define ERROR_sig(context)   ((context)->uc_mcontext.gregs[REG_ERR])
+#define MASK_sig(context)    ((context)->uc_sigmask)
+#endif
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
+    ucontext_t *uc = puc;
+#elif defined(__OpenBSD__)
+    struct sigcontext *uc = puc;
+#else
+    ucontext_t *uc = puc;
+#endif
+    unsigned long pc;
+    int trapno;
+
+#ifndef REG_EIP
+/* for glibc 2.1 */
+#define REG_EIP    EIP
+#define REG_ERR    ERR
+#define REG_TRAPNO TRAPNO
+#endif
+    pc = EIP_sig(uc);
+    trapno = TRAP_sig(uc);
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             trapno == 0xe ?
+                             (ERROR_sig(uc) >> 1) & 1 : 0,
+                             &MASK_sig(uc));
+}
+
+#elif defined(__x86_64__)
+
+#ifdef __NetBSD__
+#define PC_sig(context)       _UC_MACHINE_PC(context)
+#define TRAP_sig(context)     ((context)->uc_mcontext.__gregs[_REG_TRAPNO])
+#define ERROR_sig(context)    ((context)->uc_mcontext.__gregs[_REG_ERR])
+#define MASK_sig(context)     ((context)->uc_sigmask)
+#elif defined(__OpenBSD__)
+#define PC_sig(context)       ((context)->sc_rip)
+#define TRAP_sig(context)     ((context)->sc_trapno)
+#define ERROR_sig(context)    ((context)->sc_err)
+#define MASK_sig(context)     ((context)->sc_mask)
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+#include <ucontext.h>
+
+#define PC_sig(context)  (*((unsigned long *)&(context)->uc_mcontext.mc_rip))
+#define TRAP_sig(context)     ((context)->uc_mcontext.mc_trapno)
+#define ERROR_sig(context)    ((context)->uc_mcontext.mc_err)
+#define MASK_sig(context)     ((context)->uc_sigmask)
+#else
+#define PC_sig(context)       ((context)->uc_mcontext.gregs[REG_RIP])
+#define TRAP_sig(context)     ((context)->uc_mcontext.gregs[REG_TRAPNO])
+#define ERROR_sig(context)    ((context)->uc_mcontext.gregs[REG_ERR])
+#define MASK_sig(context)     ((context)->uc_sigmask)
+#endif
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    unsigned long pc;
+#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
+    ucontext_t *uc = puc;
+#elif defined(__OpenBSD__)
+    struct sigcontext *uc = puc;
+#else
+    ucontext_t *uc = puc;
+#endif
+
+    pc = PC_sig(uc);
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             TRAP_sig(uc) == 0xe ?
+                             (ERROR_sig(uc) >> 1) & 1 : 0,
+                             &MASK_sig(uc));
+}
+
+#elif defined(_ARCH_PPC)
+
+/***********************************************************************
+ * signal context platform-specific definitions
+ * From Wine
+ */
+#ifdef linux
+/* All Registers access - only for local access */
+#define REG_sig(reg_name, context)              \
+    ((context)->uc_mcontext.regs->reg_name)
+/* Gpr Registers access  */
+#define GPR_sig(reg_num, context)              REG_sig(gpr[reg_num], context)
+/* Program counter */
+#define IAR_sig(context)                       REG_sig(nip, context)
+/* Machine State Register (Supervisor) */
+#define MSR_sig(context)                       REG_sig(msr, context)
+/* Count register */
+#define CTR_sig(context)                       REG_sig(ctr, context)
+/* User's integer exception register */
+#define XER_sig(context)                       REG_sig(xer, context)
+/* Link register */
+#define LR_sig(context)                        REG_sig(link, context)
+/* Condition register */
+#define CR_sig(context)                        REG_sig(ccr, context)
+
+/* Float Registers access  */
+#define FLOAT_sig(reg_num, context)                                     \
+    (((double *)((char *)((context)->uc_mcontext.regs + 48 * 4)))[reg_num])
+#define FPSCR_sig(context) \
+    (*(int *)((char *)((context)->uc_mcontext.regs + (48 + 32 * 2) * 4)))
+/* Exception Registers access */
+#define DAR_sig(context)                       REG_sig(dar, context)
+#define DSISR_sig(context)                     REG_sig(dsisr, context)
+#define TRAP_sig(context)                      REG_sig(trap, context)
+#endif /* linux */
+
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <ucontext.h>
+#define IAR_sig(context)               ((context)->uc_mcontext.mc_srr0)
+#define MSR_sig(context)               ((context)->uc_mcontext.mc_srr1)
+#define CTR_sig(context)               ((context)->uc_mcontext.mc_ctr)
+#define XER_sig(context)               ((context)->uc_mcontext.mc_xer)
+#define LR_sig(context)                ((context)->uc_mcontext.mc_lr)
+#define CR_sig(context)                ((context)->uc_mcontext.mc_cr)
+/* Exception Registers access */
+#define DAR_sig(context)               ((context)->uc_mcontext.mc_dar)
+#define DSISR_sig(context)             ((context)->uc_mcontext.mc_dsisr)
+#define TRAP_sig(context)              ((context)->uc_mcontext.mc_exc)
+#endif /* __FreeBSD__|| __FreeBSD_kernel__ */
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+    ucontext_t *uc = puc;
+#else
+    ucontext_t *uc = puc;
+#endif
+    unsigned long pc;
+    int is_write;
+
+    pc = IAR_sig(uc);
+    is_write = 0;
+#if 0
+    /* ppc 4xx case */
+    if (DSISR_sig(uc) & 0x00800000) {
+        is_write = 1;
+    }
+#else
+    if (TRAP_sig(uc) != 0x400 && (DSISR_sig(uc) & 0x02000000)) {
+        is_write = 1;
+    }
+#endif
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write, &uc->uc_sigmask);
+}
+
+#elif defined(__alpha__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                           void *puc)
+{
+    siginfo_t *info = pinfo;
+    ucontext_t *uc = puc;
+    uint32_t *pc = uc->uc_mcontext.sc_pc;
+    uint32_t insn = *pc;
+    int is_write = 0;
+
+    /* XXX: need kernel patch to get write flag faster */
+    switch (insn >> 26) {
+    case 0x0d: /* stw */
+    case 0x0e: /* stb */
+    case 0x0f: /* stq_u */
+    case 0x24: /* stf */
+    case 0x25: /* stg */
+    case 0x26: /* sts */
+    case 0x27: /* stt */
+    case 0x2c: /* stl */
+    case 0x2d: /* stq */
+    case 0x2e: /* stl_c */
+    case 0x2f: /* stq_c */
+        is_write = 1;
+    }
+
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write, &uc->uc_sigmask);
+}
+#elif defined(__sparc__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    int is_write;
+    uint32_t insn;
+#if !defined(__arch64__) || defined(CONFIG_SOLARIS)
+    uint32_t *regs = (uint32_t *)(info + 1);
+    void *sigmask = (regs + 20);
+    /* XXX: is there a standard glibc define ? */
+    unsigned long pc = regs[1];
+#else
+#ifdef __linux__
+    struct sigcontext *sc = puc;
+    unsigned long pc = sc->sigc_regs.tpc;
+    void *sigmask = (void *)sc->sigc_mask;
+#elif defined(__OpenBSD__)
+    struct sigcontext *uc = puc;
+    unsigned long pc = uc->sc_pc;
+    void *sigmask = (void *)(long)uc->sc_mask;
+#elif defined(__NetBSD__)
+    ucontext_t *uc = puc;
+    unsigned long pc = _UC_MACHINE_PC(uc);
+    void *sigmask = (void *)&uc->uc_sigmask;
+#endif
+#endif
+
+    /* XXX: need kernel patch to get write flag faster */
+    is_write = 0;
+    insn = *(uint32_t *)pc;
+    if ((insn >> 30) == 3) {
+        switch ((insn >> 19) & 0x3f) {
+        case 0x05: /* stb */
+        case 0x15: /* stba */
+        case 0x06: /* sth */
+        case 0x16: /* stha */
+        case 0x04: /* st */
+        case 0x14: /* sta */
+        case 0x07: /* std */
+        case 0x17: /* stda */
+        case 0x0e: /* stx */
+        case 0x1e: /* stxa */
+        case 0x24: /* stf */
+        case 0x34: /* stfa */
+        case 0x27: /* stdf */
+        case 0x37: /* stdfa */
+        case 0x26: /* stqf */
+        case 0x36: /* stqfa */
+        case 0x25: /* stfsr */
+        case 0x3c: /* casa */
+        case 0x3e: /* casxa */
+            is_write = 1;
+            break;
+        }
+    }
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write, sigmask);
+}
+
+#elif defined(__arm__)
+
+#if defined(__NetBSD__)
+#include <ucontext.h>
+#endif
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+#if defined(__NetBSD__)
+    ucontext_t *uc = puc;
+#else
+    ucontext_t *uc = puc;
+#endif
+    unsigned long pc;
+    int is_write;
+
+#if defined(__NetBSD__)
+    pc = uc->uc_mcontext.__gregs[_REG_R15];
+#elif defined(__GLIBC__) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3))
+    pc = uc->uc_mcontext.gregs[R15];
+#else
+    pc = uc->uc_mcontext.arm_pc;
+#endif
+
+    /* error_code is the FSR value, in which bit 11 is WnR (assuming a v6 or
+     * later processor; on v5 we will always report this as a read).
+     */
+    is_write = extract32(uc->uc_mcontext.error_code, 11, 1);
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write,
+                             &uc->uc_sigmask);
+}
+
+#elif defined(__aarch64__)
+
+int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
+{
+    siginfo_t *info = pinfo;
+    ucontext_t *uc = puc;
+    uintptr_t pc = uc->uc_mcontext.pc;
+    uint32_t insn = *(uint32_t *)pc;
+    bool is_write;
+
+    /* XXX: need kernel patch to get write flag faster.  */
+    is_write = (   (insn & 0xbfff0000) == 0x0c000000   /* C3.3.1 */
+                || (insn & 0xbfe00000) == 0x0c800000   /* C3.3.2 */
+                || (insn & 0xbfdf0000) == 0x0d000000   /* C3.3.3 */
+                || (insn & 0xbfc00000) == 0x0d800000   /* C3.3.4 */
+                || (insn & 0x3f400000) == 0x08000000   /* C3.3.6 */
+                || (insn & 0x3bc00000) == 0x39000000   /* C3.3.13 */
+                || (insn & 0x3fc00000) == 0x3d800000   /* ... 128bit */
+                /* Ignore bits 10, 11 & 21, controlling indexing.  */
+                || (insn & 0x3bc00000) == 0x38000000   /* C3.3.8-12 */
+                || (insn & 0x3fe00000) == 0x3c800000   /* ... 128bit */
+                /* Ignore bits 23 & 24, controlling indexing.  */
+                || (insn & 0x3a400000) == 0x28000000); /* C3.3.7,14-16 */
+
+    return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+                             is_write, &uc->uc_sigmask);
+}
+
+#elif defined(__ia64)
+
+#ifndef __ISR_VALID
+  /* This ought to be in <bits/siginfo.h>... */
+# define __ISR_VALID    1
+#endif
+
+int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
+{
+    siginfo_t *info = pinfo;
+    ucontext_t *uc = puc;
+    unsigned long ip;
+    int is_write = 0;
+
+    ip = uc->uc_mcontext.sc_ip;
+    switch (host_signum) {
+    case SIGILL:
+    case SIGFPE:
+    case SIGSEGV:
+    case SIGBUS:
+    case SIGTRAP:
+        if (info->si_code && (info->si_segvflags & __ISR_VALID)) {
+            /* ISR.W (write-access) is bit 33:  */
+            is_write = (info->si_isr >> 33) & 1;
+        }
+        break;
+
+    default:
+        break;
+    }
+    return handle_cpu_signal(ip, (unsigned long)info->si_addr,
+                             is_write,
+                             (sigset_t *)&uc->uc_sigmask);
+}
+
+#elif defined(__s390__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    ucontext_t *uc = puc;
+    unsigned long pc;
+    uint16_t *pinsn;
+    int is_write = 0;
+
+    pc = uc->uc_mcontext.psw.addr;
+
+    /* ??? On linux, the non-rt signal handler has 4 (!) arguments instead
+       of the normal 2 arguments.  The 3rd argument contains the "int_code"
+       from the hardware which does in fact contain the is_write value.
+       The rt signal handler, as far as I can tell, does not give this value
+       at all.  Not that we could get to it from here even if it were.  */
+    /* ??? This is not even close to complete, since it ignores all
+       of the read-modify-write instructions.  */
+    pinsn = (uint16_t *)pc;
+    switch (pinsn[0] >> 8) {
+    case 0x50: /* ST */
+    case 0x42: /* STC */
+    case 0x40: /* STH */
+        is_write = 1;
+        break;
+    case 0xc4: /* RIL format insns */
+        switch (pinsn[0] & 0xf) {
+        case 0xf: /* STRL */
+        case 0xb: /* STGRL */
+        case 0x7: /* STHRL */
+            is_write = 1;
+        }
+        break;
+    case 0xe3: /* RXY format insns */
+        switch (pinsn[2] & 0xff) {
+        case 0x50: /* STY */
+        case 0x24: /* STG */
+        case 0x72: /* STCY */
+        case 0x70: /* STHY */
+        case 0x8e: /* STPQ */
+        case 0x3f: /* STRVH */
+        case 0x3e: /* STRV */
+        case 0x2f: /* STRVG */
+            is_write = 1;
+        }
+        break;
+    }
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write, &uc->uc_sigmask);
+}
+
+#elif defined(__mips__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+                       void *puc)
+{
+    siginfo_t *info = pinfo;
+    ucontext_t *uc = puc;
+    greg_t pc = uc->uc_mcontext.pc;
+    int is_write;
+
+    /* XXX: compute is_write */
+    is_write = 0;
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
+                             is_write, &uc->uc_sigmask);
+}
+
+#else
+
+#error host CPU specific signal handler needed
+
+#endif
+
+/* The softmmu versions of these helpers are in cputlb.c.  */
+
+/* Do not allow unaligned operations to proceed.  Return the host address.  */
+static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
+                               int size, uintptr_t retaddr)
+{
+    /* Enforce qemu required alignment.  */
+    if (unlikely(addr & (size - 1))) {
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
+    }
+    return g2h(addr);
+}
+
+/* Macro to call the above, with local variables from the use context.  */
+#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC())
+
+#define ATOMIC_NAME(X)   HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
+#define EXTRA_ARGS
+
+#define DATA_SIZE 1
+#include "atomic_template.h"
+
+#define DATA_SIZE 2
+#include "atomic_template.h"
+
+#define DATA_SIZE 4
+#include "atomic_template.h"
+
+#ifdef CONFIG_ATOMIC64
+#define DATA_SIZE 8
+#include "atomic_template.h"
+#endif
+
+/* The following is only callable from other helpers, and matches up
+   with the softmmu version.  */
+
+#ifdef CONFIG_ATOMIC128
+
+#undef EXTRA_ARGS
+#undef ATOMIC_NAME
+#undef ATOMIC_MMU_LOOKUP
+
+#define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
+#define ATOMIC_NAME(X) \
+    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
+#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr)
+
+#define DATA_SIZE 16
+#include "atomic_template.h"
+#endif /* CONFIG_ATOMIC128 */
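
Putting the template plumbing together: with the user-mode definitions above, the DATA_SIZE 4 include yields names such as helper_atomic_fetch_addl_le (SUFFIX l, END _le, empty EXTRA_ARGS), while this CONFIG_ATOMIC128 block switches to the _mmu-suffixed form with explicit oi/retaddr arguments. As a rough sketch of the expansion, ATOMIC_NAME(ld) at DATA_SIZE 16 on a little-endian host becomes:

    Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
    {
        Int128 val, *haddr = atomic_mmu_lookup(env, addr, 16, retaddr);
        __atomic_load(haddr, &val, __ATOMIC_RELAXED);
        return val;
    }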