22 files changed, 585 insertions, 612 deletions
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index b08078e7fc..a953897fcc 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -201,7 +201,6 @@ static void alpha_cpu_initfn(Object *obj)
     CPUAlphaState *env = &cpu->env;
 
     cs->env_ptr = env;
-    tlb_flush(cs);
 
     env->lock_addr = -1;
 #if defined(CONFIG_USER_ONLY)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 7f6ad3000b..61799d20e1 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -30,6 +30,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 #include "tcg.h"
 #include "fpu/softfloat.h"
 #include <zlib.h> /* For crc32 */
@@ -509,189 +510,187 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
     return crc32c(acc, buf, bytes) ^ 0xffffffff;
 }
 
-/* Returns 0 on success; 1 otherwise.  */
-static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
-                                       uint64_t new_lo, uint64_t new_hi,
-                                       bool parallel, uintptr_t ra)
+uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
+                                     uint64_t new_lo, uint64_t new_hi)
 {
-    Int128 oldv, cmpv, newv;
+    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    Int128 newv = int128_make128(new_lo, new_hi);
+    Int128 oldv;
+    uintptr_t ra = GETPC();
+    uint64_t o0, o1;
     bool success;
 
-    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
-    newv = int128_make128(new_lo, new_hi);
-
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
-        success = int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t o0, o1;
-
 #ifdef CONFIG_USER_ONLY
-        /* ??? Enforce alignment.  */
-        uint64_t *haddr = g2h(addr);
-
-        helper_retaddr = ra;
-        o0 = ldq_le_p(haddr + 0);
-        o1 = ldq_le_p(haddr + 1);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            stq_le_p(haddr + 0, int128_getlo(newv));
-            stq_le_p(haddr + 1, int128_gethi(newv));
-        }
-        helper_retaddr = 0;
+    /* ??? Enforce alignment.  */
+    uint64_t *haddr = g2h(addr);
+
+    helper_retaddr = ra;
+    o0 = ldq_le_p(haddr + 0);
+    o1 = ldq_le_p(haddr + 1);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        stq_le_p(haddr + 0, int128_getlo(newv));
+        stq_le_p(haddr + 1, int128_gethi(newv));
+    }
+    helper_retaddr = 0;
 #else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
-        TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
-
-        o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
-        o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
-            helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
-        }
-#endif
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+    TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
+
+    o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
+    o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
+        helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
     }
+#endif
 
     return !success;
 }
 
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
-                                              uint64_t new_lo, uint64_t new_hi)
-{
-    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC());
-}
-
 uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                               uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC());
-}
-
-static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
-                                       uint64_t new_lo, uint64_t new_hi,
-                                       bool parallel, uintptr_t ra)
-{
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
     bool success;
+    int mem_idx;
+    TCGMemOpIdx oi;
 
-    /* high and low need to be switched here because this is not actually a
-     * 128bit store but two doublewords stored consecutively
-     */
-    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
-    newv = int128_make128(new_hi, new_lo);
+    assert(HAVE_CMPXCHG128);
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-        success = int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t o0, o1;
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
-#ifdef CONFIG_USER_ONLY
-        /* ??? Enforce alignment.  */
-        uint64_t *haddr = g2h(addr);
-
-        helper_retaddr = ra;
-        o1 = ldq_be_p(haddr + 0);
-        o0 = ldq_be_p(haddr + 1);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            stq_be_p(haddr + 0, int128_gethi(newv));
-            stq_be_p(haddr + 1, int128_getlo(newv));
-        }
-        helper_retaddr = 0;
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
-        TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
-
-        o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
-        o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
-            helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
-        }
-#endif
-    }
+    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    newv = int128_make128(new_lo, new_hi);
+    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
 
+    success = int128_eq(oldv, cmpv);
     return !success;
 }
 
 uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                      uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC());
+    /*
+     * High and low need to be switched here because this is not actually a
+     * 128bit store but two doublewords stored consecutively
+     */
+    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    Int128 newv = int128_make128(new_lo, new_hi);
+    Int128 oldv;
+    uintptr_t ra = GETPC();
+    uint64_t o0, o1;
+    bool success;
+
+#ifdef CONFIG_USER_ONLY
+    /* ??? Enforce alignment.  */
+    uint64_t *haddr = g2h(addr);
+
+    helper_retaddr = ra;
+    o1 = ldq_be_p(haddr + 0);
+    o0 = ldq_be_p(haddr + 1);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        stq_be_p(haddr + 0, int128_gethi(newv));
+        stq_be_p(haddr + 1, int128_getlo(newv));
+    }
+    helper_retaddr = 0;
+#else
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+    TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
+
+    o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
+    o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
+        helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
+    }
+#endif
+
+    return !success;
 }
 
 uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
-                                     uint64_t new_lo, uint64_t new_hi)
+                                              uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
+    Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    bool success;
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+
+    /*
+     * High and low need to be switched here because this is not actually a
+     * 128bit store but two doublewords stored consecutively
+     */
+    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
+    newv = int128_make128(new_hi, new_lo);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+
+    success = int128_eq(oldv, cmpv);
+    return !success;
 }
 
 /* Writes back the old data into Rs.  */
 void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                               uint64_t new_lo, uint64_t new_hi)
 {
-    uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
-    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
     cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
     newv = int128_make128(new_lo, new_hi);
-
-    int mem_idx = cpu_mmu_index(env, false);
-    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
     oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
 
     env->xregs[rs] = int128_getlo(oldv);
     env->xregs[rs + 1] = int128_gethi(oldv);
-#endif
 }
 
 void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                               uint64_t new_hi, uint64_t new_lo)
 {
-    uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
-    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
     cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
     newv = int128_make128(new_lo, new_hi);
-
-    int mem_idx = cpu_mmu_index(env, false);
-    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
     oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
 
     env->xregs[rs + 1] = int128_getlo(oldv);
     env->xregs[rs] = int128_gethi(oldv);
-#endif
 }
 
 /*
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8a24278d79..bb9c4d8ac7 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -37,6 +37,7 @@
 
 #include "trace-tcg.h"
 #include "translate-a64.h"
+#include "qemu/atomic128.h"
 
 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
@@ -2086,26 +2087,27 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                        get_mem_index(s),
                                        MO_64 | MO_ALIGN | s->be_data);
             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
-        } else if (s->be_data == MO_LE) {
-            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+            if (!HAVE_CMPXCHG128) {
+                gen_helper_exit_atomic(cpu_env);
+                s->base.is_jmp = DISAS_NORETURN;
+            } else if (s->be_data == MO_LE) {
                 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
                                                         cpu_exclusive_addr,
                                                         cpu_reg(s, rt),
                                                         cpu_reg(s, rt2));
             } else {
-                gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
-                                               cpu_reg(s, rt), cpu_reg(s, rt2));
-            }
-        } else {
-            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
                 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
                                                         cpu_exclusive_addr,
                                                         cpu_reg(s, rt),
                                                         cpu_reg(s, rt2));
-            } else {
-                gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
-                                               cpu_reg(s, rt), cpu_reg(s, rt2));
             }
+        } else if (s->be_data == MO_LE) {
+            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
+                                           cpu_reg(s, rt), cpu_reg(s, rt2));
+        } else {
+            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
+                                           cpu_reg(s, rt), cpu_reg(s, rt2));
         }
     } else {
         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
@@ -2175,14 +2177,18 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
         }
         tcg_temp_free_i64(cmp);
     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
-        TCGv_i32 tcg_rs = tcg_const_i32(rs);
-
-        if (s->be_data == MO_LE) {
-            gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+        if (HAVE_CMPXCHG128) {
+            TCGv_i32 tcg_rs = tcg_const_i32(rs);
+            if (s->be_data == MO_LE) {
+                gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            } else {
+                gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            }
+            tcg_temp_free_i32(tcg_rs);
         } else {
-            gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            gen_helper_exit_atomic(cpu_env);
+            s->base.is_jmp = DISAS_NORETURN;
         }
-        tcg_temp_free_i32(tcg_rs);
     } else {
         TCGv_i64 d1 = tcg_temp_new_i64();
         TCGv_i64 d2 = tcg_temp_new_i64();
diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs
index 04678f5503..32bf966300 100644
--- a/target/i386/Makefile.objs
+++ b/target/i386/Makefile.objs
@@ -3,17 +3,20 @@ obj-$(CONFIG_TCG) += translate.o
 obj-$(CONFIG_TCG) += bpt_helper.o cc_helper.o excp_helper.o fpu_helper.o
 obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o mpx_helper.o
 obj-$(CONFIG_TCG) += seg_helper.o smm_helper.o svm_helper.o
-obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o
-obj-$(CONFIG_KVM) += kvm.o hyperv.o
-obj-$(CONFIG_SEV) += sev.o
+ifeq ($(CONFIG_SOFTMMU),y)
+obj-y += machine.o arch_memory_mapping.o arch_dump.o monitor.o
+obj-$(CONFIG_KVM) += kvm.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
-obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o
-# HAX support
-ifdef CONFIG_WIN32
+obj-$(CONFIG_HYPERV) += hyperv.o
+obj-$(call lnot,$(CONFIG_HYPERV)) += hyperv-stub.o
+ifeq ($(CONFIG_WIN32),y)
 obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-windows.o
 endif
-ifdef CONFIG_DARWIN
+ifeq ($(CONFIG_DARWIN),y)
 obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-darwin.o
 obj-$(CONFIG_HVF) += hvf/
 endif
 obj-$(CONFIG_WHPX) += whpx-all.o
+endif
+obj-$(CONFIG_SEV) += sev.o
+obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index c88876dfe3..f0d9f7cf49 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5564,6 +5564,7 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false),
     DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false),
     DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false),
+    DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false),
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
     DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
@@ -5606,6 +5607,8 @@ static Property x86_cpu_properties[] = {
      * to the specific Windows version being used."
      */
     DEFINE_PROP_INT32("x-hv-max-vps", X86CPU, hv_max_vps, -1),
+    DEFINE_PROP_BOOL("x-hv-synic-kvm-only", X86CPU, hyperv_synic_kvm_only,
+                     false),
     DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 730c06f80a..663f3a5e67 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1378,10 +1378,12 @@ struct X86CPU {
     bool hyperv_vpindex;
     bool hyperv_runtime;
     bool hyperv_synic;
+    bool hyperv_synic_kvm_only;
     bool hyperv_stimer;
     bool hyperv_frequencies;
     bool hyperv_reenlightenment;
     bool hyperv_tlbflush;
+    bool hyperv_ipi;
     bool check_cpuid;
     bool enforce_cpuid;
     bool expose_kvm;
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 9f52bc413a..e193022c03 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -73,7 +73,6 @@
 #include "target/i386/cpu.h"
 
 HVFState *hvf_state;
-int hvf_disabled = 1;
 
 static void assert_hvf_ok(hv_return_t ret)
 {
@@ -604,11 +603,6 @@ int hvf_init_vcpu(CPUState *cpu)
     return 0;
 }
 
-void hvf_disable(int shouldDisable)
-{
-    hvf_disabled = shouldDisable;
-}
-
 static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
@@ -934,7 +928,7 @@ int hvf_vcpu_exec(CPUState *cpu)
     return ret;
 }
 
-static bool hvf_allowed;
+bool hvf_allowed;
 
 static int hvf_accel_init(MachineState *ms)
 {
@@ -942,7 +936,6 @@ static int hvf_accel_init(MachineState *ms)
     hv_return_t ret;
     HVFState *s;
 
-    hvf_disable(0);
     ret = hv_vm_create(HV_VM_DEFAULT);
     assert_hvf_ok(ret);
 
diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index 2d7540fe7c..2e33b69541 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -113,7 +113,8 @@ static void decode_modrm_reg(CPUX86State *env, struct x86_decode *decode,
 {
     op->type = X86_VAR_REG;
     op->reg = decode->modrm.reg;
-    op->ptr = get_reg_ref(env, op->reg, decode->rex.r, decode->operand_size);
+    op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r,
+                          decode->operand_size);
 }
 
 static void decode_rax(CPUX86State *env, struct x86_decode *decode,
@@ -121,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode,
 {
     op->type = X86_VAR_REG;
     op->reg = R_EAX;
-    op->ptr = get_reg_ref(env, op->reg, 0, decode->operand_size);
+    op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0,
+                          decode->operand_size);
 }
 
 static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode,
@@ -263,16 +265,16 @@ static void decode_incgroup(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x40;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_decgroup(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x48;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode)
@@ -288,16 +290,16 @@ static void decode_pushgroup(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x50;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_popgroup(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x58;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_jxx(CPUX86State *env, struct x86_decode *decode)
@@ -378,16 +380,16 @@ static void decode_xchgroup(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x90;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_movgroup(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0xb8;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
     decode_immediate(env, decode, &decode->op[1], decode->operand_size);
 }
 
@@ -402,8 +404,8 @@ static void decode_movgroup8(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0xb0;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
     decode_immediate(env, decode, &decode->op[1], decode->operand_size);
 }
 
@@ -412,7 +414,8 @@ static void decode_rcx(CPUX86State *env, struct x86_decode *decode,
 {
     op->type = X86_VAR_REG;
     op->reg = R_ECX;
-    op->ptr = get_reg_ref(env, op->reg, decode->rex.b, decode->operand_size);
+    op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b,
+                          decode->operand_size);
 }
 
 struct decode_tbl {
@@ -639,8 +642,8 @@ static void decode_bswap(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[1] - 0xc8;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_d9_4(CPUX86State *env, struct x86_decode *decode)
@@ -1686,7 +1689,8 @@ calc_addr:
     }
 }
 
-target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size)
+target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended,
+                         int size)
 {
     target_ulong ptr = 0;
     int which = 0;
@@ -1698,7 +1702,7 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size)
 
     switch (size) {
     case 1:
-        if (is_extended || reg < 4) {
+        if (is_extended || reg < 4 || rex) {
             which = 1;
             ptr = (target_ulong)&RL(env, reg);
         } else {
@@ -1714,10 +1718,11 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size)
     return ptr;
 }
 
-target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size)
+target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended,
+                         int size)
 {
     target_ulong val = 0;
-    memcpy(&val, (void *)get_reg_ref(env, reg, is_extended, size), size);
+    memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size);
     return val;
 }
 
@@ -1739,7 +1744,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode,
         if (base_reg == R_ESP || base_reg == R_EBP) {
             *sel = R_SS;
         }
-        base = get_reg_val(env, decode->sib.base, decode->rex.b, addr_size);
+        base = get_reg_val(env, decode->sib.base, decode->rex.rex,
+                           decode->rex.b, addr_size);
     }
 
     if (decode->rex.x) {
@@ -1747,7 +1753,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode,
     }
 
     if (index_reg != R_ESP) {
-        scaled_index = get_reg_val(env, index_reg, decode->rex.x, addr_size) <<
+        scaled_index = get_reg_val(env, index_reg, decode->rex.rex,
+                                   decode->rex.x, addr_size) <<
                                    decode->sib.scale;
     }
     return base + scaled_index;
@@ -1776,7 +1783,8 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode,
         if (decode->modrm.rm == R_EBP || decode->modrm.rm == R_ESP) {
             seg = R_SS;
         }
-        ptr += get_reg_val(env, decode->modrm.rm, decode->rex.b, addr_size);
+        ptr += get_reg_val(env, decode->modrm.rm, decode->rex.rex,
+                           decode->rex.b, addr_size);
     }
 
     if (X86_DECODE_CMD_LEA == decode->cmd) {
@@ -1805,7 +1813,8 @@ void calc_modrm_operand64(CPUX86State *env, struct x86_decode *decode,
     } else if (0 == mod && 5 == rm) {
         ptr = RIP(env) + decode->len + (int32_t) offset;
     } else {
-        ptr = get_reg_val(env, src, decode->rex.b, 8) + (int64_t) offset;
+        ptr = get_reg_val(env, src, decode->rex.rex, decode->rex.b, 8) +
+              (int64_t) offset;
     }
 
     if (X86_DECODE_CMD_LEA == decode->cmd) {
@@ -1822,8 +1831,8 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode,
     if (3 == decode->modrm.mod) {
         op->reg = decode->modrm.reg;
         op->type = X86_VAR_REG;
-        op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.b,
-                              decode->operand_size);
+        op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex,
+                              decode->rex.b, decode->operand_size);
         return;
     }
 
diff --git a/target/i386/hvf/x86_decode.h b/target/i386/hvf/x86_decode.h
index 5ab6f31fa5..ef4bcab310 100644
--- a/target/i386/hvf/x86_decode.h
+++ b/target/i386/hvf/x86_decode.h
@@ -303,8 +303,10 @@ uint64_t sign(uint64_t val, int size);
 
 uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode);
 
-target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size);
-target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size);
+target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended,
+                         int size);
+target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended,
+                         int size);
 void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode,
                         struct x86_decode_op *op);
 target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode,
diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h
index d6d5a79293..8c572cd7c2 100644
--- a/target/i386/hyperv-proto.h
+++ b/target/i386/hyperv-proto.h
@@ -1,7 +1,7 @@
 /*
- * Definitions for Hyper-V guest/hypervisor interaction
+ * Definitions for Hyper-V guest/hypervisor interaction - x86-specific part
  *
- * Copyright (C) 2017 Parallels International GmbH
+ * Copyright (c) 2017-2018 Virtuozzo International GmbH.
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -10,7 +10,7 @@
 #ifndef TARGET_I386_HYPERV_PROTO_H
 #define TARGET_I386_HYPERV_PROTO_H
 
-#include "qemu/bitmap.h"
+#include "hw/hyperv/hyperv-proto.h"
 
 #define HV_CPUID_VENDOR_AND_MAX_FUNCTIONS     0x40000000
 #define HV_CPUID_INTERFACE                    0x40000001
@@ -58,6 +58,7 @@
 #define HV_APIC_ACCESS_RECOMMENDED          (1u << 3)
 #define HV_SYSTEM_RESET_RECOMMENDED         (1u << 4)
 #define HV_RELAXED_TIMING_RECOMMENDED       (1u << 5)
+#define HV_CLUSTER_IPI_RECOMMENDED          (1u << 10)
 #define HV_EX_PROCESSOR_MASKS_RECOMMENDED   (1u << 11)
 
 /*
@@ -138,25 +139,6 @@
 #define HV_X64_MSR_TSC_EMULATION_STATUS         0x40000108
 
 /*
- * Hypercall status code
- */
-#define HV_STATUS_SUCCESS                     0
-#define HV_STATUS_INVALID_HYPERCALL_CODE      2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT     3
-#define HV_STATUS_INVALID_ALIGNMENT           4
-#define HV_STATUS_INVALID_PARAMETER           5
-#define HV_STATUS_INSUFFICIENT_MEMORY         11
-#define HV_STATUS_INVALID_CONNECTION_ID       18
-#define HV_STATUS_INSUFFICIENT_BUFFERS        19
-
-/*
- * Hypercall numbers
- */
-#define HV_POST_MESSAGE                       0x005c
-#define HV_SIGNAL_EVENT                       0x005d
-#define HV_HYPERCALL_FAST                     (1u << 16)
-
-/*
  * Hypercall MSR bits
  */
 #define HV_HYPERCALL_ENABLE                   (1u << 0)
@@ -165,7 +147,6 @@
  * Synthetic interrupt controller definitions
  */
 #define HV_SYNIC_VERSION                      1
-#define HV_SINT_COUNT                         16
 #define HV_SYNIC_ENABLE                       (1u << 0)
 #define HV_SIMP_ENABLE                        (1u << 0)
 #define HV_SIEFP_ENABLE                       (1u << 0)
@@ -175,94 +156,5 @@
 
 #define HV_STIMER_COUNT                       4
 
-/*
- * Message size
- */
-#define HV_MESSAGE_PAYLOAD_SIZE               240
-
-/*
- * Message types
- */
-#define HV_MESSAGE_NONE                       0x00000000
-#define HV_MESSAGE_VMBUS                      0x00000001
-#define HV_MESSAGE_UNMAPPED_GPA               0x80000000
-#define HV_MESSAGE_GPA_INTERCEPT              0x80000001
-#define HV_MESSAGE_TIMER_EXPIRED              0x80000010
-#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE  0x80000020
-#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION    0x80000021
-#define HV_MESSAGE_UNSUPPORTED_FEATURE        0x80000022
-#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE    0x80000040
-#define HV_MESSAGE_X64_IOPORT_INTERCEPT       0x80010000
-#define HV_MESSAGE_X64_MSR_INTERCEPT          0x80010001
-#define HV_MESSAGE_X64_CPUID_INTERCEPT        0x80010002
-#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT    0x80010003
-#define HV_MESSAGE_X64_APIC_EOI               0x80010004
-#define HV_MESSAGE_X64_LEGACY_FP_ERROR        0x80010005
-
-/*
- * Message flags
- */
-#define HV_MESSAGE_FLAG_PENDING               0x1
-
-/*
- * Event flags number per SINT
- */
-#define HV_EVENT_FLAGS_COUNT                  (256 * 8)
-
-/*
- * Connection id valid bits
- */
-#define HV_CONNECTION_ID_MASK                 0x00ffffff
-
-/*
- * Input structure for POST_MESSAGE hypercall
- */
-struct hyperv_post_message_input {
-    uint32_t connection_id;
-    uint32_t _reserved;
-    uint32_t message_type;
-    uint32_t payload_size;
-    uint8_t  payload[HV_MESSAGE_PAYLOAD_SIZE];
-};
-
-/*
- * Input structure for SIGNAL_EVENT hypercall
- */
-struct hyperv_signal_event_input {
-    uint32_t connection_id;
-    uint16_t flag_number;
-    uint16_t _reserved_zero;
-};
-
-/*
- * SynIC message structures
- */
-struct hyperv_message_header {
-    uint32_t message_type;
-    uint8_t  payload_size;
-    uint8_t  message_flags; /* HV_MESSAGE_FLAG_XX */
-    uint8_t  _reserved[2];
-    uint64_t sender;
-};
-
-struct hyperv_message {
-    struct hyperv_message_header header;
-    uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE];
-};
-
-struct hyperv_message_page {
-    struct hyperv_message slot[HV_SINT_COUNT];
-};
-
-/*
- * SynIC event flags structures
- */
-struct hyperv_event_flags {
-    DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT);
-};
-
-struct hyperv_event_flags_page {
-    struct hyperv_event_flags slot[HV_SINT_COUNT];
-};
 
 #endif
diff --git a/target/i386/hyperv-stub.c b/target/i386/hyperv-stub.c
new file mode 100644
index 0000000000..fe548cbae2
--- /dev/null
+++ b/target/i386/hyperv-stub.c
@@ -0,0 +1,48 @@
+/*
+ * Stubs for CONFIG_HYPERV=n
+ *
+ * Copyright (c) 2015-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hyperv.h"
+
+#ifdef CONFIG_KVM
+int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
+{
+    switch (exit->type) {
+    case KVM_EXIT_HYPERV_SYNIC:
+        if (!cpu->hyperv_synic) {
+            return -1;
+        }
+
+        /*
+         * Tracking the changes in the MSRs is unnecessary as there are no
+         * users for them beside save/load, which is handled nicely by the
+         * generic MSR save/load code
+         */
+        return 0;
+    case KVM_EXIT_HYPERV_HCALL:
+        exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE;
+        return 0;
+    default:
+        return -1;
+    }
+}
+#endif
+
+int hyperv_x86_synic_add(X86CPU *cpu)
+{
+    return -ENOSYS;
+}
+
+void hyperv_x86_synic_reset(X86CPU *cpu)
+{
+}
+
+void hyperv_x86_synic_update(X86CPU *cpu)
+{
+}
diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c
index 3065d765ed..b264a28620 100644
--- a/target/i386/hyperv.c
+++ b/target/i386/hyperv.c
@@ -14,16 +14,36 @@
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
 #include "hyperv.h"
+#include "hw/hyperv/hyperv.h"
 #include "hyperv-proto.h"
 
-uint32_t hyperv_vp_index(X86CPU *cpu)
+int hyperv_x86_synic_add(X86CPU *cpu)
 {
-    return CPU(cpu)->cpu_index;
+    hyperv_synic_add(CPU(cpu));
+    return 0;
 }
 
-X86CPU *hyperv_find_vcpu(uint32_t vp_index)
+void hyperv_x86_synic_reset(X86CPU *cpu)
 {
-    return X86_CPU(qemu_get_cpu(vp_index));
+    hyperv_synic_reset(CPU(cpu));
+}
+
+void hyperv_x86_synic_update(X86CPU *cpu)
+{
+    CPUX86State *env = &cpu->env;
+    bool enable = env->msr_hv_synic_control & HV_SYNIC_ENABLE;
+    hwaddr msg_page_addr = (env->msr_hv_synic_msg_page & HV_SIMP_ENABLE) ?
+        (env->msr_hv_synic_msg_page & TARGET_PAGE_MASK) : 0;
+    hwaddr event_page_addr = (env->msr_hv_synic_evt_page & HV_SIEFP_ENABLE) ?
+        (env->msr_hv_synic_evt_page & TARGET_PAGE_MASK) : 0;
+    hyperv_synic_update(CPU(cpu), enable, msg_page_addr, event_page_addr);
+}
+
+static void async_synic_update(CPUState *cs, run_on_cpu_data data)
+{
+    qemu_mutex_lock_iothread();
+    hyperv_x86_synic_update(X86_CPU(cs));
+    qemu_mutex_unlock_iothread();
 }
 
 int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
@@ -36,11 +56,6 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
             return -1;
         }
 
-        /*
-         * For now just track changes in SynIC control and msg/evt pages msr's.
-         * When SynIC messaging/events processing will be added in future
-         * here we will do messages queues flushing and pages remapping.
-         */
         switch (exit->u.synic.msr) {
         case HV_X64_MSR_SCONTROL:
             env->msr_hv_synic_control = exit->u.synic.control;
@@ -54,98 +69,33 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
         default:
             return -1;
         }
+
+        /*
+         * this will run in this cpu thread before it returns to KVM, but in a
+         * safe environment (i.e. when all cpus are quiescent) -- this is
+         * necessary because memory hierarchy is being changed
+         */
+        async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL);
+
         return 0;
     case KVM_EXIT_HYPERV_HCALL: {
-        uint16_t code;
+        uint16_t code = exit->u.hcall.input & 0xffff;
+        bool fast = exit->u.hcall.input & HV_HYPERCALL_FAST;
+        uint64_t param = exit->u.hcall.params[0];
 
-        code  = exit->u.hcall.input & 0xffff;
         switch (code) {
         case HV_POST_MESSAGE:
+            exit->u.hcall.result = hyperv_hcall_post_message(param, fast);
+            break;
         case HV_SIGNAL_EVENT:
+            exit->u.hcall.result = hyperv_hcall_signal_event(param, fast);
+            break;
         default:
             exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE;
-            return 0;
         }
+        return 0;
     }
     default:
         return -1;
     }
 }
-
-static void kvm_hv_sint_ack_handler(EventNotifier *notifier)
-{
-    HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
-                                           sint_ack_notifier);
-    event_notifier_test_and_clear(notifier);
-    if (sint_route->sint_ack_clb) {
-        sint_route->sint_ack_clb(sint_route);
-    }
-}
-
-HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
-                                      HvSintAckClb sint_ack_clb)
-{
-    HvSintRoute *sint_route;
-    int r, gsi;
-
-    sint_route = g_malloc0(sizeof(*sint_route));
-    r = event_notifier_init(&sint_route->sint_set_notifier, false);
-    if (r) {
-        goto err;
-    }
-
-    r = event_notifier_init(&sint_route->sint_ack_notifier, false);
-    if (r) {
-        goto err_sint_set_notifier;
-    }
-
-    event_notifier_set_handler(&sint_route->sint_ack_notifier,
-                               kvm_hv_sint_ack_handler);
-
-    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
-    if (gsi < 0) {
-        goto err_gsi;
-    }
-
-    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
-                                           &sint_route->sint_set_notifier,
-                                           &sint_route->sint_ack_notifier, gsi);
-    if (r) {
-        goto err_irqfd;
-    }
-    sint_route->gsi = gsi;
-    sint_route->sint_ack_clb = sint_ack_clb;
-    sint_route->vp_index = vp_index;
-    sint_route->sint = sint;
-
-    return sint_route;
-
-err_irqfd:
-    kvm_irqchip_release_virq(kvm_state, gsi);
-err_gsi:
-    event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
-    event_notifier_cleanup(&sint_route->sint_ack_notifier);
-err_sint_set_notifier:
-    event_notifier_cleanup(&sint_route->sint_set_notifier);
-err:
-    g_free(sint_route);
-
-    return NULL;
-}
-
-void kvm_hv_sint_route_destroy(HvSintRoute *sint_route)
-{
-    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
-                                          &sint_route->sint_set_notifier,
-                                          sint_route->gsi);
-    kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
-    event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
-    event_notifier_cleanup(&sint_route->sint_ack_notifier);
-    event_notifier_cleanup(&sint_route->sint_set_notifier);
-    g_free(sint_route);
-}
-
-int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route)
-{
-    return event_notifier_set(&sint_route->sint_set_notifier);
-}
diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h
index 00c9b454bb..67543296c3 100644
--- a/target/i386/hyperv.h
+++ b/target/i386/hyperv.h
@@ -16,30 +16,14 @@
 
 #include "cpu.h"
 #include "sysemu/kvm.h"
-#include "qemu/event_notifier.h"
-
-typedef struct HvSintRoute HvSintRoute;
-typedef void (*HvSintAckClb)(HvSintRoute *sint_route);
-
-struct HvSintRoute {
-    uint32_t sint;
-    uint32_t vp_index;
-    int gsi;
-    EventNotifier sint_set_notifier;
-    EventNotifier sint_ack_notifier;
-    HvSintAckClb sint_ack_clb;
-};
+#include "hw/hyperv/hyperv.h"
 
+#ifdef CONFIG_KVM
 int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit);
+#endif
 
-HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
-                                      HvSintAckClb sint_ack_clb);
-
-void kvm_hv_sint_route_destroy(HvSintRoute *sint_route);
-
-int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route);
-
-uint32_t hyperv_vp_index(X86CPU *cpu);
-X86CPU *hyperv_find_vcpu(uint32_t vp_index);
+int hyperv_x86_synic_add(X86CPU *cpu);
+void hyperv_x86_synic_reset(X86CPU *cpu);
+void hyperv_x86_synic_update(X86CPU *cpu);
 
 #endif
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index dc4047b02f..115d8b4c14 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -608,7 +608,8 @@ static bool hyperv_enabled(X86CPU *cpu)
             cpu->hyperv_synic ||
             cpu->hyperv_stimer ||
             cpu->hyperv_reenlightenment ||
-            cpu->hyperv_tlbflush);
+            cpu->hyperv_tlbflush ||
+            cpu->hyperv_ipi);
 }
 
 static int kvm_arch_set_tsc_khz(CPUState *cs)
@@ -733,9 +734,20 @@ static int hyperv_handle_properties(CPUState *cs)
         env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE;
     }
     if (cpu->hyperv_synic) {
-        if (!has_msr_hv_synic ||
-            kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0)) {
-            fprintf(stderr, "Hyper-V SynIC is not supported by kernel\n");
+        unsigned int cap = KVM_CAP_HYPERV_SYNIC;
+        if (!cpu->hyperv_synic_kvm_only) {
+            if (!cpu->hyperv_vpindex) {
+                fprintf(stderr, "Hyper-V SynIC "
+                        "(requested by 'hv-synic' cpu flag) "
+                        "requires Hyper-V VP_INDEX ('hv-vpindex')\n");
+            return -ENOSYS;
+            }
+            cap = KVM_CAP_HYPERV_SYNIC2;
+        }
+
+        if (!has_msr_hv_synic || !kvm_check_extension(cs->kvm_state, cap)) {
+            fprintf(stderr, "Hyper-V SynIC (requested by 'hv-synic' cpu flag) "
+                    "is not supported by kernel\n");
             return -ENOSYS;
         }
 
@@ -753,12 +765,14 @@ static int hyperv_handle_properties(CPUState *cs)
 
 static int hyperv_init_vcpu(X86CPU *cpu)
 {
+    CPUState *cs = CPU(cpu);
+    int ret;
+
     if (cpu->hyperv_vpindex && !hv_vpindex_settable) {
         /*
          * the kernel doesn't support setting vp_index; assert that its value
          * is in sync
          */
-        int ret;
         struct {
             struct kvm_msrs info;
             struct kvm_msr_entry entries[1];
@@ -767,18 +781,38 @@ static int hyperv_init_vcpu(X86CPU *cpu)
             .entries[0].index = HV_X64_MSR_VP_INDEX,
         };
 
-        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data);
         if (ret < 0) {
             return ret;
         }
         assert(ret == 1);
 
-        if (msr_data.entries[0].data != hyperv_vp_index(cpu)) {
+        if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) {
             error_report("kernel's vp_index != QEMU's vp_index");
             return -ENXIO;
         }
     }
 
+    if (cpu->hyperv_synic) {
+        uint32_t synic_cap = cpu->hyperv_synic_kvm_only ?
+            KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2;
+        ret = kvm_vcpu_enable_cap(cs, synic_cap, 0);
+        if (ret < 0) {
+            error_report("failed to turn on HyperV SynIC in KVM: %s",
+                         strerror(-ret));
+            return ret;
+        }
+
+        if (!cpu->hyperv_synic_kvm_only) {
+            ret = hyperv_x86_synic_add(cpu);
+            if (ret < 0) {
+                error_report("failed to create HyperV SynIC: %s",
+                             strerror(-ret));
+                return ret;
+            }
+        }
+    }
+
     return 0;
 }
 
@@ -888,6 +922,17 @@ int kvm_arch_init_vcpu(CPUState *cs)
             c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED;
             c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
         }
+        if (cpu->hyperv_ipi) {
+            if (kvm_check_extension(cs->kvm_state,
+                                    KVM_CAP_HYPERV_SEND_IPI) <= 0) {
+                fprintf(stderr, "Hyper-V IPI send support "
+                        "(requested by 'hv-ipi' cpu flag) "
+                        " is not supported by kernel\n");
+                return -ENOSYS;
+            }
+            c->eax |= HV_CLUSTER_IPI_RECOMMENDED;
+            c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
+        }
 
         c->ebx = cpu->hyperv_spinlock_attempts;
 
@@ -1153,7 +1198,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
             if (local_err) {
                 error_report_err(local_err);
                 error_free(invtsc_mig_blocker);
-                goto fail;
+                return r;
             }
             /* for savevm */
             vmstate_x86_cpu.unmigratable = 1;
@@ -1226,6 +1271,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
         for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) {
             env->msr_hv_synic_sint[i] = HV_SINT_MASKED;
         }
+
+        hyperv_x86_synic_reset(cpu);
     }
 }
 
@@ -1937,7 +1984,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
             kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime);
         }
         if (cpu->hyperv_vpindex && hv_vpindex_settable) {
-            kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, hyperv_vp_index(cpu));
+            kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX,
+                              hyperv_vp_index(CPU(cpu)));
         }
         if (cpu->hyperv_synic) {
             int j;
@@ -2686,7 +2734,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
     events.exception.nr = env->exception_injected;
     events.exception.has_error_code = env->has_error_code;
     events.exception.error_code = env->error_code;
-    events.exception.pad = 0;
 
     events.interrupt.injected = (env->interrupt_injected >= 0);
     events.interrupt.nr = env->interrupt_injected;
@@ -2695,7 +2742,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
     events.nmi.injected = env->nmi_injected;
     events.nmi.pending = env->nmi_pending;
     events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
-    events.nmi.pad = 0;
 
     events.sipi_vector = env->sipi_vector;
     events.flags = 0;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 084c2c73a8..225b5d433b 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -7,6 +7,7 @@
 #include "hw/i386/pc.h"
 #include "hw/isa/isa.h"
 #include "migration/cpu.h"
+#include "hyperv.h"
 
 #include "sysemu/kvm.h"
 
@@ -672,11 +673,19 @@ static bool hyperv_synic_enable_needed(void *opaque)
     return false;
 }
 
+static int hyperv_synic_post_load(void *opaque, int version_id)
+{
+    X86CPU *cpu = opaque;
+    hyperv_x86_synic_update(cpu);
+    return 0;
+}
+
 static const VMStateDescription vmstate_msr_hyperv_synic = {
     .name = "cpu/msr_hyperv_synic",
     .version_id = 1,
     .minimum_version_id = 1,
     .needed = hyperv_synic_enable_needed,
+    .post_load = hyperv_synic_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_hv_synic_control, X86CPU),
         VMSTATE_UINT64(env.msr_hv_synic_evt_page, X86CPU),
diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c
index 30c26b9d9c..6cc53bcb40 100644
--- a/target/i386/mem_helper.c
+++ b/target/i386/mem_helper.c
@@ -23,6 +23,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 #include "tcg.h"
 
 void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
@@ -137,10 +138,7 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
 
     if ((a0 & 0xf) != 0) {
         raise_exception_ra(env, EXCP0D_GPF, ra);
-    } else {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
+    } else if (HAVE_CMPXCHG128) {
         int eflags = cpu_cc_compute_all(env, CC_OP);
 
         Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
@@ -159,7 +157,8 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
             eflags &= ~CC_Z;
         }
         CC_SRC = eflags;
-#endif
+    } else {
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
     }
 }
 #endif
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index ef64248bc4..7a1481fd0b 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -800,7 +800,7 @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
 DEF_HELPER_1(tbegin, void, env)
 DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
 
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+#ifdef TARGET_PPC64
 DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
 DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
 DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 8f0d86d104..a1485fad9b 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -25,6 +25,7 @@
 #include "exec/cpu_ldst.h"
 #include "tcg.h"
 #include "internal.h"
+#include "qemu/atomic128.h"
 
 //#define DEBUG_OP
 
@@ -215,11 +216,15 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
     return i;
 }
 
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+#ifdef TARGET_PPC64
 uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
                                uint32_t opidx)
 {
-    Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
+    Int128 ret;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
     env->retxh = int128_gethi(ret);
     return int128_getlo(ret);
 }
@@ -227,7 +232,11 @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
 uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
                                uint32_t opidx)
 {
-    Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
+    Int128 ret;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
     env->retxh = int128_gethi(ret);
     return int128_getlo(ret);
 }
@@ -235,14 +244,22 @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
 void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr,
                             uint64_t lo, uint64_t hi, uint32_t opidx)
 {
-    Int128 val = int128_make128(lo, hi);
+    Int128 val;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    val = int128_make128(lo, hi);
     helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC());
 }
 
 void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
                             uint64_t lo, uint64_t hi, uint32_t opidx)
 {
-    Int128 val = int128_make128(lo, hi);
+    Int128 val;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    val = int128_make128(lo, hi);
     helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
 }
 
@@ -252,6 +269,9 @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
 {
     bool success = false;
 
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_CMPXCHG128);
+
     if (likely(addr == env->reserve_addr)) {
         Int128 oldv, cmpv, newv;
 
@@ -271,6 +291,9 @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
 {
     bool success = false;
 
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_CMPXCHG128);
+
     if (likely(addr == env->reserve_addr)) {
         Int128 oldv, cmpv, newv;
 
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 881743571b..4e59dd5f42 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -33,6 +33,7 @@
 #include "trace-tcg.h"
 #include "exec/translator.h"
 #include "exec/log.h"
+#include "qemu/atomic128.h"
 
 
 #define CPU_SINGLE_STEP 0x1
@@ -2654,22 +2655,22 @@ static void gen_lq(DisasContext *ctx)
     hi = cpu_gpr[rd];
 
     if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-#ifdef CONFIG_ATOMIC128
-        TCGv_i32 oi = tcg_temp_new_i32();
-        if (ctx->le_mode) {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
-            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        if (HAVE_ATOMIC128) {
+            TCGv_i32 oi = tcg_temp_new_i32();
+            if (ctx->le_mode) {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
+                gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+            } else {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
+                gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            }
+            tcg_temp_free_i32(oi);
+            tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
         } else {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
-            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
         }
-        tcg_temp_free_i32(oi);
-        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
-#else
-        /* Restart with exclusive lock.  */
-        gen_helper_exit_atomic(cpu_env);
-        ctx->base.is_jmp = DISAS_NORETURN;
-#endif
     } else if (ctx->le_mode) {
         tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ);
         gen_addr_add(ctx, EA, EA, 8);
@@ -2805,21 +2806,21 @@ static void gen_std(DisasContext *ctx)
         hi = cpu_gpr[rs];
 
         if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-#ifdef CONFIG_ATOMIC128
-            TCGv_i32 oi = tcg_temp_new_i32();
-            if (ctx->le_mode) {
-                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
-                gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
+            if (HAVE_ATOMIC128) {
+                TCGv_i32 oi = tcg_temp_new_i32();
+                if (ctx->le_mode) {
+                    tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
+                    gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
+                } else {
+                    tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
+                    gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
+                }
+                tcg_temp_free_i32(oi);
             } else {
-                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
-                gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
+                /* Restart with exclusive lock.  */
+                gen_helper_exit_atomic(cpu_env);
+                ctx->base.is_jmp = DISAS_NORETURN;
             }
-            tcg_temp_free_i32(oi);
-#else
-            /* Restart with exclusive lock.  */
-            gen_helper_exit_atomic(cpu_env);
-            ctx->base.is_jmp = DISAS_NORETURN;
-#endif
         } else if (ctx->le_mode) {
             tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ);
             gen_addr_add(ctx, EA, EA, 8);
@@ -3404,26 +3405,26 @@ static void gen_lqarx(DisasContext *ctx)
     hi = cpu_gpr[rd];
 
     if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-#ifdef CONFIG_ATOMIC128
-        TCGv_i32 oi = tcg_temp_new_i32();
-        if (ctx->le_mode) {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
-                                                ctx->mem_idx));
-            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        if (HAVE_ATOMIC128) {
+            TCGv_i32 oi = tcg_temp_new_i32();
+            if (ctx->le_mode) {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
+                                                    ctx->mem_idx));
+                gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+            } else {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
+                                                    ctx->mem_idx));
+                gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            }
+            tcg_temp_free_i32(oi);
+            tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
         } else {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
-                                                ctx->mem_idx));
-            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
+            tcg_temp_free(EA);
+            return;
         }
-        tcg_temp_free_i32(oi);
-        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
-#else
-        /* Restart with exclusive lock.  */
-        gen_helper_exit_atomic(cpu_env);
-        ctx->base.is_jmp = DISAS_NORETURN;
-        tcg_temp_free(EA);
-        return;
-#endif
     } else if (ctx->le_mode) {
         tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16);
         tcg_gen_mov_tl(cpu_reserve, EA);
@@ -3461,20 +3462,22 @@ static void gen_stqcx_(DisasContext *ctx)
     hi = cpu_gpr[rs];
 
     if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-        TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
-#ifdef CONFIG_ATOMIC128
-        if (ctx->le_mode) {
-            gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
+        if (HAVE_CMPXCHG128) {
+            TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
+            if (ctx->le_mode) {
+                gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
+                                             EA, lo, hi, oi);
+            } else {
+                gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
+                                             EA, lo, hi, oi);
+            }
+            tcg_temp_free_i32(oi);
         } else {
-            gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
         }
-#else
-        /* Restart with exclusive lock.  */
-        gen_helper_exit_atomic(cpu_env);
-        ctx->base.is_jmp = DISAS_NORETURN;
-#endif
         tcg_temp_free(EA);
-        tcg_temp_free_i32(oi);
     } else {
         TCGLabel *lab_fail = gen_new_label();
         TCGLabel *lab_over = gen_new_label();
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index bacae4f503..490c43e6e6 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -25,6 +25,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/s390x/storage-keys.h"
@@ -1379,65 +1380,62 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
     return cc;
 }
 
-static void do_cdsg(CPUS390XState *env, uint64_t addr,
-                    uint32_t r1, uint32_t r3, bool parallel)
+void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
+                  uint32_t r1, uint32_t r3)
 {
     uintptr_t ra = GETPC();
     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
     Int128 oldv;
+    uint64_t oldh, oldl;
     bool fail;
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-        fail = !int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t oldh, oldl;
+    check_alignment(env, addr, 16, ra);
 
-        check_alignment(env, addr, 16, ra);
+    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
+    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
 
-        oldh = cpu_ldq_data_ra(env, addr + 0, ra);
-        oldl = cpu_ldq_data_ra(env, addr + 8, ra);
-
-        oldv = int128_make128(oldl, oldh);
-        fail = !int128_eq(oldv, cmpv);
-        if (fail) {
-            newv = oldv;
-        }
-
-        cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
-        cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
+    oldv = int128_make128(oldl, oldh);
+    fail = !int128_eq(oldv, cmpv);
+    if (fail) {
+        newv = oldv;
     }
 
+    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
+    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
+
     env->cc_op = fail;
     env->regs[r1] = int128_gethi(oldv);
     env->regs[r1 + 1] = int128_getlo(oldv);
 }
 
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
-                  uint32_t r1, uint32_t r3)
-{
-    do_cdsg(env, addr, r1, r3, false);
-}
-
 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
                            uint32_t r1, uint32_t r3)
 {
-    do_cdsg(env, addr, r1, r3, true);
+    uintptr_t ra = GETPC();
+    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
+    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
+    int mem_idx;
+    TCGMemOpIdx oi;
+    Int128 oldv;
+    bool fail;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+    fail = !int128_eq(oldv, cmpv);
+
+    env->cc_op = fail;
+    env->regs[r1] = int128_gethi(oldv);
+    env->regs[r1 + 1] = int128_getlo(oldv);
 }
 
 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                         uint64_t a2, bool parallel)
 {
-#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
     uint32_t mem_idx = cpu_mmu_index(env, false);
-#endif
     uintptr_t ra = GETPC();
     uint32_t fc = extract32(env->regs[0], 0, 8);
     uint32_t sc = extract32(env->regs[0], 8, 8);
@@ -1465,18 +1463,20 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
     probe_write(env, a2, 0, mem_idx, ra);
 #endif
 
-    /* Note that the compare-and-swap is atomic, and the store is atomic, but
-       the complete operation is not.  Therefore we do not need to assert serial
-       context in order to implement this.  That said, restart early if we can't
-       support either operation that is supposed to be atomic.  */
+    /*
+     * Note that the compare-and-swap is atomic, and the store is atomic,
+     * but the complete operation is not.  Therefore we do not need to
+     * assert serial context in order to implement this.  That said,
+     * restart early if we can't support either operation that is supposed
+     * to be atomic.
+     */
     if (parallel) {
-        int mask = 0;
-#if !defined(CONFIG_ATOMIC64)
-        mask = -8;
-#elif !defined(CONFIG_ATOMIC128)
-        mask = -16;
+        uint32_t max = 2;
+#ifdef CONFIG_ATOMIC64
+        max = 3;
 #endif
-        if (((4 << fc) | (1 << sc)) & mask) {
+        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
+            (HAVE_ATOMIC128  ? 0 : sc > max)) {
             cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
         }
     }
@@ -1546,16 +1546,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
             Int128 ov;
 
-            if (parallel) {
-#ifdef CONFIG_ATOMIC128
-                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
-                cc = !int128_eq(ov, cv);
-#else
-                /* Note that we asserted !parallel above.  */
-                g_assert_not_reached();
-#endif
-            } else {
+            if (!parallel) {
                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
 
@@ -1567,6 +1558,13 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
 
                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
+            } else if (HAVE_CMPXCHG128) {
+                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
+                cc = !int128_eq(ov, cv);
+            } else {
+                /* Note that we asserted !parallel above.  */
+                g_assert_not_reached();
             }
 
             env->regs[r3 + 0] = int128_gethi(ov);
@@ -1596,18 +1594,16 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
             cpu_stq_data_ra(env, a2, svh, ra);
             break;
         case 4:
-            if (parallel) {
-#ifdef CONFIG_ATOMIC128
+            if (!parallel) {
+                cpu_stq_data_ra(env, a2 + 0, svh, ra);
+                cpu_stq_data_ra(env, a2 + 8, svl, ra);
+            } else if (HAVE_ATOMIC128) {
                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                 Int128 sv = int128_make128(svl, svh);
                 helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
-#else
+            } else {
                 /* Note that we asserted !parallel above.  */
                 g_assert_not_reached();
-#endif
-            } else {
-                cpu_stq_data_ra(env, a2 + 0, svh, ra);
-                cpu_stq_data_ra(env, a2 + 8, svl, ra);
             }
             break;
         default:
@@ -2100,76 +2096,64 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
 #endif
 
 /* load pair from quadword */
-static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
+uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
 {
     uintptr_t ra = GETPC();
     uint64_t hi, lo;
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-        Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
-        hi = int128_gethi(v);
-        lo = int128_getlo(v);
-#endif
-    } else {
-        check_alignment(env, addr, 16, ra);
-
-        hi = cpu_ldq_data_ra(env, addr + 0, ra);
-        lo = cpu_ldq_data_ra(env, addr + 8, ra);
-    }
+    check_alignment(env, addr, 16, ra);
+    hi = cpu_ldq_data_ra(env, addr + 0, ra);
+    lo = cpu_ldq_data_ra(env, addr + 8, ra);
 
     env->retxl = lo;
     return hi;
 }
 
-uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
-{
-    return do_lpq(env, addr, false);
-}
-
 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
 {
-    return do_lpq(env, addr, true);
-}
-
-/* store pair to quadword */
-static void do_stpq(CPUS390XState *env, uint64_t addr,
-                    uint64_t low, uint64_t high, bool parallel)
-{
     uintptr_t ra = GETPC();
+    uint64_t hi, lo;
+    int mem_idx;
+    TCGMemOpIdx oi;
+    Int128 v;
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    assert(HAVE_ATOMIC128);
 
-        Int128 v = int128_make128(low, high);
-        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
-#endif
-    } else {
-        check_alignment(env, addr, 16, ra);
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
+    hi = int128_gethi(v);
+    lo = int128_getlo(v);
 
-        cpu_stq_data_ra(env, addr + 0, high, ra);
-        cpu_stq_data_ra(env, addr + 8, low, ra);
-    }
+    env->retxl = lo;
+    return hi;
 }
 
+/* store pair to quadword */
 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
                   uint64_t low, uint64_t high)
 {
-    do_stpq(env, addr, low, high, false);
+    uintptr_t ra = GETPC();
+
+    check_alignment(env, addr, 16, ra);
+    cpu_stq_data_ra(env, addr + 0, high, ra);
+    cpu_stq_data_ra(env, addr + 8, low, ra);
 }
 
 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
                            uint64_t low, uint64_t high)
 {
-    do_stpq(env, addr, low, high, true);
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+    Int128 v;
+
+    assert(HAVE_ATOMIC128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    v = int128_make128(low, high);
+    helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
 }
 
 /* Execute instruction.  This instruction executes an insn modified with
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 18861cd186..b5bd56b7ee 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -44,6 +44,7 @@
 #include "trace-tcg.h"
 #include "exec/translator.h"
 #include "exec/log.h"
+#include "qemu/atomic128.h"
 
 
 /* Information that (most) every instruction needs to manipulate.  */
@@ -1128,11 +1129,19 @@ struct DisasInsn {
 
     const char *name;
 
+    /* Pre-process arguments before HELP_OP.  */
     void (*help_in1)(DisasContext *, DisasFields *, DisasOps *);
     void (*help_in2)(DisasContext *, DisasFields *, DisasOps *);
     void (*help_prep)(DisasContext *, DisasFields *, DisasOps *);
+
+    /*
+     * Post-process output after HELP_OP.
+     * Note that these are not called if HELP_OP returns DISAS_NORETURN.
+     */
     void (*help_wout)(DisasContext *, DisasFields *, DisasOps *);
     void (*help_cout)(DisasContext *, DisasOps *);
+
+    /* Implement the operation itself.  */
     DisasJumpType (*help_op)(DisasContext *, DisasOps *);
 
     uint64_t data;
@@ -2032,6 +2041,7 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
     int r3 = get_field(s->fields, r3);
     int d2 = get_field(s->fields, d2);
     int b2 = get_field(s->fields, b2);
+    DisasJumpType ret = DISAS_NEXT;
     TCGv_i64 addr;
     TCGv_i32 t_r1, t_r3;
 
@@ -2039,17 +2049,20 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
     addr = get_address(s, 0, b2, d2);
     t_r1 = tcg_const_i32(r1);
     t_r3 = tcg_const_i32(r3);
-    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
+    } else if (HAVE_CMPXCHG128) {
         gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
     } else {
-        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
+        gen_helper_exit_atomic(cpu_env);
+        ret = DISAS_NORETURN;
     }
     tcg_temp_free_i64(addr);
     tcg_temp_free_i32(t_r1);
     tcg_temp_free_i32(t_r3);
 
     set_cc_static(s);
-    return DISAS_NEXT;
+    return ret;
 }
 
 static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
@@ -3026,10 +3039,13 @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_lpq(DisasContext *s, DisasOps *o)
 {
-    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+        gen_helper_lpq(o->out, cpu_env, o->in2);
+    } else if (HAVE_ATOMIC128) {
         gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
     } else {
-        gen_helper_lpq(o->out, cpu_env, o->in2);
+        gen_helper_exit_atomic(cpu_env);
+        return DISAS_NORETURN;
     }
     return_low128(o->out2);
     return DISAS_NEXT;
@@ -4406,10 +4422,13 @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_stpq(DisasContext *s, DisasOps *o)
 {
-    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+        gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
+    } else if (HAVE_ATOMIC128) {
         gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out);
     } else {
-        gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
+        gen_helper_exit_atomic(cpu_env);
+        return DISAS_NORETURN;
     }
     return DISAS_NEXT;
 }
@@ -6125,11 +6144,13 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
     if (insn->help_op) {
         ret = insn->help_op(s, &o);
     }
-    if (insn->help_wout) {
-        insn->help_wout(s, &f, &o);
-    }
-    if (insn->help_cout) {
-        insn->help_cout(s, &o);
+    if (ret != DISAS_NORETURN) {
+        if (insn->help_wout) {
+            insn->help_wout(s, &f, &o);
+        }
+        if (insn->help_cout) {
+            insn->help_cout(s, &o);
+        }
     }
 
     /* Free any temporaries created by the helpers.  */
diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c
index 68f978d80b..2b49d1ca40 100644
--- a/target/unicore32/cpu.c
+++ b/target/unicore32/cpu.c
@@ -116,8 +116,6 @@ static void uc32_cpu_initfn(Object *obj)
     env->uncached_asr = ASR_MODE_PRIV;
     env->regs[31] = 0x03000000;
 #endif
-
-    tlb_flush(cs);
 }
 
 static const VMStateDescription vmstate_uc32_cpu = {