summary refs log tree commit diff stats
path: root/target
diff options
context:
space:
mode:
Diffstat (limited to 'target')
-rw-r--r--target/alpha/cpu.h1
-rw-r--r--target/alpha/translate.c2
-rw-r--r--target/arm/cpu.c4
-rw-r--r--target/arm/cpu.h17
-rw-r--r--target/arm/crypto_helper.c2
-rw-r--r--target/arm/helper.c2
-rw-r--r--target/arm/hvf/hvf.c1
-rw-r--r--target/arm/kvm.c1
-rw-r--r--target/arm/kvm64.c5
-rw-r--r--target/arm/neon_helper.c2
-rw-r--r--target/arm/sve_helper.c4
-rw-r--r--target/arm/translate-a64.h2
-rw-r--r--target/arm/translate-sve.c6
-rw-r--r--target/arm/translate-vfp.c2
-rw-r--r--target/arm/translate.c2
-rw-r--r--target/arm/vec_internal.h2
-rw-r--r--target/hexagon/gdbstub.c1
-rw-r--r--target/hppa/cpu.h1
-rw-r--r--target/hppa/translate.c2
-rw-r--r--target/i386/cpu.c2
-rw-r--r--target/i386/cpu.h28
-rw-r--r--target/i386/hax/hax-all.c1
-rw-r--r--target/i386/hax/hax-mem.c10
-rw-r--r--target/i386/hvf/hvf.c1
-rw-r--r--target/i386/hvf/x86.c1
-rw-r--r--target/i386/hvf/x86_cpuid.c1
-rw-r--r--target/i386/hvf/x86_decode.c1
-rw-r--r--target/i386/hvf/x86_descr.c1
-rw-r--r--target/i386/hvf/x86_emu.c1
-rw-r--r--target/i386/hvf/x86_flags.c1
-rw-r--r--target/i386/hvf/x86_mmu.c1
-rw-r--r--target/i386/hvf/x86_task.c1
-rw-r--r--target/i386/hvf/x86hvf.c1
-rw-r--r--target/i386/kvm/hyperv-proto.h37
-rw-r--r--target/i386/kvm/hyperv-stub.c6
-rw-r--r--target/i386/kvm/hyperv.c52
-rw-r--r--target/i386/kvm/kvm.c82
-rw-r--r--target/i386/kvm/sev-stub.c1
-rw-r--r--target/i386/nvmm/nvmm-all.c9
-rw-r--r--target/i386/ops_sse.h75
-rw-r--r--target/i386/sev.c42
-rw-r--r--target/i386/tcg/translate.c2
-rw-r--r--target/i386/whpx/whpx-accel-ops.c1
-rw-r--r--target/i386/whpx/whpx-accel-ops.h1
-rw-r--r--target/i386/whpx/whpx-all.c779
-rw-r--r--target/i386/whpx/whpx-apic.c1
-rw-r--r--target/i386/whpx/whpx-internal.h30
-rw-r--r--target/m68k/cpu.h1
-rw-r--r--target/m68k/m68k-semi.c22
-rw-r--r--target/microblaze/cpu.h2
-rw-r--r--target/mips/cpu.c4
-rw-r--r--target/mips/cpu.h2
-rw-r--r--target/mips/kvm.c1
-rw-r--r--target/mips/tcg/lmmi_helper.c2
-rw-r--r--target/mips/tcg/msa_helper.c64
-rw-r--r--target/nios2/nios2-semi.c24
-rw-r--r--target/openrisc/cpu.h1
-rw-r--r--target/ppc/arch_dump.c2
-rw-r--r--target/ppc/cpu.h3
-rw-r--r--target/ppc/cpu_init.c2
-rw-r--r--target/ppc/gdbstub.c4
-rw-r--r--target/ppc/int_helper.c22
-rw-r--r--target/ppc/kvm.c7
-rw-r--r--target/ppc/mem_helper.c4
-rw-r--r--target/ppc/translate.c2
-rw-r--r--target/ppc/translate/vmx-impl.c.inc4
-rw-r--r--target/ppc/translate/vsx-impl.c.inc2
-rw-r--r--target/riscv/cpu.h2
-rw-r--r--target/riscv/insn_trans/trans_rvv.c.inc4
-rw-r--r--target/riscv/kvm.c1
-rw-r--r--target/riscv/vector_helper.c2
-rw-r--r--target/rx/cpu.c1
-rw-r--r--target/rx/cpu.h1
-rw-r--r--target/rx/gdbstub.c1
-rw-r--r--target/s390x/cpu.h1
-rw-r--r--target/s390x/kvm/kvm.c1
-rw-r--r--target/s390x/tcg/translate.c2
-rw-r--r--target/s390x/tcg/translate_vx.c.inc2
-rw-r--r--target/s390x/tcg/vec.h2
-rw-r--r--target/s390x/tcg/vec_fpu_helper.c1
-rw-r--r--target/s390x/tcg/vec_int_helper.c1
-rw-r--r--target/s390x/tcg/vec_string_helper.c1
-rw-r--r--target/sh4/cpu.h1
-rw-r--r--target/sparc/cpu.h1
-rw-r--r--target/sparc/vis_helper.c4
-rw-r--r--target/tricore/cpu.h1
-rw-r--r--target/tricore/gdbstub.c1
-rw-r--r--target/xtensa/core-de233_fpu.c1
-rw-r--r--target/xtensa/core-dsp3400.c1
-rw-r--r--target/xtensa/core-test_mmuhifi_c3.c1
-rw-r--r--target/xtensa/cpu.h5
-rwxr-xr-xtarget/xtensa/import_core.sh1
-rw-r--r--target/xtensa/overlay_tool.h2
-rw-r--r--target/xtensa/translate.c6
94 files changed, 1205 insertions, 246 deletions
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index 58f00b7814..994a018b91 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -22,6 +22,7 @@
 
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#include "qemu/cpu-float.h"
 
 /* Alpha processors have a weak memory model */
 #define TCG_GUEST_DEFAULT_MO      (0)
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index 66768ab47a..4e887311ab 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -235,7 +235,7 @@ static TCGv dest_fpr(DisasContext *ctx, unsigned reg)
 static int get_flag_ofs(unsigned shift)
 {
     int ofs = offsetof(CPUAlphaState, flags);
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     ofs += 3 - (shift / 8);
 #else
     ofs += shift / 8;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 5d4ca7a227..3609de0888 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -22,7 +22,7 @@
 #include "qemu/qemu-print.h"
 #include "qemu/timer.h"
 #include "qemu/log.h"
-#include "qemu-common.h"
+#include "exec/page-vary.h"
 #include "target/arm/idau.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
@@ -812,7 +812,7 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
 
     sctlr_b = arm_sctlr_b(env);
     if (bswap_code(sctlr_b)) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
         info->endian = BFD_ENDIAN_LITTLE;
 #else
         info->endian = BFD_ENDIAN_BIG;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 23879de5fa..cb5359a747 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -21,6 +21,7 @@
 #define ARM_CPU_H
 
 #include "kvm-consts.h"
+#include "qemu/cpu-float.h"
 #include "hw/registerfields.h"
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
@@ -95,7 +96,7 @@ enum {
  * therefore useful to be able to pass TCG the offset of the least
  * significant half of a uint64_t struct member.
  */
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 #define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t))
 #define offsetofhigh32(S, M) offsetof(S, M)
 #else
@@ -382,7 +383,7 @@ typedef struct CPUArchState {
         union { /* Fault address registers. */
             struct {
                 uint64_t _unused_far0;
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
                 uint32_t ifar_ns;
                 uint32_t dfar_ns;
                 uint32_t ifar_s;
@@ -419,7 +420,7 @@ typedef struct CPUArchState {
         uint64_t c9_pminten; /* perf monitor interrupt enables */
         union { /* Memory attribute redirection */
             struct {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
                 uint64_t _unused_mair_0;
                 uint32_t mair1_ns;
                 uint32_t mair0_ns;
@@ -1093,7 +1094,7 @@ void aarch64_add_pauth_properties(Object *obj);
  */
 static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
 {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     int i;
 
     for (i = 0; i < nr; ++i) {
@@ -3549,12 +3550,12 @@ static inline int cpu_mmu_index(CPUARMState *env, bool ifetch)
 static inline bool bswap_code(bool sctlr_b)
 {
 #ifdef CONFIG_USER_ONLY
-    /* BE8 (SCTLR.B = 0, TARGET_WORDS_BIGENDIAN = 1) is mixed endian.
-     * The invalid combination SCTLR.B=1/CPSR.E=1/TARGET_WORDS_BIGENDIAN=0
+    /* BE8 (SCTLR.B = 0, TARGET_BIG_ENDIAN = 1) is mixed endian.
+     * The invalid combination SCTLR.B=1/CPSR.E=1/TARGET_BIG_ENDIAN=0
      * would also end up as a mixed-endian mode with BE code, LE data.
      */
     return
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
         1 ^
 #endif
         sctlr_b;
@@ -3570,7 +3571,7 @@ static inline bool bswap_code(bool sctlr_b)
 static inline bool arm_cpu_bswap_data(CPUARMState *env)
 {
     return
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
        1 ^
 #endif
        arm_cpu_data_is_big_endian(env);
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
index 28a84c2dbd..4c8fd34aec 100644
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -23,7 +23,7 @@ union CRYPTO_STATE {
     uint64_t   l[2];
 };
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 #define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
 #define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
 #else
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 7d14650615..50d287f289 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8642,7 +8642,7 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
                 r2->cp = 15;
             }
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
             if (r2->fieldoffset) {
                 r2->fieldoffset += sizeof(uint32_t);
             }
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 8c34f86792..567e296b21 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -10,7 +10,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "qemu/error-report.h"
 
 #include "sysemu/runstate.h"
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index bbf1ce7ba3..5fc37ac10a 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -13,7 +13,6 @@
 
 #include <linux/kvm.h>
 
-#include "qemu-common.h"
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index ccadfbbe72..17dd2f77d9 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -16,7 +16,6 @@
 #include <linux/elf.h>
 #include <linux/kvm.h>
 
-#include "qemu-common.h"
 #include "qapi/error.h"
 #include "cpu.h"
 #include "qemu/timer.h"
@@ -1023,7 +1022,7 @@ static int kvm_arch_put_fpsimd(CPUState *cs)
 
     for (i = 0; i < 32; i++) {
         uint64_t *q = aa64_vfp_qreg(env, i);
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
         uint64_t fp_val[2] = { q[1], q[0] };
         reg.addr = (uintptr_t)fp_val;
 #else
@@ -1242,7 +1241,7 @@ static int kvm_arch_get_fpsimd(CPUState *cs)
         if (ret) {
             return ret;
         } else {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
             uint64_t t;
             t = q[0], q[0] = q[1], q[1] = t;
 #endif
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index 338b9189d5..bc6c4a54e9 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -23,7 +23,7 @@ typedef struct \
 { \
     type v1; \
 } neon_##name;
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 #define NEON_TYPE2(name, type) \
 typedef struct \
 { \
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index d45d088615..e0f9aa9983 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -2802,7 +2802,7 @@ static void swap_memmove(void *vd, void *vs, size_t n)
     uintptr_t o = (d | s | n) & 7;
     size_t i;
 
-#ifndef HOST_WORDS_BIGENDIAN
+#if !HOST_BIG_ENDIAN
     o = 0;
 #endif
     switch (o) {
@@ -2864,7 +2864,7 @@ static void swap_memzero(void *vd, size_t n)
         return;
     }
 
-#ifndef HOST_WORDS_BIGENDIAN
+#if !HOST_BIG_ENDIAN
     o = 0;
 #endif
     switch (o) {
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index 58f50abca4..38884158aa 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -71,7 +71,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
 {
     int element_size = 1 << size;
     int offs = element * element_size;
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     /* This is complicated slightly because vfp.zregs[n].d[0] is
      * still the lowest and vfp.zregs[n].d[15] the highest of the
      * 256 byte vector, even on big endian systems.
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 2c23459e76..180e14d9f8 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -2872,7 +2872,7 @@ static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
      * The final adjustment for the vector register base
      * is added via constant offset to the load.
      */
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     /* Adjust for element ordering.  See vec_reg_offset.  */
     if (esz < 3) {
         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
@@ -5711,7 +5711,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
          * for this load operation.
          */
         TCGv_i64 tmp = tcg_temp_new_i64();
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
         poff += 6;
 #endif
         tcg_gen_ld16u_i64(tmp, cpu_env, poff);
@@ -5790,7 +5790,7 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
          * for this load operation.
          */
         TCGv_i64 tmp = tcg_temp_new_i64();
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
         poff += 4;
 #endif
         tcg_gen_ld32u_i64(tmp, cpu_env, poff);
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
index 17f796e32a..6a95a67a69 100644
--- a/target/arm/translate-vfp.c
+++ b/target/arm/translate-vfp.c
@@ -93,7 +93,7 @@ uint64_t vfp_expand_imm(int size, uint8_t imm8)
 static inline long vfp_f16_offset(unsigned reg, bool top)
 {
     long offs = vfp_reg_offset(false, reg);
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     if (!top) {
         offs += 2;
     }
diff --git a/target/arm/translate.c b/target/arm/translate.c
index bf2196b9e2..e8dfa71364 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1158,7 +1158,7 @@ long neon_element_offset(int reg, int element, MemOp memop)
 {
     int element_size = 1 << (memop & MO_SIZE);
     int ofs = element * element_size;
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     /*
      * Calculate the offset assuming fully little-endian,
      * then XOR to account for the order of the 8-byte units.
diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h
index 2a33558290..fb43a2380e 100644
--- a/target/arm/vec_internal.h
+++ b/target/arm/vec_internal.h
@@ -29,7 +29,7 @@
  * The H1_<N> macros are used when performing byte arithmetic and then
  * casting the final pointer to a type of size N.
  */
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 #define H1(x)   ((x) ^ 7)
 #define H1_2(x) ((x) ^ 6)
 #define H1_4(x) ((x) ^ 4)
diff --git a/target/hexagon/gdbstub.c b/target/hexagon/gdbstub.c
index 9c8c04c961..d152d01bfe 100644
--- a/target/hexagon/gdbstub.c
+++ b/target/hexagon/gdbstub.c
@@ -16,7 +16,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "exec/gdbstub.h"
 #include "cpu.h"
 #include "internal.h"
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index 4cc936b6bf..c43b93a68f 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -22,6 +22,7 @@
 
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#include "qemu/cpu-float.h"
 
 /* PA-RISC 1.x processors have a strong memory model.  */
 /* ??? While we do not yet implement PA-RISC 2.0, those processors have
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index 5c0b1eb274..0b83ee4d98 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -566,7 +566,7 @@ static void save_gpr(DisasContext *ctx, unsigned reg, TCGv_reg t)
     }
 }
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 # define HI_OFS  0
 # define LO_OFS  4
 #else
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index cb6b5467d0..99343be926 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6927,6 +6927,8 @@ static Property x86_cpu_properties[] = {
                       HYPERV_FEAT_AVIC, 0),
     DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU,
                             hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF),
+    DEFINE_PROP_BIT64("hv-syndbg", X86CPU, hyperv_features,
+                      HYPERV_FEAT_SYNDBG, 0),
     DEFINE_PROP_BOOL("hv-passthrough", X86CPU, hyperv_passthrough, false),
     DEFINE_PROP_BOOL("hv-enforce-cpuid", X86CPU, hyperv_enforce_cpuid, false),
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 982c532353..9661f9fbd1 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -25,6 +25,7 @@
 #include "kvm/hyperv-proto.h"
 #include "exec/cpu-defs.h"
 #include "qapi/qapi-types-common.h"
+#include "qemu/cpu-float.h"
 
 /* The x86 has a strong memory model with some store-after-load re-ordering */
 #define TCG_GUEST_DEFAULT_MO      (TCG_MO_ALL & ~TCG_MO_ST_LD)
@@ -1084,6 +1085,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define HYPERV_FEAT_IPI                 13
 #define HYPERV_FEAT_STIMER_DIRECT       14
 #define HYPERV_FEAT_AVIC                15
+#define HYPERV_FEAT_SYNDBG              16
 
 #ifndef HYPERV_SPINLOCK_NEVER_NOTIFY
 #define HYPERV_SPINLOCK_NEVER_NOTIFY             0xFFFFFFFF
@@ -1217,20 +1219,6 @@ typedef struct SegmentCache {
         float64  _d_##n[(bits)/64]; \
     }
 
-typedef union {
-    uint8_t _b[16];
-    uint16_t _w[8];
-    uint32_t _l[4];
-    uint64_t _q[2];
-} XMMReg;
-
-typedef union {
-    uint8_t _b[32];
-    uint16_t _w[16];
-    uint32_t _l[8];
-    uint64_t _q[4];
-} YMMReg;
-
 typedef MMREG_UNION(ZMMReg, 512) ZMMReg;
 typedef MMREG_UNION(MMXReg, 64)  MMXReg;
 
@@ -1248,7 +1236,7 @@ typedef struct BNDCSReg {
 #define BNDCFG_BNDPRESERVE  2ULL
 #define BNDCFG_BDIR_MASK    TARGET_PAGE_MASK
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 #define ZMM_B(n) _b_ZMMReg[63 - (n)]
 #define ZMM_W(n) _w_ZMMReg[31 - (n)]
 #define ZMM_L(n) _l_ZMMReg[15 - (n)]
@@ -1529,11 +1517,7 @@ typedef struct CPUArchState {
     ZMMReg xmm_t0;
     MMXReg mmx_t0;
 
-    XMMReg ymmh_regs[CPU_NB_REGS];
-
     uint64_t opmask_regs[NB_OPMASK_REGS];
-    YMMReg zmmh_regs[CPU_NB_REGS];
-    ZMMReg hi16_zmm_regs[CPU_NB_REGS];
 #ifdef TARGET_X86_64
     uint8_t xtilecfg[64];
     uint8_t xtiledata[8192];
@@ -1600,6 +1584,12 @@ typedef struct CPUArchState {
     uint64_t msr_hv_hypercall;
     uint64_t msr_hv_guest_os_id;
     uint64_t msr_hv_tsc;
+    uint64_t msr_hv_syndbg_control;
+    uint64_t msr_hv_syndbg_status;
+    uint64_t msr_hv_syndbg_send_page;
+    uint64_t msr_hv_syndbg_recv_page;
+    uint64_t msr_hv_syndbg_pending_page;
+    uint64_t msr_hv_syndbg_options;
 
     /* Per-VCPU HV MSRs */
     uint64_t msr_hv_vapic;
diff --git a/target/i386/hax/hax-all.c b/target/i386/hax/hax-all.c
index 81f665e212..b185ee8de4 100644
--- a/target/i386/hax/hax-all.c
+++ b/target/i386/hax/hax-all.c
@@ -27,7 +27,6 @@
 #include "cpu.h"
 #include "exec/address-spaces.h"
 
-#include "qemu-common.h"
 #include "qemu/accel.h"
 #include "sysemu/reset.h"
 #include "sysemu/runstate.h"
diff --git a/target/i386/hax/hax-mem.c b/target/i386/hax/hax-mem.c
index a226d174d8..05dbe8cce3 100644
--- a/target/i386/hax/hax-mem.c
+++ b/target/i386/hax/hax-mem.c
@@ -188,15 +188,15 @@ static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
     /* Adjust start_pa and size so that they are page-aligned. (Cf
      * kvm_set_phys_mem() in kvm-all.c).
      */
-    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
-    delta &= ~qemu_real_host_page_mask;
+    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
+    delta &= ~qemu_real_host_page_mask();
     if (delta > size) {
         return;
     }
     start_pa += delta;
     size -= delta;
-    size &= qemu_real_host_page_mask;
-    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
+    size &= qemu_real_host_page_mask();
+    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
         return;
     }
 
@@ -214,7 +214,7 @@ static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
      * call into the kernel. Instead, we split the mapping into smaller ones,
      * and call hax_update_mapping() on each.
      */
-    max_mapping_size = UINT32_MAX & qemu_real_host_page_mask;
+    max_mapping_size = UINT32_MAX & qemu_real_host_page_mask();
     while (size > max_mapping_size) {
         hax_update_mapping(start_pa, max_mapping_size, host_va, flags);
         start_pa += max_mapping_size;
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index fc12c02fb2..f8833277ab 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -47,7 +47,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "qemu/memalign.h"
 
diff --git a/target/i386/hvf/x86.c b/target/i386/hvf/x86.c
index 91a3fe002c..d086584f26 100644
--- a/target/i386/hvf/x86.c
+++ b/target/i386/hvf/x86.c
@@ -19,7 +19,6 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
-#include "qemu-common.h"
 #include "x86_decode.h"
 #include "x86_emu.h"
 #include "vmcs.h"
diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
index 32b0d131df..f24dd50e48 100644
--- a/target/i386/hvf/x86_cpuid.c
+++ b/target/i386/hvf/x86_cpuid.c
@@ -21,7 +21,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "cpu.h"
 #include "x86.h"
 #include "vmx.h"
diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index 062713b1a4..3728d7705e 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -18,7 +18,6 @@
 
 #include "qemu/osdep.h"
 
-#include "qemu-common.h"
 #include "panic.h"
 #include "x86_decode.h"
 #include "vmx.h"
diff --git a/target/i386/hvf/x86_descr.c b/target/i386/hvf/x86_descr.c
index af15c06ac5..a484942cfc 100644
--- a/target/i386/hvf/x86_descr.c
+++ b/target/i386/hvf/x86_descr.c
@@ -18,7 +18,6 @@
 
 #include "qemu/osdep.h"
 
-#include "qemu-common.h"
 #include "vmx.h"
 #include "x86_descr.h"
 
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index 050428795b..f5704f63e8 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -37,7 +37,6 @@
 
 #include "qemu/osdep.h"
 #include "panic.h"
-#include "qemu-common.h"
 #include "x86_decode.h"
 #include "x86.h"
 #include "x86_emu.h"
diff --git a/target/i386/hvf/x86_flags.c b/target/i386/hvf/x86_flags.c
index fecbca7517..03d6de5efc 100644
--- a/target/i386/hvf/x86_flags.c
+++ b/target/i386/hvf/x86_flags.c
@@ -23,7 +23,6 @@
 
 #include "qemu/osdep.h"
 
-#include "qemu-common.h"
 #include "panic.h"
 #include "cpu.h"
 #include "x86_flags.h"
diff --git a/target/i386/hvf/x86_mmu.c b/target/i386/hvf/x86_mmu.c
index df0b91cd42..96d117567e 100644
--- a/target/i386/hvf/x86_mmu.c
+++ b/target/i386/hvf/x86_mmu.c
@@ -18,7 +18,6 @@
 
 #include "qemu/osdep.h"
 #include "panic.h"
-#include "qemu-common.h"
 #include "cpu.h"
 #include "x86.h"
 #include "x86_mmu.h"
diff --git a/target/i386/hvf/x86_task.c b/target/i386/hvf/x86_task.c
index d24daf6a41..beaeec0687 100644
--- a/target/i386/hvf/x86_task.c
+++ b/target/i386/hvf/x86_task.c
@@ -8,7 +8,6 @@
 // GNU General Public License for more details.
 #include "qemu/osdep.h"
 #include "panic.h"
-#include "qemu-common.h"
 #include "qemu/error-report.h"
 
 #include "sysemu/hvf.h"
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index bec9fc5814..69d4fb8cf5 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -19,7 +19,6 @@
 
 #include "qemu/osdep.h"
 
-#include "qemu-common.h"
 #include "x86hvf.h"
 #include "vmx.h"
 #include "vmcs.h"
diff --git a/target/i386/kvm/hyperv-proto.h b/target/i386/kvm/hyperv-proto.h
index 89f81afda7..e40e59411c 100644
--- a/target/i386/kvm/hyperv-proto.h
+++ b/target/i386/kvm/hyperv-proto.h
@@ -19,6 +19,9 @@
 #define HV_CPUID_ENLIGHTMENT_INFO             0x40000004
 #define HV_CPUID_IMPLEMENT_LIMITS             0x40000005
 #define HV_CPUID_NESTED_FEATURES              0x4000000A
+#define HV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS    0x40000080
+#define HV_CPUID_SYNDBG_INTERFACE                   0x40000081
+#define HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES       0x40000082
 #define HV_CPUID_MIN                          0x40000005
 #define HV_CPUID_MAX                          0x4000ffff
 #define HV_HYPERVISOR_PRESENT_BIT             0x80000000
@@ -55,9 +58,15 @@
 #define HV_GUEST_IDLE_STATE_AVAILABLE           (1u << 5)
 #define HV_FREQUENCY_MSRS_AVAILABLE             (1u << 8)
 #define HV_GUEST_CRASH_MSR_AVAILABLE            (1u << 10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE         (1u << 11)
 #define HV_STIMER_DIRECT_MODE_AVAILABLE         (1u << 19)
 
 /*
+ * HV_CPUID_FEATURES.EBX bits
+ */
+#define HV_PARTITION_DEBUGGING_ALLOWED          (1u << 12)
+
+/*
  * HV_CPUID_ENLIGHTMENT_INFO.EAX bits
  */
 #define HV_AS_SWITCH_RECOMMENDED            (1u << 0)
@@ -73,6 +82,11 @@
 #define HV_NO_NONARCH_CORESHARING           (1u << 18)
 
 /*
+ * HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX bits
+ */
+#define HV_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING    (1u << 1)
+
+/*
  * Basic virtualized MSRs
  */
 #define HV_X64_MSR_GUEST_OS_ID                0x40000000
@@ -131,6 +145,18 @@
 #define HV_X64_MSR_STIMER3_COUNT                0x400000B7
 
 /*
+ * Hyper-V Synthetic debug options MSR
+ */
+#define HV_X64_MSR_SYNDBG_CONTROL               0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS                0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER           0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER           0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER        0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS               0x400000FF
+
+#define HV_X64_SYNDBG_OPTION_USE_HCALLS         BIT(2)
+
+/*
  * Guest crash notification MSRs
  */
 #define HV_X64_MSR_CRASH_P0                     0x40000100
@@ -168,5 +194,16 @@
 
 #define HV_STIMER_COUNT                       4
 
+/*
+ * Synthetic debugger control definitions
+ */
+#define HV_SYNDBG_CONTROL_SEND              (1u << 0)
+#define HV_SYNDBG_CONTROL_RECV              (1u << 1)
+#define HV_SYNDBG_CONTROL_SEND_SIZE(ctl)    ((ctl >> 16) & 0xffff)
+#define HV_SYNDBG_STATUS_INVALID            (0)
+#define HV_SYNDBG_STATUS_SEND_SUCCESS       (1u << 0)
+#define HV_SYNDBG_STATUS_RECV_SUCCESS       (1u << 2)
+#define HV_SYNDBG_STATUS_RESET              (1u << 3)
+#define HV_SYNDBG_STATUS_SET_SIZE(st, sz)   (st | (sz << 16))
 
 #endif
diff --git a/target/i386/kvm/hyperv-stub.c b/target/i386/kvm/hyperv-stub.c
index 0028527e79..778ed782e6 100644
--- a/target/i386/kvm/hyperv-stub.c
+++ b/target/i386/kvm/hyperv-stub.c
@@ -28,6 +28,12 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
     case KVM_EXIT_HYPERV_HCALL:
         exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE;
         return 0;
+    case KVM_EXIT_HYPERV_SYNDBG:
+        if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
+            return -1;
+        }
+
+        return 0;
     default:
         return -1;
     }
diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c
index 26efc1e0e6..9026ef3a81 100644
--- a/target/i386/kvm/hyperv.c
+++ b/target/i386/kvm/hyperv.c
@@ -81,20 +81,66 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
     case KVM_EXIT_HYPERV_HCALL: {
         uint16_t code = exit->u.hcall.input & 0xffff;
         bool fast = exit->u.hcall.input & HV_HYPERCALL_FAST;
-        uint64_t param = exit->u.hcall.params[0];
+        uint64_t in_param = exit->u.hcall.params[0];
+        uint64_t out_param = exit->u.hcall.params[1];
 
         switch (code) {
         case HV_POST_MESSAGE:
-            exit->u.hcall.result = hyperv_hcall_post_message(param, fast);
+            exit->u.hcall.result = hyperv_hcall_post_message(in_param, fast);
             break;
         case HV_SIGNAL_EVENT:
-            exit->u.hcall.result = hyperv_hcall_signal_event(param, fast);
+            exit->u.hcall.result = hyperv_hcall_signal_event(in_param, fast);
+            break;
+        case HV_POST_DEBUG_DATA:
+            exit->u.hcall.result =
+                hyperv_hcall_post_dbg_data(in_param, out_param, fast);
+            break;
+        case HV_RETRIEVE_DEBUG_DATA:
+            exit->u.hcall.result =
+                hyperv_hcall_retreive_dbg_data(in_param, out_param, fast);
+            break;
+        case HV_RESET_DEBUG_SESSION:
+            exit->u.hcall.result =
+                hyperv_hcall_reset_dbg_session(out_param);
             break;
         default:
             exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE;
         }
         return 0;
     }
+
+    case KVM_EXIT_HYPERV_SYNDBG:
+        if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
+            return -1;
+        }
+
+        switch (exit->u.syndbg.msr) {
+        case HV_X64_MSR_SYNDBG_CONTROL: {
+            uint64_t control = exit->u.syndbg.control;
+            env->msr_hv_syndbg_control = control;
+            env->msr_hv_syndbg_send_page = exit->u.syndbg.send_page;
+            env->msr_hv_syndbg_recv_page = exit->u.syndbg.recv_page;
+            exit->u.syndbg.status = HV_STATUS_SUCCESS;
+            if (control & HV_SYNDBG_CONTROL_SEND) {
+                exit->u.syndbg.status =
+                    hyperv_syndbg_send(env->msr_hv_syndbg_send_page,
+                            HV_SYNDBG_CONTROL_SEND_SIZE(control));
+            } else if (control & HV_SYNDBG_CONTROL_RECV) {
+                exit->u.syndbg.status =
+                    hyperv_syndbg_recv(env->msr_hv_syndbg_recv_page,
+                            TARGET_PAGE_SIZE);
+            }
+            break;
+        }
+        case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+            env->msr_hv_syndbg_pending_page = exit->u.syndbg.pending_page;
+            hyperv_syndbg_set_pending_page(env->msr_hv_syndbg_pending_page);
+            break;
+        default:
+            return -1;
+        }
+
+        return 0;
     default:
         return -1;
     }
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 9cf8e03669..c885763a5b 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -54,6 +54,8 @@
 #include "exec/memattrs.h"
 #include "trace.h"
 
+#include CONFIG_DEVICES
+
 //#define DEBUG_KVM
 
 #ifdef DEBUG_KVM
@@ -104,6 +106,7 @@ static bool has_msr_hv_synic;
 static bool has_msr_hv_stimer;
 static bool has_msr_hv_frequencies;
 static bool has_msr_hv_reenlightenment;
+static bool has_msr_hv_syndbg_options;
 static bool has_msr_xss;
 static bool has_msr_umwait;
 static bool has_msr_spec_ctrl;
@@ -964,6 +967,16 @@ static struct {
              .bits = HV_DEPRECATING_AEOI_RECOMMENDED}
         }
     },
+#ifdef CONFIG_SYNDBG
+    [HYPERV_FEAT_SYNDBG] = {
+        .desc = "Enable synthetic kernel debugger channel (hv-syndbg)",
+        .flags = {
+            {.func = HV_CPUID_FEATURES, .reg = R_EDX,
+             .bits = HV_FEATURE_DEBUG_MSRS_AVAILABLE}
+        },
+        .dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_RELAXED)
+    },
+#endif
 };
 
 static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max,
@@ -1004,8 +1017,8 @@ static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max,
 static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs)
 {
     struct kvm_cpuid2 *cpuid;
-    /* 0x40000000..0x40000005, 0x4000000A, 0x40000080..0x40000080 leaves */
-    int max = 10;
+    /* 0x40000000..0x40000005, 0x4000000A, 0x40000080..0x40000082 leaves */
+    int max = 11;
     int i;
     bool do_sys_ioctl;
 
@@ -1118,6 +1131,12 @@ static struct kvm_cpuid2 *get_supported_hv_cpuid_legacy(CPUState *cs)
         entry_feat->eax |= HV_SYNTIMERS_AVAILABLE;
     }
 
+    if (has_msr_hv_syndbg_options) {
+        entry_feat->edx |= HV_GUEST_DEBUGGING_AVAILABLE;
+        entry_feat->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
+        entry_feat->ebx |= HV_PARTITION_DEBUGGING_ALLOWED;
+    }
+
     if (kvm_check_extension(cs->kvm_state,
                             KVM_CAP_HYPERV_TLBFLUSH) > 0) {
         entry_recomm->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED;
@@ -1369,12 +1388,22 @@ static int hyperv_fill_cpuids(CPUState *cs,
 {
     X86CPU *cpu = X86_CPU(cs);
     struct kvm_cpuid_entry2 *c;
-    uint32_t cpuid_i = 0;
+    uint32_t signature[3];
+    uint32_t cpuid_i = 0, max_cpuid_leaf = 0;
+
+    max_cpuid_leaf = HV_CPUID_IMPLEMENT_LIMITS;
+    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
+        max_cpuid_leaf = MAX(max_cpuid_leaf, HV_CPUID_NESTED_FEATURES);
+    }
+
+    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
+        max_cpuid_leaf =
+            MAX(max_cpuid_leaf, HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+    }
 
     c = &cpuid_ent[cpuid_i++];
     c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
-    c->eax = hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ?
-        HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS;
+    c->eax = max_cpuid_leaf;
     c->ebx = cpu->hyperv_vendor_id[0];
     c->ecx = cpu->hyperv_vendor_id[1];
     c->edx = cpu->hyperv_vendor_id[2];
@@ -1453,6 +1482,33 @@ static int hyperv_fill_cpuids(CPUState *cs,
         c->eax = cpu->hyperv_nested[0];
     }
 
+    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
+        c = &cpuid_ent[cpuid_i++];
+        c->function = HV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS;
+        c->eax = hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ?
+            HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS;
+        memcpy(signature, "Microsoft VS", 12);
+        c->eax = 0;
+        c->ebx = signature[0];
+        c->ecx = signature[1];
+        c->edx = signature[2];
+
+        c = &cpuid_ent[cpuid_i++];
+        c->function = HV_CPUID_SYNDBG_INTERFACE;
+        memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12);
+        c->eax = signature[0];
+        c->ebx = 0;
+        c->ecx = 0;
+        c->edx = 0;
+
+        c = &cpuid_ent[cpuid_i++];
+        c->function = HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES;
+        c->eax = HV_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+        c->ebx = 0;
+        c->ecx = 0;
+        c->edx = 0;
+    }
+
     return cpuid_i;
 }
 
@@ -2261,6 +2317,9 @@ static int kvm_get_supported_msrs(KVMState *s)
             case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
                 has_msr_hv_reenlightenment = true;
                 break;
+            case HV_X64_MSR_SYNDBG_OPTIONS:
+                has_msr_hv_syndbg_options = true;
+                break;
             case MSR_IA32_SPEC_CTRL:
                 has_msr_spec_ctrl = true;
                 break;
@@ -3178,6 +3237,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
                 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS,
                                   env->msr_hv_tsc_emulation_status);
             }
+#ifdef CONFIG_SYNDBG
+            if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG) &&
+                has_msr_hv_syndbg_options) {
+                kvm_msr_entry_add(cpu, HV_X64_MSR_SYNDBG_OPTIONS,
+                                  hyperv_syndbg_query_options());
+            }
+#endif
         }
         if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) {
             kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE,
@@ -3619,6 +3685,9 @@ static int kvm_get_msrs(X86CPU *cpu)
         kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
         kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, 0);
     }
+    if (has_msr_hv_syndbg_options) {
+        kvm_msr_entry_add(cpu, HV_X64_MSR_SYNDBG_OPTIONS, 0);
+    }
     if (has_msr_hv_crash) {
         int j;
 
@@ -3910,6 +3979,9 @@ static int kvm_get_msrs(X86CPU *cpu)
         case HV_X64_MSR_TSC_EMULATION_STATUS:
             env->msr_hv_tsc_emulation_status = msrs[i].data;
             break;
+        case HV_X64_MSR_SYNDBG_OPTIONS:
+            env->msr_hv_syndbg_options = msrs[i].data;
+            break;
         case MSR_MTRRdefType:
             env->mtrr_deftype = msrs[i].data;
             break;
diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c
index 6080c007a2..1be5341e8a 100644
--- a/target/i386/kvm/sev-stub.c
+++ b/target/i386/kvm/sev-stub.c
@@ -12,7 +12,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "sev.h"
 
 int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index b97d091a50..b75738ee9c 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -11,7 +11,6 @@
 #include "cpu.h"
 #include "exec/address-spaces.h"
 #include "exec/ioport.h"
-#include "qemu-common.h"
 #include "qemu/accel.h"
 #include "sysemu/nvmm.h"
 #include "sysemu/cpus.h"
@@ -1075,15 +1074,15 @@ nvmm_process_section(MemoryRegionSection *section, int add)
     }
 
     /* Adjust start_pa and size so that they are page-aligned. */
-    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
-    delta &= ~qemu_real_host_page_mask;
+    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
+    delta &= ~qemu_real_host_page_mask();
     if (delta > size) {
         return;
     }
     start_pa += delta;
     size -= delta;
-    size &= qemu_real_host_page_mask;
-    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
+    size &= qemu_real_host_page_mask();
+    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
         return;
     }
 
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 6f1fc174b3..e4d74b814a 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -22,6 +22,7 @@
 
 #if SHIFT == 0
 #define Reg MMXReg
+#define SIZE 8
 #define XMM_ONLY(...)
 #define B(n) MMX_B(n)
 #define W(n) MMX_W(n)
@@ -30,6 +31,7 @@
 #define SUFFIX _mmx
 #else
 #define Reg ZMMReg
+#define SIZE 16
 #define XMM_ONLY(...) __VA_ARGS__
 #define B(n) ZMM_B(n)
 #define W(n) ZMM_W(n)
@@ -38,6 +40,22 @@
 #define SUFFIX _xmm
 #endif
 
+/*
+ * Copy the relevant parts of a Reg value around. In the case where
+ * sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of
+ * a 64 byte ZMMReg, so we must copy only those and keep the top bytes
+ * untouched in the guest-visible destination destination register.
+ * Note that the "lower bytes" are placed last in memory on big-endian
+ * hosts, which store the vector backwards in memory.  In that case the
+ * copy *starts* at B(SIZE - 1) and ends at B(0), the opposite of
+ * the little-endian case.
+ */
+#if HOST_BIG_ENDIAN
+#define MOVE(d, r) memcpy(&((d).B(SIZE - 1)), &(r).B(SIZE - 1), SIZE)
+#else
+#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
+#endif
+
 void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
@@ -516,7 +534,7 @@ void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int order)
     r.W(1) = s->W((order >> 2) & 3);
     r.W(2) = s->W((order >> 4) & 3);
     r.W(3) = s->W((order >> 6) & 3);
-    *d = r;
+    MOVE(*d, r);
 }
 #else
 void helper_shufps(Reg *d, Reg *s, int order)
@@ -527,7 +545,7 @@ void helper_shufps(Reg *d, Reg *s, int order)
     r.L(1) = d->L((order >> 2) & 3);
     r.L(2) = s->L((order >> 4) & 3);
     r.L(3) = s->L((order >> 6) & 3);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_shufpd(Reg *d, Reg *s, int order)
@@ -536,7 +554,7 @@ void helper_shufpd(Reg *d, Reg *s, int order)
 
     r.Q(0) = d->Q(order & 1);
     r.Q(1) = s->Q((order >> 1) & 1);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order)
@@ -547,7 +565,7 @@ void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order)
     r.L(1) = s->L((order >> 2) & 3);
     r.L(2) = s->L((order >> 4) & 3);
     r.L(3) = s->L((order >> 6) & 3);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order)
@@ -559,7 +577,7 @@ void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order)
     r.W(2) = s->W((order >> 4) & 3);
     r.W(3) = s->W((order >> 6) & 3);
     r.Q(1) = s->Q(1);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order)
@@ -571,7 +589,7 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order)
     r.W(5) = s->W(4 + ((order >> 2) & 3));
     r.W(6) = s->W(4 + ((order >> 4) & 3));
     r.W(7) = s->W(4 + ((order >> 6) & 3));
-    *d = r;
+    MOVE(*d, r);
 }
 #endif
 
@@ -937,7 +955,7 @@ void helper_haddps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
     r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
     r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
     r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s)
@@ -946,7 +964,7 @@ void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 
     r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
     r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
@@ -957,7 +975,7 @@ void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
     r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
     r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
     r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s)
@@ -966,7 +984,7 @@ void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 
     r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
     r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_addsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
@@ -1153,7 +1171,7 @@ void glue(helper_packsswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.B(14) = satsb((int16_t)s->W(6));
     r.B(15) = satsb((int16_t)s->W(7));
 #endif
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1180,7 +1198,7 @@ void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.B(14) = satub((int16_t)s->W(6));
     r.B(15) = satub((int16_t)s->W(7));
 #endif
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1199,7 +1217,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.W(6) = satsw(s->L(2));
     r.W(7) = satsw(s->L(3));
 #endif
-    *d = r;
+    MOVE(*d, r);
 }
 
 #define UNPCK_OP(base_name, base)                                       \
@@ -1227,7 +1245,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
                  r.B(14) = d->B((base << (SHIFT + 2)) + 7);             \
                  r.B(15) = s->B((base << (SHIFT + 2)) + 7);             \
                                                                       ) \
-            *d = r;                                                     \
+        MOVE(*d, r);                                                    \
     }                                                                   \
                                                                         \
     void glue(helper_punpck ## base_name ## wd, SUFFIX)(CPUX86State *env,\
@@ -1245,7 +1263,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
                  r.W(6) = d->W((base << (SHIFT + 1)) + 3);              \
                  r.W(7) = s->W((base << (SHIFT + 1)) + 3);              \
                                                                       ) \
-            *d = r;                                                     \
+            MOVE(*d, r);                                                \
     }                                                                   \
                                                                         \
     void glue(helper_punpck ## base_name ## dq, SUFFIX)(CPUX86State *env,\
@@ -1259,7 +1277,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
                  r.L(2) = d->L((base << SHIFT) + 1);                    \
                  r.L(3) = s->L((base << SHIFT) + 1);                    \
                                                                       ) \
-            *d = r;                                                     \
+            MOVE(*d, r);                                                \
     }                                                                   \
                                                                         \
     XMM_ONLY(                                                           \
@@ -1272,7 +1290,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
                                                                         \
                  r.Q(0) = d->Q(base);                                   \
                  r.Q(1) = s->Q(base);                                   \
-                 *d = r;                                                \
+                 MOVE(*d, r);                                           \
              }                                                          \
                                                                         )
 
@@ -1313,7 +1331,7 @@ void helper_pfacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 
     r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
     r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_pfadd(CPUX86State *env, MMXReg *d, MMXReg *s)
@@ -1378,7 +1396,7 @@ void helper_pfnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 
     r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
     r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
@@ -1387,7 +1405,7 @@ void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 
     r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
     r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_pfrcp(CPUX86State *env, MMXReg *d, MMXReg *s)
@@ -1424,7 +1442,7 @@ void helper_pswapd(CPUX86State *env, MMXReg *d, MMXReg *s)
 
     r.MMX_L(0) = s->MMX_L(1);
     r.MMX_L(1) = s->MMX_L(0);
-    *d = r;
+    MOVE(*d, r);
 }
 #endif
 
@@ -1438,7 +1456,7 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
         r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1)));
     }
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1455,7 +1473,7 @@ void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
     XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1467,7 +1485,7 @@ void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
     XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1483,7 +1501,7 @@ void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
     XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1585,7 +1603,7 @@ void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
 #undef SHR
     }
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 #define XMM0 (env->xmm_regs[0])
@@ -1718,7 +1736,7 @@ void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.W(5) = satuw((int32_t) s->L(1));
     r.W(6) = satuw((int32_t) s->L(2));
     r.W(7) = satuw((int32_t) s->L(3));
-    *d = r;
+    MOVE(*d, r);
 }
 
 #define FMINSB(d, s) MIN((int8_t)d, (int8_t)s)
@@ -1984,7 +2002,7 @@ void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
         r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3));
     }
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 /* SSE4.2 op helpers */
@@ -2324,3 +2342,4 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
 #undef L
 #undef Q
 #undef SUFFIX
+#undef SIZE
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 025ff7a6f8..32f7dbac4e 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -531,12 +531,46 @@ e_free:
     return 1;
 }
 
+static int sev_get_cpu0_id(int fd, guchar **id, size_t *id_len, Error **errp)
+{
+    guchar *id_data;
+    struct sev_user_data_get_id2 get_id2 = {};
+    int err, r;
+
+    /* query the ID length */
+    r = sev_platform_ioctl(fd, SEV_GET_ID2, &get_id2, &err);
+    if (r < 0 && err != SEV_RET_INVALID_LEN) {
+        error_setg(errp, "SEV: Failed to get ID ret=%d fw_err=%d (%s)",
+                   r, err, fw_error_to_str(err));
+        return 1;
+    }
+
+    id_data = g_new(guchar, get_id2.length);
+    get_id2.address = (unsigned long)id_data;
+
+    r = sev_platform_ioctl(fd, SEV_GET_ID2, &get_id2, &err);
+    if (r < 0) {
+        error_setg(errp, "SEV: Failed to get ID ret=%d fw_err=%d (%s)",
+                   r, err, fw_error_to_str(err));
+        goto err;
+    }
+
+    *id = id_data;
+    *id_len = get_id2.length;
+    return 0;
+
+err:
+    g_free(id_data);
+    return 1;
+}
+
 static SevCapability *sev_get_capabilities(Error **errp)
 {
     SevCapability *cap = NULL;
     guchar *pdh_data = NULL;
     guchar *cert_chain_data = NULL;
-    size_t pdh_len = 0, cert_chain_len = 0;
+    guchar *cpu0_id_data = NULL;
+    size_t pdh_len = 0, cert_chain_len = 0, cpu0_id_len = 0;
     uint32_t ebx;
     int fd;
 
@@ -561,9 +595,14 @@ static SevCapability *sev_get_capabilities(Error **errp)
         goto out;
     }
 
+    if (sev_get_cpu0_id(fd, &cpu0_id_data, &cpu0_id_len, errp)) {
+        goto out;
+    }
+
     cap = g_new0(SevCapability, 1);
     cap->pdh = g_base64_encode(pdh_data, pdh_len);
     cap->cert_chain = g_base64_encode(cert_chain_data, cert_chain_len);
+    cap->cpu0_id = g_base64_encode(cpu0_id_data, cpu0_id_len);
 
     host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
     cap->cbitpos = ebx & 0x3f;
@@ -575,6 +614,7 @@ static SevCapability *sev_get_capabilities(Error **errp)
     cap->reduced_phys_bits = 1;
 
 out:
+    g_free(cpu0_id_data);
     g_free(pdh_data);
     g_free(cert_chain_data);
     close(fd);
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index c393913fe0..3ba1c99fff 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -359,7 +359,7 @@ static void gen_update_cc_op(DisasContext *s)
 
 #endif /* !TARGET_X86_64 */
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c
index dd2a9f7657..e8dc4b3a47 100644
--- a/target/i386/whpx/whpx-accel-ops.c
+++ b/target/i386/whpx/whpx-accel-ops.c
@@ -100,6 +100,7 @@ static void whpx_accel_ops_class_init(ObjectClass *oc, void *data)
     ops->synchronize_post_init = whpx_cpu_synchronize_post_init;
     ops->synchronize_state = whpx_cpu_synchronize_state;
     ops->synchronize_pre_loadvm = whpx_cpu_synchronize_pre_loadvm;
+    ops->synchronize_pre_resume = whpx_cpu_synchronize_pre_resume;
 }
 
 static const TypeInfo whpx_accel_ops_type = {
diff --git a/target/i386/whpx/whpx-accel-ops.h b/target/i386/whpx/whpx-accel-ops.h
index 2dee6d61ea..b5102dd1ee 100644
--- a/target/i386/whpx/whpx-accel-ops.h
+++ b/target/i386/whpx/whpx-accel-ops.h
@@ -21,6 +21,7 @@ void whpx_cpu_synchronize_state(CPUState *cpu);
 void whpx_cpu_synchronize_post_reset(CPUState *cpu);
 void whpx_cpu_synchronize_post_init(CPUState *cpu);
 void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu);
+void whpx_cpu_synchronize_pre_resume(bool step_pending);
 
 /* state subset only touched by the VCPU itself during runtime */
 #define WHPX_SET_RUNTIME_STATE   1
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index 03ba52da89..b625ad5bbb 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -12,7 +12,7 @@
 #include "cpu.h"
 #include "exec/address-spaces.h"
 #include "exec/ioport.h"
-#include "qemu-common.h"
+#include "exec/gdbstub.h"
 #include "qemu/accel.h"
 #include "sysemu/whpx.h"
 #include "sysemu/cpus.h"
@@ -148,6 +148,87 @@ struct whpx_register_set {
     WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
 };
 
+/*
+ * The current implementation of instruction stepping sets the TF flag
+ * in RFLAGS, causing the CPU to raise an INT1 after each instruction.
+ * This corresponds to the WHvX64ExceptionTypeDebugTrapOrFault exception.
+ *
+ * This approach has a few limitations:
+ *     1. Stepping over a PUSHF/SAHF instruction will save the TF flag
+ *        along with the other flags, possibly restoring it later. It would
+ *        result in another INT1 when the flags are restored, triggering
+ *        a stop in gdb that could be cleared by doing another step.
+ *
+ *        Stepping over a POPF/LAHF instruction will let it overwrite the
+ *        TF flags, ending the stepping mode.
+ *
+ *     2. Stepping over an instruction raising an exception (e.g. INT, DIV,
+ *        or anything that could result in a page fault) will save the flags
+ *        to the stack, clear the TF flag, and let the guest execute the
+ *        handler. Normally, the guest will restore the original flags,
+ *        that will continue single-stepping.
+ *
+ *     3. Debuggers running on the guest may wish to set TF to do instruction
+ *        stepping. INT1 events generated by it would be intercepted by us,
+ *        as long as the gdb is connected to QEMU.
+ *
+ * In practice this means that:
+ *     1. Stepping through flags-modifying instructions may cause gdb to
+ *        continue or stop in unexpected places. This will be fully recoverable
+ *        and will not crash the target.
+ *
+ *     2. Stepping over an instruction that triggers an exception will step
+ *        over the exception handler, not into it.
+ *
+ *     3. Debugging the guest via gdb, while running debugger on the guest
+ *        at the same time may lead to unexpected effects. Removing all
+ *        breakpoints set via QEMU will prevent any further interference
+ *        with the guest-level debuggers.
+ *
+ * The limitations can be addressed as shown below:
+ *     1. PUSHF/SAHF/POPF/LAHF/IRET instructions can be emulated instead of
+ *        stepping through them. The exact semantics of the instructions is
+ *        defined in the "Combined Volume Set of Intel 64 and IA-32
+ *        Architectures Software Developer's Manuals", however it involves a
+ *        fair amount of corner cases due to compatibility with real mode,
+ *        virtual 8086 mode, and differences between 64-bit and 32-bit modes.
+ *
+ *     2. We could step into the guest's exception handlers using the following
+ *        sequence:
+ *          a. Temporarily enable catching of all exception types via
+ *             whpx_set_exception_exit_bitmap().
+ *          b. Once an exception is intercepted, read the IDT/GDT and locate
+ *             the original handler.
+ *          c. Patch the original handler, injecting an INT3 at the beginning.
+ *          d. Update the exception exit bitmap to only catch the
+ *             WHvX64ExceptionTypeBreakpointTrap exception.
+ *          e. Let the affected CPU run in the exclusive mode.
+ *          f. Restore the original handler and the exception exit bitmap.
+ *        Note that handling all corner cases related to IDT/GDT is harder
+ *        than it may seem. See x86_cpu_get_phys_page_attrs_debug() for a
+ *        rough idea.
+ *
+ *     3. In order to properly support guest-level debugging in parallel with
+ *        the QEMU-level debugging, we would need to be able to pass some INT1
+ *        events to the guest. This could be done via the following methods:
+ *          a. Using the WHvRegisterPendingEvent register. As of Windows 21H1,
+ *             it seems to only work for interrupts and not software
+ *             exceptions.
+ *          b. Locating and patching the original handler by parsing IDT/GDT.
+ *             This involves relatively complex logic outlined in the previous
+ *             paragraph.
+ *          c. Emulating the exception invocation (i.e. manually updating RIP,
+ *             RFLAGS, and pushing the old values to stack). This is even more
+ *             complicated than the previous option, since it involves checking
+ *             CPL, gate attributes, and doing various adjustments depending
+ *             on the current CPU mode, whether the CPL is changing, etc.
+ */
+typedef enum WhpxStepMode {
+    WHPX_STEP_NONE = 0,
+    /* Halt other VCPUs */
+    WHPX_STEP_EXCLUSIVE,
+} WhpxStepMode;
+
 struct whpx_vcpu {
     WHV_EMULATOR_HANDLE emulator;
     bool window_registered;
@@ -786,6 +867,517 @@ static int whpx_handle_portio(CPUState *cpu,
     return 0;
 }
 
+/*
+ * Controls whether we should intercept various exceptions on the guest,
+ * namely breakpoint/single-step events.
+ *
+ * The 'exceptions' argument accepts a bitmask, e.g:
+ * (1 << WHvX64ExceptionTypeDebugTrapOrFault) | (...)
+ */
+static HRESULT whpx_set_exception_exit_bitmap(UINT64 exceptions)
+{
+    struct whpx_state *whpx = &whpx_global;
+    WHV_PARTITION_PROPERTY prop = { 0, };
+    HRESULT hr;
+
+    if (exceptions == whpx->exception_exit_bitmap) {
+        return S_OK;
+    }
+
+    prop.ExceptionExitBitmap = exceptions;
+
+    hr = whp_dispatch.WHvSetPartitionProperty(
+        whpx->partition,
+        WHvPartitionPropertyCodeExceptionExitBitmap,
+        &prop,
+        sizeof(WHV_PARTITION_PROPERTY));
+
+    if (SUCCEEDED(hr)) {
+        whpx->exception_exit_bitmap = exceptions;
+    }
+
+    return hr;
+}
+
+
+/*
+ * This function is called before/after stepping over a single instruction.
+ * It will update the CPU registers to arm/disarm the instruction stepping
+ * accordingly.
+ */
+static HRESULT whpx_vcpu_configure_single_stepping(CPUState *cpu,
+    bool set,
+    uint64_t *exit_context_rflags)
+{
+    WHV_REGISTER_NAME reg_name;
+    WHV_REGISTER_VALUE reg_value;
+    HRESULT hr;
+    struct whpx_state *whpx = &whpx_global;
+
+    /*
+     * If we are trying to step over a single instruction, we need to set the
+     * TF bit in rflags. Otherwise, clear it.
+     */
+    reg_name = WHvX64RegisterRflags;
+    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+        whpx->partition,
+        cpu->cpu_index,
+        &reg_name,
+        1,
+        &reg_value);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to get rflags, hr=%08lx", hr);
+        return hr;
+    }
+
+    if (exit_context_rflags) {
+        assert(*exit_context_rflags == reg_value.Reg64);
+    }
+
+    if (set) {
+        /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction */
+        reg_value.Reg64 |= TF_MASK;
+    } else {
+        reg_value.Reg64 &= ~TF_MASK;
+    }
+
+    if (exit_context_rflags) {
+        *exit_context_rflags = reg_value.Reg64;
+    }
+
+    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+        whpx->partition,
+        cpu->cpu_index,
+        &reg_name,
+        1,
+        &reg_value);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set rflags,"
+            " hr=%08lx",
+            hr);
+        return hr;
+    }
+
+    reg_name = WHvRegisterInterruptState;
+    reg_value.Reg64 = 0;
+
+    /* Suspend delivery of hardware interrupts during single-stepping. */
+    reg_value.InterruptState.InterruptShadow = set != 0;
+
+    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+    whpx->partition,
+        cpu->cpu_index,
+        &reg_name,
+        1,
+        &reg_value);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set InterruptState,"
+            " hr=%08lx",
+            hr);
+        return hr;
+    }
+
+    if (!set) {
+        /*
+         * We have just finished stepping over a single instruction,
+         * and intercepted the INT1 generated by it.
+         * We need to now hide the INT1 from the guest,
+         * as it would not be expecting it.
+         */
+
+        reg_name = WHvX64RegisterPendingDebugException;
+        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+        whpx->partition,
+            cpu->cpu_index,
+            &reg_name,
+            1,
+            &reg_value);
+
+        if (FAILED(hr)) {
+            error_report("WHPX: Failed to get pending debug exceptions,"
+                         "hr=%08lx", hr);
+            return hr;
+        }
+
+        if (reg_value.PendingDebugException.SingleStep) {
+            reg_value.PendingDebugException.SingleStep = 0;
+
+            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+                whpx->partition,
+                cpu->cpu_index,
+                &reg_name,
+                1,
+                &reg_value);
+
+            if (FAILED(hr)) {
+                error_report("WHPX: Failed to clear pending debug exceptions,"
+                             "hr=%08lx", hr);
+             return hr;
+            }
+        }
+
+    }
+
+    return S_OK;
+}
+
+/* Tries to find a breakpoint at the specified address. */
+static struct whpx_breakpoint *whpx_lookup_breakpoint_by_addr(uint64_t address)
+{
+    struct whpx_state *whpx = &whpx_global;
+    int i;
+
+    if (whpx->breakpoints.breakpoints) {
+        for (i = 0; i < whpx->breakpoints.breakpoints->used; i++) {
+            if (address == whpx->breakpoints.breakpoints->data[i].address) {
+                return &whpx->breakpoints.breakpoints->data[i];
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Linux uses int3 (0xCC) during startup (see int3_selftest()) and for
+ * debugging user-mode applications. Since the WHPX API does not offer
+ * an easy way to pass the intercepted exception back to the guest, we
+ * resort to using INT1 instead, and let the guest always handle INT3.
+ */
+static const uint8_t whpx_breakpoint_instruction = 0xF1;
+
+/*
+ * The WHPX QEMU backend implements breakpoints by writing the INT1
+ * instruction into memory (ignoring the DRx registers). This raises a few
+ * issues that need to be carefully handled:
+ *
+ * 1. Although unlikely, other parts of QEMU may set multiple breakpoints
+ *    at the same location, and later remove them in arbitrary order.
+ *    This should not cause memory corruption, and should only remove the
+ *    physical breakpoint instruction when the last QEMU breakpoint is gone.
+ *
+ * 2. Writing arbitrary virtual memory may fail if it's not mapped to a valid
+ *    physical location. Hence, physically adding/removing a breakpoint can
+ *    theoretically fail at any time. We need to keep track of it.
+ *
+ * The function below rebuilds a list of low-level breakpoints (one per
+ * address, tracking the original instruction and any errors) from the list of
+ * high-level breakpoints (set via cpu_breakpoint_insert()).
+ *
+ * In order to optimize performance, this function stores the list of
+ * high-level breakpoints (a.k.a. CPU breakpoints) used to compute the
+ * low-level ones, so that it won't be re-invoked until these breakpoints
+ * change.
+ *
+ * Note that this function decides which breakpoints should be inserted into,
+ * memory, but doesn't actually do it. The memory accessing is done in
+ * whpx_apply_breakpoints().
+ */
+static void whpx_translate_cpu_breakpoints(
+    struct whpx_breakpoints *breakpoints,
+    CPUState *cpu,
+    int cpu_breakpoint_count)
+{
+    CPUBreakpoint *bp;
+    int cpu_bp_index = 0;
+
+    breakpoints->original_addresses =
+        g_renew(vaddr, breakpoints->original_addresses, cpu_breakpoint_count);
+
+    breakpoints->original_address_count = cpu_breakpoint_count;
+
+    int max_breakpoints = cpu_breakpoint_count +
+        (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);
+
+    struct whpx_breakpoint_collection *new_breakpoints =
+        (struct whpx_breakpoint_collection *)g_malloc0(
+        sizeof(struct whpx_breakpoint_collection) +
+            max_breakpoints * sizeof(struct whpx_breakpoint));
+
+    new_breakpoints->allocated = max_breakpoints;
+    new_breakpoints->used = 0;
+
+    /*
+     * 1. Preserve all old breakpoints that could not be automatically
+     * cleared when the CPU got stopped.
+     */
+    if (breakpoints->breakpoints) {
+        int i;
+        for (i = 0; i < breakpoints->breakpoints->used; i++) {
+            if (breakpoints->breakpoints->data[i].state != WHPX_BP_CLEARED) {
+                new_breakpoints->data[new_breakpoints->used++] =
+                    breakpoints->breakpoints->data[i];
+            }
+        }
+    }
+
+    /* 2. Map all CPU breakpoints to WHPX breakpoints */
+    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
+        int i;
+        bool found = false;
+
+        /* This will be used to detect changed CPU breakpoints later. */
+        breakpoints->original_addresses[cpu_bp_index++] = bp->pc;
+
+        for (i = 0; i < new_breakpoints->used; i++) {
+            /*
+             * WARNING: This loop has O(N^2) complexity, where N is the
+             * number of breakpoints. It should not be a bottleneck in
+             * real-world scenarios, since it only needs to run once after
+             * the breakpoints have been modified.
+             * If this ever becomes a concern, it can be optimized by storing
+             * high-level breakpoint objects in a tree or hash map.
+             */
+
+            if (new_breakpoints->data[i].address == bp->pc) {
+                /* There was already a breakpoint at this address. */
+                if (new_breakpoints->data[i].state == WHPX_BP_CLEAR_PENDING) {
+                    new_breakpoints->data[i].state = WHPX_BP_SET;
+                } else if (new_breakpoints->data[i].state == WHPX_BP_SET) {
+                    new_breakpoints->data[i].state = WHPX_BP_SET_PENDING;
+                }
+
+                found = true;
+                break;
+            }
+        }
+
+        if (!found && new_breakpoints->used < new_breakpoints->allocated) {
+            /* No WHPX breakpoint at this address. Create one. */
+            new_breakpoints->data[new_breakpoints->used].address = bp->pc;
+            new_breakpoints->data[new_breakpoints->used].state =
+                WHPX_BP_SET_PENDING;
+            new_breakpoints->used++;
+        }
+    }
+
+    if (breakpoints->breakpoints) {
+        /*
+         * Free the previous breakpoint list. This can be optimized by keeping
+         * it as shadow buffer for the next computation instead of freeing
+         * it immediately.
+         */
+        g_free(breakpoints->breakpoints);
+    }
+
+    breakpoints->breakpoints = new_breakpoints;
+}
+
+/*
+ * Physically inserts/removes the breakpoints by reading and writing the
+ * physical memory, keeping a track of the failed attempts.
+ *
+ * Passing resuming=true  will try to set all previously unset breakpoints.
+ * Passing resuming=false will remove all inserted ones.
+ */
+static void whpx_apply_breakpoints(
+    struct whpx_breakpoint_collection *breakpoints,
+    CPUState *cpu,
+    bool resuming)
+{
+    int i, rc;
+    if (!breakpoints) {
+        return;
+    }
+
+    for (i = 0; i < breakpoints->used; i++) {
+        /* Decide what to do right now based on the last known state. */
+        WhpxBreakpointState state = breakpoints->data[i].state;
+        switch (state) {
+        case WHPX_BP_CLEARED:
+            if (resuming) {
+                state = WHPX_BP_SET_PENDING;
+            }
+            break;
+        case WHPX_BP_SET_PENDING:
+            if (!resuming) {
+                state = WHPX_BP_CLEARED;
+            }
+            break;
+        case WHPX_BP_SET:
+            if (!resuming) {
+                state = WHPX_BP_CLEAR_PENDING;
+            }
+            break;
+        case WHPX_BP_CLEAR_PENDING:
+            if (resuming) {
+                state = WHPX_BP_SET;
+            }
+            break;
+        }
+
+        if (state == WHPX_BP_SET_PENDING) {
+            /* Remember the original instruction. */
+            rc = cpu_memory_rw_debug(cpu,
+                breakpoints->data[i].address,
+                &breakpoints->data[i].original_instruction,
+                1,
+                false);
+
+            if (!rc) {
+                /* Write the breakpoint instruction. */
+                rc = cpu_memory_rw_debug(cpu,
+                    breakpoints->data[i].address,
+                    (void *)&whpx_breakpoint_instruction,
+                    1,
+                    true);
+            }
+
+            if (!rc) {
+                state = WHPX_BP_SET;
+            }
+
+        }
+
+        if (state == WHPX_BP_CLEAR_PENDING) {
+            /* Restore the original instruction. */
+            rc = cpu_memory_rw_debug(cpu,
+                breakpoints->data[i].address,
+                &breakpoints->data[i].original_instruction,
+                1,
+                true);
+
+            if (!rc) {
+                state = WHPX_BP_CLEARED;
+            }
+        }
+
+        breakpoints->data[i].state = state;
+    }
+}
+
+/*
+ * This function is called when the a VCPU is about to start and no other
+ * VCPUs have been started so far. Since the VCPU start order could be
+ * arbitrary, it doesn't have to be VCPU#0.
+ *
+ * It is used to commit the breakpoints into memory, and configure WHPX
+ * to intercept debug exceptions.
+ *
+ * Note that whpx_set_exception_exit_bitmap() cannot be called if one or
+ * more VCPUs are already running, so this is the best place to do it.
+ */
+static int whpx_first_vcpu_starting(CPUState *cpu)
+{
+    struct whpx_state *whpx = &whpx_global;
+    HRESULT hr;
+
+    g_assert(qemu_mutex_iothread_locked());
+
+    if (!QTAILQ_EMPTY(&cpu->breakpoints) ||
+            (whpx->breakpoints.breakpoints &&
+             whpx->breakpoints.breakpoints->used)) {
+        CPUBreakpoint *bp;
+        int i = 0;
+        bool update_pending = false;
+
+        QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
+            if (i >= whpx->breakpoints.original_address_count ||
+                bp->pc != whpx->breakpoints.original_addresses[i]) {
+                update_pending = true;
+            }
+
+            i++;
+        }
+
+        if (i != whpx->breakpoints.original_address_count) {
+            update_pending = true;
+        }
+
+        if (update_pending) {
+            /*
+             * The CPU breakpoints have changed since the last call to
+             * whpx_translate_cpu_breakpoints(). WHPX breakpoints must
+             * now be recomputed.
+             */
+            whpx_translate_cpu_breakpoints(&whpx->breakpoints, cpu, i);
+        }
+
+        /* Actually insert the breakpoints into the memory. */
+        whpx_apply_breakpoints(whpx->breakpoints.breakpoints, cpu, true);
+    }
+
+    uint64_t exception_mask;
+    if (whpx->step_pending ||
+        (whpx->breakpoints.breakpoints &&
+         whpx->breakpoints.breakpoints->used)) {
+        /*
+         * We are either attempting to single-step one or more CPUs, or
+         * have one or more breakpoints enabled. Both require intercepting
+         * the WHvX64ExceptionTypeBreakpointTrap exception.
+         */
+
+        exception_mask = 1UL << WHvX64ExceptionTypeDebugTrapOrFault;
+    } else {
+        /* Let the guest handle all exceptions. */
+        exception_mask = 0;
+    }
+
+    hr = whpx_set_exception_exit_bitmap(exception_mask);
+    if (!SUCCEEDED(hr)) {
+        error_report("WHPX: Failed to update exception exit mask,"
+                     "hr=%08lx.", hr);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * This function is called when the last VCPU has finished running.
+ * It is used to remove any previously set breakpoints from memory.
+ */
+static int whpx_last_vcpu_stopping(CPUState *cpu)
+{
+    whpx_apply_breakpoints(whpx_global.breakpoints.breakpoints, cpu, false);
+    return 0;
+}
+
+/* Returns the address of the next instruction that is about to be executed. */
+static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
+{
+    if (cpu->vcpu_dirty) {
+        /* The CPU registers have been modified by other parts of QEMU. */
+        CPUArchState *env = (CPUArchState *)(cpu->env_ptr);
+        return env->eip;
+    } else if (exit_context_valid) {
+        /*
+         * The CPU registers have not been modified by neither other parts
+         * of QEMU, nor this port by calling WHvSetVirtualProcessorRegisters().
+         * This is the most common case.
+         */
+        struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
+        return vcpu->exit_ctx.VpContext.Rip;
+    } else {
+        /*
+         * The CPU registers have been modified by a call to
+         * WHvSetVirtualProcessorRegisters() and must be re-queried from
+         * the target.
+         */
+        WHV_REGISTER_VALUE reg_value;
+        WHV_REGISTER_NAME reg_name = WHvX64RegisterRip;
+        HRESULT hr;
+        struct whpx_state *whpx = &whpx_global;
+
+        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+            whpx->partition,
+            cpu->cpu_index,
+            &reg_name,
+            1,
+            &reg_value);
+
+        if (FAILED(hr)) {
+            error_report("WHPX: Failed to get PC, hr=%08lx", hr);
+            return 0;
+        }
+
+        return reg_value.Reg64;
+    }
+}
+
 static int whpx_handle_halt(CPUState *cpu)
 {
     CPUX86State *env = cpu->env_ptr;
@@ -997,17 +1589,75 @@ static int whpx_vcpu_run(CPUState *cpu)
     HRESULT hr;
     struct whpx_state *whpx = &whpx_global;
     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
+    struct whpx_breakpoint *stepped_over_bp = NULL;
+    WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE;
     int ret;
 
-    whpx_vcpu_process_async_events(cpu);
-    if (cpu->halted && !whpx_apic_in_platform()) {
-        cpu->exception_index = EXCP_HLT;
-        qatomic_set(&cpu->exit_request, false);
-        return 0;
+    g_assert(qemu_mutex_iothread_locked());
+
+    if (whpx->running_cpus++ == 0) {
+        /* Insert breakpoints into memory, update exception exit bitmap. */
+        ret = whpx_first_vcpu_starting(cpu);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+
+    if (whpx->breakpoints.breakpoints &&
+        whpx->breakpoints.breakpoints->used > 0)
+    {
+        uint64_t pc = whpx_vcpu_get_pc(cpu, true);
+        stepped_over_bp = whpx_lookup_breakpoint_by_addr(pc);
+        if (stepped_over_bp && stepped_over_bp->state != WHPX_BP_SET) {
+            stepped_over_bp = NULL;
+        }
+
+        if (stepped_over_bp) {
+            /*
+             * We are trying to run the instruction overwritten by an active
+             * breakpoint. We will temporarily disable the breakpoint, suspend
+             * other CPUs, and step over the instruction.
+             */
+            exclusive_step_mode = WHPX_STEP_EXCLUSIVE;
+        }
+    }
+
+    if (exclusive_step_mode == WHPX_STEP_NONE) {
+        whpx_vcpu_process_async_events(cpu);
+        if (cpu->halted && !whpx_apic_in_platform()) {
+            cpu->exception_index = EXCP_HLT;
+            qatomic_set(&cpu->exit_request, false);
+            return 0;
+        }
     }
 
     qemu_mutex_unlock_iothread();
-    cpu_exec_start(cpu);
+
+    if (exclusive_step_mode != WHPX_STEP_NONE) {
+        start_exclusive();
+        g_assert(cpu == current_cpu);
+        g_assert(!cpu->running);
+        cpu->running = true;
+
+        hr = whpx_set_exception_exit_bitmap(
+            1UL << WHvX64ExceptionTypeDebugTrapOrFault);
+        if (!SUCCEEDED(hr)) {
+            error_report("WHPX: Failed to update exception exit mask, "
+                         "hr=%08lx.", hr);
+            return 1;
+        }
+
+        if (stepped_over_bp) {
+            /* Temporarily disable the triggered breakpoint. */
+            cpu_memory_rw_debug(cpu,
+                stepped_over_bp->address,
+                &stepped_over_bp->original_instruction,
+                1,
+                true);
+        }
+    } else {
+        cpu_exec_start(cpu);
+    }
 
     do {
         if (cpu->vcpu_dirty) {
@@ -1015,10 +1665,16 @@ static int whpx_vcpu_run(CPUState *cpu)
             cpu->vcpu_dirty = false;
         }
 
-        whpx_vcpu_pre_run(cpu);
+        if (exclusive_step_mode == WHPX_STEP_NONE) {
+            whpx_vcpu_pre_run(cpu);
+
+            if (qatomic_read(&cpu->exit_request)) {
+                whpx_vcpu_kick(cpu);
+            }
+        }
 
-        if (qatomic_read(&cpu->exit_request)) {
-            whpx_vcpu_kick(cpu);
+        if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
+            whpx_vcpu_configure_single_stepping(cpu, true, NULL);
         }
 
         hr = whp_dispatch.WHvRunVirtualProcessor(
@@ -1032,6 +1688,12 @@ static int whpx_vcpu_run(CPUState *cpu)
             break;
         }
 
+        if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
+            whpx_vcpu_configure_single_stepping(cpu,
+                false,
+                &vcpu->exit_ctx.VpContext.Rflags);
+        }
+
         whpx_vcpu_post_run(cpu);
 
         switch (vcpu->exit_ctx.ExitReason) {
@@ -1055,6 +1717,10 @@ static int whpx_vcpu_run(CPUState *cpu)
             break;
 
         case WHvRunVpExitReasonX64Halt:
+            /*
+             * WARNING: as of build 19043.1526 (21H1), this exit reason is no
+             * longer used.
+             */
             ret = whpx_handle_halt(cpu);
             break;
 
@@ -1153,10 +1819,19 @@ static int whpx_vcpu_run(CPUState *cpu)
         }
 
         case WHvRunVpExitReasonCanceled:
-            cpu->exception_index = EXCP_INTERRUPT;
-            ret = 1;
+            if (exclusive_step_mode != WHPX_STEP_NONE) {
+                /*
+                 * We are trying to step over a single instruction, and
+                 * likely got a request to stop from another thread.
+                 * Delay it until we are done stepping
+                 * over.
+                 */
+                ret = 0;
+            } else {
+                cpu->exception_index = EXCP_INTERRUPT;
+                ret = 1;
+            }
             break;
-
         case WHvRunVpExitReasonX64MsrAccess: {
             WHV_REGISTER_VALUE reg_values[3] = {0};
             WHV_REGISTER_NAME reg_names[3];
@@ -1260,11 +1935,36 @@ static int whpx_vcpu_run(CPUState *cpu)
             ret = 0;
             break;
         }
+        case WHvRunVpExitReasonException:
+            whpx_get_registers(cpu);
+
+            if ((vcpu->exit_ctx.VpException.ExceptionType ==
+                 WHvX64ExceptionTypeDebugTrapOrFault) &&
+                (vcpu->exit_ctx.VpException.InstructionByteCount >= 1) &&
+                (vcpu->exit_ctx.VpException.InstructionBytes[0] ==
+                 whpx_breakpoint_instruction)) {
+                /* Stopped at a software breakpoint. */
+                cpu->exception_index = EXCP_DEBUG;
+            } else if ((vcpu->exit_ctx.VpException.ExceptionType ==
+                        WHvX64ExceptionTypeDebugTrapOrFault) &&
+                       !cpu->singlestep_enabled) {
+                /*
+                 * Just finished stepping over a breakpoint, but the
+                 * gdb does not expect us to do single-stepping.
+                 * Don't do anything special.
+                 */
+                cpu->exception_index = EXCP_INTERRUPT;
+            } else {
+                /* Another exception or debug event. Report it to GDB. */
+                cpu->exception_index = EXCP_DEBUG;
+            }
+
+            ret = 1;
+            break;
         case WHvRunVpExitReasonNone:
         case WHvRunVpExitReasonUnrecoverableException:
         case WHvRunVpExitReasonInvalidVpRegisterValue:
         case WHvRunVpExitReasonUnsupportedFeature:
-        case WHvRunVpExitReasonException:
         default:
             error_report("WHPX: Unexpected VP exit code %d",
                          vcpu->exit_ctx.ExitReason);
@@ -1277,10 +1977,32 @@ static int whpx_vcpu_run(CPUState *cpu)
 
     } while (!ret);
 
-    cpu_exec_end(cpu);
+    if (stepped_over_bp) {
+        /* Restore the breakpoint we stepped over */
+        cpu_memory_rw_debug(cpu,
+            stepped_over_bp->address,
+            (void *)&whpx_breakpoint_instruction,
+            1,
+            true);
+    }
+
+    if (exclusive_step_mode != WHPX_STEP_NONE) {
+        g_assert(cpu_in_exclusive_context(cpu));
+        cpu->running = false;
+        end_exclusive();
+
+        exclusive_step_mode = WHPX_STEP_NONE;
+    } else {
+        cpu_exec_end(cpu);
+    }
+
     qemu_mutex_lock_iothread();
     current_cpu = cpu;
 
+    if (--whpx->running_cpus == 0) {
+        whpx_last_vcpu_stopping(cpu);
+    }
+
     qatomic_set(&cpu->exit_request, false);
 
     return ret < 0;
@@ -1340,6 +2062,11 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
     run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
 }
 
+void whpx_cpu_synchronize_pre_resume(bool step_pending)
+{
+    whpx_global.step_pending = step_pending;
+}
+
 /*
  * Vcpu support.
  */
@@ -1572,15 +2299,15 @@ static void whpx_process_section(MemoryRegionSection *section, int add)
         return;
     }
 
-    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
-    delta &= ~qemu_real_host_page_mask;
+    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
+    delta &= ~qemu_real_host_page_mask();
     if (delta > size) {
         return;
     }
     start_pa += delta;
     size -= delta;
-    size &= qemu_real_host_page_mask;
-    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
+    size &= qemu_real_host_page_mask();
+    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
         return;
     }
 
@@ -1839,6 +2566,7 @@ static int whpx_accel_init(MachineState *ms)
     memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
     prop.ExtendedVmExits.X64MsrExit = 1;
     prop.ExtendedVmExits.X64CpuidExit = 1;
+    prop.ExtendedVmExits.ExceptionExit = 1;
     if (whpx_apic_in_platform()) {
         prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
     }
@@ -1867,6 +2595,19 @@ static int whpx_accel_init(MachineState *ms)
         goto error;
     }
 
+    /*
+     * We do not want to intercept any exceptions from the guest,
+     * until we actually start debugging with gdb.
+     */
+    whpx->exception_exit_bitmap = -1;
+    hr = whpx_set_exception_exit_bitmap(0);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx", hr);
+        ret = -EINVAL;
+        goto error;
+    }
+
     hr = whp_dispatch.WHvSetupPartition(whpx->partition);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
diff --git a/target/i386/whpx/whpx-apic.c b/target/i386/whpx/whpx-apic.c
index bba36f3ec9..c15df35ad6 100644
--- a/target/i386/whpx/whpx-apic.c
+++ b/target/i386/whpx/whpx-apic.c
@@ -11,7 +11,6 @@
  * See the COPYING file in the top-level directory.
  */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "cpu.h"
 #include "hw/i386/apic_internal.h"
 #include "hw/i386/apic-msidef.h"
diff --git a/target/i386/whpx/whpx-internal.h b/target/i386/whpx/whpx-internal.h
index 908ababf6d..2416ec7922 100644
--- a/target/i386/whpx/whpx-internal.h
+++ b/target/i386/whpx/whpx-internal.h
@@ -5,9 +5,39 @@
 #include <WinHvPlatform.h>
 #include <WinHvEmulation.h>
 
+typedef enum WhpxBreakpointState {
+    WHPX_BP_CLEARED = 0,
+    WHPX_BP_SET_PENDING,
+    WHPX_BP_SET,
+    WHPX_BP_CLEAR_PENDING,
+} WhpxBreakpointState;
+
+struct whpx_breakpoint {
+    vaddr address;
+    WhpxBreakpointState state;
+    uint8_t original_instruction;
+};
+
+struct whpx_breakpoint_collection {
+    int allocated, used;
+    struct whpx_breakpoint data[0];
+};
+
+struct whpx_breakpoints {
+    int original_address_count;
+    vaddr *original_addresses;
+
+    struct whpx_breakpoint_collection *breakpoints;
+};
+
 struct whpx_state {
     uint64_t mem_quota;
     WHV_PARTITION_HANDLE partition;
+    uint64_t exception_exit_bitmap;
+    int32_t running_cpus;
+    struct whpx_breakpoints breakpoints;
+    bool step_pending;
+
     bool kernel_irqchip_allowed;
     bool kernel_irqchip_required;
     bool apic_in_platform;
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
index 872e8ce637..9b3bf7a448 100644
--- a/target/m68k/cpu.h
+++ b/target/m68k/cpu.h
@@ -22,6 +22,7 @@
 #define M68K_CPU_H
 
 #include "exec/cpu-defs.h"
+#include "qemu/cpu-float.h"
 #include "cpu-qom.h"
 
 #define OS_BYTE     0
diff --git a/target/m68k/m68k-semi.c b/target/m68k/m68k-semi.c
index c5c164e096..37343d47e2 100644
--- a/target/m68k/m68k-semi.c
+++ b/target/m68k/m68k-semi.c
@@ -378,19 +378,17 @@ void do_m68k_semihosting(CPUM68KState *env, int nr)
                            arg0, arg1);
             return;
         } else {
-            qemu_timeval tv;
             struct gdb_timeval *p;
-            result = qemu_gettimeofday(&tv);
-            if (result == 0) {
-                if (!(p = lock_user(VERIFY_WRITE,
-                                    arg0, sizeof(struct gdb_timeval), 0))) {
-                    /* FIXME - check error code? */
-                    result = -1;
-                } else {
-                    p->tv_sec = cpu_to_be32(tv.tv_sec);
-                    p->tv_usec = cpu_to_be64(tv.tv_usec);
-                    unlock_user(p, arg0, sizeof(struct gdb_timeval));
-                }
+            int64_t rt = g_get_real_time();
+            p = lock_user(VERIFY_WRITE, arg0, sizeof(struct gdb_timeval), 0);
+            if (!p) {
+                /* FIXME - check error code? */
+                result = -1;
+            } else {
+                result = 0;
+                p->tv_sec = cpu_to_be32(rt / G_USEC_PER_SEC);
+                p->tv_usec = cpu_to_be64(rt % G_USEC_PER_SEC);
+                unlock_user(p, arg0, sizeof(struct gdb_timeval));
             }
         }
         break;
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index 0a0ce71b6a..67aa88b8db 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -22,7 +22,7 @@
 
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
-#include "fpu/softfloat-types.h"
+#include "qemu/cpu-float.h"
 
 typedef struct CPUArchState CPUMBState;
 #if !defined(CONFIG_USER_ONLY)
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index af287177d5..ad74fbe636 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -189,7 +189,7 @@ static void mips_cpu_reset(DeviceState *dev)
     /* Reset registers to their default values */
     env->CP0_PRid = env->cpu_model->CP0_PRid;
     env->CP0_Config0 = env->cpu_model->CP0_Config0;
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
     env->CP0_Config0 |= (1 << CP0C0_BE);
 #endif
     env->CP0_Config1 = env->cpu_model->CP0_Config1;
@@ -418,7 +418,7 @@ static void mips_cpu_disas_set_info(CPUState *s, disassemble_info *info)
     CPUMIPSState *env = &cpu->env;
 
     if (!(env->insn_flags & ISA_NANOMIPS32)) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
         info->print_insn = print_insn_big_mips;
 #else
         info->print_insn = print_insn_little_mips;
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 52ce08a94d..5335ac10a3 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -35,7 +35,7 @@ union fpr_t {
  *define FP_ENDIAN_IDX to access the same location
  * in the fpr_t union regardless of the host endianness
  */
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #  define FP_ENDIAN_IDX 1
 #else
 #  define FP_ENDIAN_IDX 0
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index 086debd9f0..caf70decd2 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -14,7 +14,6 @@
 
 #include <linux/kvm.h>
 
-#include "qemu-common.h"
 #include "cpu.h"
 #include "internal.h"
 #include "qemu/error-report.h"
diff --git a/target/mips/tcg/lmmi_helper.c b/target/mips/tcg/lmmi_helper.c
index abeb7736ae..2c8732525c 100644
--- a/target/mips/tcg/lmmi_helper.c
+++ b/target/mips/tcg/lmmi_helper.c
@@ -37,7 +37,7 @@ typedef union {
 } LMIValue;
 
 /* Some byte ordering issues can be mitigated by XORing in the following.  */
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 # define BYTE_ORDER_XOR(N) N
 #else
 # define BYTE_ORDER_XOR(N) 0
diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c
index 5667b1f0a1..4dde5d639a 100644
--- a/target/mips/tcg/msa_helper.c
+++ b/target/mips/tcg/msa_helper.c
@@ -4146,7 +4146,7 @@ void helper_msa_ilvev_b(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->b[8]  = pws->b[9];
     pwd->b[9]  = pwt->b[9];
     pwd->b[10] = pws->b[11];
@@ -4190,7 +4190,7 @@ void helper_msa_ilvev_h(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->h[4] = pws->h[5];
     pwd->h[5] = pwt->h[5];
     pwd->h[6] = pws->h[7];
@@ -4218,7 +4218,7 @@ void helper_msa_ilvev_w(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->w[2] = pws->w[3];
     pwd->w[3] = pwt->w[3];
     pwd->w[0] = pws->w[1];
@@ -4250,7 +4250,7 @@ void helper_msa_ilvod_b(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->b[7]  = pwt->b[6];
     pwd->b[6]  = pws->b[6];
     pwd->b[5]  = pwt->b[4];
@@ -4294,7 +4294,7 @@ void helper_msa_ilvod_h(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->h[3] = pwt->h[2];
     pwd->h[2] = pws->h[2];
     pwd->h[1] = pwt->h[0];
@@ -4322,7 +4322,7 @@ void helper_msa_ilvod_w(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->w[1] = pwt->w[0];
     pwd->w[0] = pws->w[0];
     pwd->w[3] = pwt->w[2];
@@ -4354,7 +4354,7 @@ void helper_msa_ilvl_b(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->b[7]  = pwt->b[15];
     pwd->b[6]  = pws->b[15];
     pwd->b[5]  = pwt->b[14];
@@ -4398,7 +4398,7 @@ void helper_msa_ilvl_h(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->h[3] = pwt->h[7];
     pwd->h[2] = pws->h[7];
     pwd->h[1] = pwt->h[6];
@@ -4426,7 +4426,7 @@ void helper_msa_ilvl_w(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->w[1] = pwt->w[3];
     pwd->w[0] = pws->w[3];
     pwd->w[3] = pwt->w[2];
@@ -4458,7 +4458,7 @@ void helper_msa_ilvr_b(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->b[8]  = pws->b[0];
     pwd->b[9]  = pwt->b[0];
     pwd->b[10] = pws->b[1];
@@ -4502,7 +4502,7 @@ void helper_msa_ilvr_h(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->h[4] = pws->h[0];
     pwd->h[5] = pwt->h[0];
     pwd->h[6] = pws->h[1];
@@ -4530,7 +4530,7 @@ void helper_msa_ilvr_w(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->w[2] = pws->w[0];
     pwd->w[3] = pwt->w[0];
     pwd->w[0] = pws->w[1];
@@ -4661,7 +4661,7 @@ void helper_msa_pckev_b(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->b[8]  = pws->b[9];
     pwd->b[10] = pws->b[13];
     pwd->b[12] = pws->b[1];
@@ -4705,7 +4705,7 @@ void helper_msa_pckev_h(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->h[4] = pws->h[5];
     pwd->h[6] = pws->h[1];
     pwd->h[0] = pwt->h[5];
@@ -4733,7 +4733,7 @@ void helper_msa_pckev_w(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->w[2] = pws->w[3];
     pwd->w[0] = pwt->w[3];
     pwd->w[3] = pws->w[1];
@@ -4765,7 +4765,7 @@ void helper_msa_pckod_b(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->b[7]  = pwt->b[6];
     pwd->b[5]  = pwt->b[2];
     pwd->b[3]  = pwt->b[14];
@@ -4810,7 +4810,7 @@ void helper_msa_pckod_h(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->h[3] = pwt->h[2];
     pwd->h[1] = pwt->h[6];
     pwd->h[7] = pws->h[2];
@@ -4838,7 +4838,7 @@ void helper_msa_pckod_w(CPUMIPSState *env,
     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     pwd->w[1] = pwt->w[0];
     pwd->w[3] = pws->w[0];
     pwd->w[0] = pwt->w[2];
@@ -5926,7 +5926,7 @@ void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
                          uint32_t ws, uint32_t n)
 {
     n %= 16;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     if (n < 8) {
         n = 8 - n - 1;
     } else {
@@ -5940,7 +5940,7 @@ void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
                          uint32_t ws, uint32_t n)
 {
     n %= 8;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     if (n < 4) {
         n = 4 - n - 1;
     } else {
@@ -5954,7 +5954,7 @@ void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
                          uint32_t ws, uint32_t n)
 {
     n %= 4;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     if (n < 2) {
         n = 2 - n - 1;
     } else {
@@ -5975,7 +5975,7 @@ void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
                          uint32_t ws, uint32_t n)
 {
     n %= 16;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     if (n < 8) {
         n = 8 - n - 1;
     } else {
@@ -5989,7 +5989,7 @@ void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
                          uint32_t ws, uint32_t n)
 {
     n %= 8;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     if (n < 4) {
         n = 4 - n - 1;
     } else {
@@ -6003,7 +6003,7 @@ void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
                          uint32_t ws, uint32_t n)
 {
     n %= 4;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     if (n < 2) {
         n = 2 - n - 1;
     } else {
@@ -6019,7 +6019,7 @@ void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
     target_ulong rs = env->active_tc.gpr[rs_num];
     n %= 16;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     if (n < 8) {
         n = 8 - n - 1;
     } else {
@@ -6035,7 +6035,7 @@ void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
     target_ulong rs = env->active_tc.gpr[rs_num];
     n %= 8;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     if (n < 4) {
         n = 4 - n - 1;
     } else {
@@ -6051,7 +6051,7 @@ void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
     target_ulong rs = env->active_tc.gpr[rs_num];
     n %= 4;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     if (n < 2) {
         n = 2 - n - 1;
     } else {
@@ -8218,7 +8218,7 @@ void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
 #define MEMOP_IDX(DF)
 #endif
 
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
 static inline uint64_t bswap16x4(uint64_t x)
 {
     uint64_t m = 0x00ff00ff00ff00ffull;
@@ -8258,7 +8258,7 @@ void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
      */
     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
     d0 = bswap16x4(d0);
     d1 = bswap16x4(d1);
 #endif
@@ -8279,7 +8279,7 @@ void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
      */
     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
     d0 = bswap32x2(d0);
     d1 = bswap32x2(d1);
 #endif
@@ -8345,7 +8345,7 @@ void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
     /* Store 8 bytes at a time.  See helper_msa_ld_h. */
     d0 = pwd->d[0];
     d1 = pwd->d[1];
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
     d0 = bswap16x4(d0);
     d1 = bswap16x4(d1);
 #endif
@@ -8366,7 +8366,7 @@ void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
     /* Store 8 bytes at a time.  See helper_msa_ld_w. */
     d0 = pwd->d[0];
     d1 = pwd->d[1];
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
     d0 = bswap32x2(d0);
     d1 = bswap32x2(d1);
 #endif
diff --git a/target/nios2/nios2-semi.c b/target/nios2/nios2-semi.c
index 5a7ad0c710..ec88474a73 100644
--- a/target/nios2/nios2-semi.c
+++ b/target/nios2/nios2-semi.c
@@ -28,7 +28,6 @@
 #if defined(CONFIG_USER_ONLY)
 #include "qemu.h"
 #else
-#include "qemu-common.h"
 #include "exec/softmmu-semi.h"
 #endif
 #include "qemu/log.h"
@@ -400,20 +399,17 @@ void do_nios2_semihosting(CPUNios2State *env)
                            arg0, 0);
             return;
         } else {
-            qemu_timeval tv;
             struct gdb_timeval *p;
-            result = qemu_gettimeofday(&tv);
-            if (result == 0) {
-                p = lock_user(VERIFY_WRITE, arg0, sizeof(struct gdb_timeval),
-                              0);
-                if (!p) {
-                    result = -1;
-                    errno = EFAULT;
-                } else {
-                    p->tv_sec = cpu_to_be32(tv.tv_sec);
-                    p->tv_usec = cpu_to_be64(tv.tv_usec);
-                    unlock_user(p, arg0, sizeof(struct gdb_timeval));
-                }
+            int64_t rt = g_get_real_time();
+            p = lock_user(VERIFY_WRITE, arg0, sizeof(struct gdb_timeval), 0);
+            if (!p) {
+                result = -1;
+                errno = EFAULT;
+            } else {
+                result = 0;
+                p->tv_sec = cpu_to_be32(rt / G_USEC_PER_SEC);
+                p->tv_usec = cpu_to_be64(rt % G_USEC_PER_SEC);
+                unlock_user(p, arg0, sizeof(struct gdb_timeval));
             }
         }
         break;
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index bdf29d2dc4..b9584f10d4 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -21,6 +21,7 @@
 #define OPENRISC_CPU_H
 
 #include "exec/cpu-defs.h"
+#include "fpu/softfloat-types.h"
 #include "hw/core/cpu.h"
 #include "qom/object.h"
 
diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c
index 993740897d..1139cead9f 100644
--- a/target/ppc/arch_dump.c
+++ b/target/ppc/arch_dump.c
@@ -161,7 +161,7 @@ static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
         bool needs_byteswap;
         ppc_avr_t *avr = cpu_avr_ptr(&cpu->env, i);
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
         needs_byteswap = s->dump_info.d_endian == ELFDATA2LSB;
 #else
         needs_byteswap = s->dump_info.d_endian == ELFDATA2MSB;
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 047b24ba50..473436a49e 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -21,6 +21,7 @@
 #define PPC_CPU_H
 
 #include "qemu/int128.h"
+#include "qemu/cpu-float.h"
 #include "exec/cpu-defs.h"
 #include "cpu-qom.h"
 #include "qom/object.h"
@@ -2642,7 +2643,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx)
 }
 
 /* Accessors for FP, VMX and VSX registers */
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define VsrB(i) u8[i]
 #define VsrSB(i) s8[i]
 #define VsrH(i) u16[i]
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 073fd10168..5062d0e478 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7150,7 +7150,7 @@ static void ppc_cpu_reset(DeviceState *dev)
 #if defined(TARGET_PPC64)
     msr |= (target_ulong)1 << MSR_TM; /* Transactional memory */
 #endif
-#if !defined(TARGET_WORDS_BIGENDIAN)
+#if !TARGET_BIG_ENDIAN
     msr |= (target_ulong)1 << MSR_LE; /* Little-endian user mode */
     if (!((env->msr_mask >> MSR_LE) & 1)) {
         fprintf(stderr, "Selected CPU does not support little-endian.\n");
diff --git a/target/ppc/gdbstub.c b/target/ppc/gdbstub.c
index 105c2f7dd1..1252429a2a 100644
--- a/target/ppc/gdbstub.c
+++ b/target/ppc/gdbstub.c
@@ -87,9 +87,9 @@ static int ppc_gdb_register_len(int n)
 /*
  * We need to present the registers to gdb in the "current" memory
  * ordering.  For user-only mode we get this for free;
- * TARGET_WORDS_BIGENDIAN is set to the proper ordering for the
+ * TARGET_BIG_ENDIAN is set to the proper ordering for the
  * binary, and cannot be changed.  For system mode,
- * TARGET_WORDS_BIGENDIAN is always set, and we must check the current
+ * TARGET_BIG_ENDIAN is always set, and we must check the current
  * mode of the chip to see if we're running in little-endian.
  */
 void ppc_maybe_bswap_register(CPUPPCState *env, uint8_t *mem_buf, int len)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 492f34c499..8c1674510b 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -425,7 +425,7 @@ uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
 
 /*****************************************************************************/
 /* Altivec extension helpers */
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define VECTOR_FOR_INORDER_I(index, element)                    \
     for (index = 0; index < ARRAY_SIZE(r->element); index++)
 #else
@@ -1177,7 +1177,7 @@ XXGENPCV(XXGENPCVDM, 8)
 #undef XXGENPCV_LE_COMP
 #undef XXGENPCV
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
 #define VBPERMD_INDEX(i) (i)
 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
@@ -1298,7 +1298,7 @@ void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 }
 
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define PKBIG 1
 #else
 #define PKBIG 0
@@ -1307,7 +1307,7 @@ void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
     int i, j;
     ppc_avr_t result;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     const ppc_avr_t *x[2] = { a, b };
 #else
     const ppc_avr_t *x[2] = { b, a };
@@ -1516,7 +1516,7 @@ void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
     int sh = (b->VsrB(0xf) >> 3) & 0xf;
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     memmove(&r->u8[0], &a->u8[sh], 16 - sh);
     memset(&r->u8[16 - sh], 0, sh);
 #else
@@ -1525,7 +1525,7 @@ void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 #endif
 }
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
 #else
 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
@@ -1554,7 +1554,7 @@ VINSX(W, uint32_t)
 VINSX(D, uint64_t)
 #undef ELEM_ADDR
 #undef VINSX
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define VEXTDVLX(NAME, SIZE) \
 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                    target_ulong index)                                         \
@@ -1593,7 +1593,7 @@ VEXTDVLX(VEXTDUHVLX, 2)
 VEXTDVLX(VEXTDUWVLX, 4)
 VEXTDVLX(VEXTDDVLX, 8)
 #undef VEXTDVLX
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define VEXTRACT(suffix, element)                                            \
     void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
     {                                                                        \
@@ -1750,7 +1750,7 @@ void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
     int sh = (b->VsrB(0xf) >> 3) & 0xf;
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
     memmove(&r->u8[sh], &a->u8[0], 16 - sh);
     memset(&r->u8[0], 0, sh);
 #else
@@ -1867,7 +1867,7 @@ void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
     }
 }
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define UPKHI 1
 #define UPKLO 0
 #else
@@ -1974,7 +1974,7 @@ VGENERIC_DO(popcntd, u64)
 
 #undef VGENERIC_DO
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define QW_ONE { .u64 = { 0, 1 } }
 #else
 #define QW_ONE { .u64 = { 1, 0 } }
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index dc93b99189..f905a2af17 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -21,7 +21,6 @@
 
 #include <linux/kvm.h>
 
-#include "qemu-common.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "cpu.h"
@@ -418,7 +417,7 @@ void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
          * will be a normal mapping, not a special hugepage one used
          * for RAM.
          */
-        if (qemu_real_host_page_size < 0x10000) {
+        if (qemu_real_host_page_size() < 0x10000) {
             error_setg(errp,
                        "KVM can't supply 64kiB CI pages, which guest expects");
         }
@@ -632,7 +631,7 @@ static int kvm_put_fp(CPUState *cs)
             uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
             uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
             vsr[0] = float64_val(*fpr);
             vsr[1] = *vsrl;
 #else
@@ -710,7 +709,7 @@ static int kvm_get_fp(CPUState *cs)
                                         strerror(errno));
                 return ret;
             } else {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
                 *fpr = vsr[0];
                 if (vsx) {
                     *vsrl = vsr[1];
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 39945d9ea5..c4ff8fd632 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -32,7 +32,7 @@
 
 static inline bool needs_byteswap(const CPUPPCState *env)
 {
-#if defined(TARGET_WORDS_BIGENDIAN)
+#if TARGET_BIG_ENDIAN
   return msr_le;
 #else
   return !msr_le;
@@ -461,7 +461,7 @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
 
 /*****************************************************************************/
 /* Altivec extension helpers */
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define HI_IDX 0
 #define LO_IDX 1
 #else
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 408ae26173..f14f8d7309 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -193,7 +193,7 @@ struct DisasContext {
 /* Return true iff byteswap is needed in a scalar memop */
 static inline bool need_byteswap(const DisasContext *ctx)
 {
-#if defined(TARGET_WORDS_BIGENDIAN)
+#if TARGET_BIG_ENDIAN
      return ctx->le_mode;
 #else
      return !ctx->le_mode;
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 6101bca3fd..764ac45409 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -173,7 +173,7 @@ static void gen_mtvscr(DisasContext *ctx)
 
     val = tcg_temp_new_i32();
     bofs = avr_full_offset(rB(ctx->opcode));
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     bofs += 3 * 4;
 #endif
 
@@ -1692,7 +1692,7 @@ static void gen_vsplt(DisasContext *ctx, int vece)
 
     /* Experimental testing shows that hardware masks the immediate.  */
     bofs += (uimm << vece) & 15;
-#ifndef HOST_WORDS_BIGENDIAN
+#if !HOST_BIG_ENDIAN
     bofs ^= 15;
     bofs &= ~((1 << vece) - 1);
 #endif
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index d1f6333314..7181a672d8 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -1552,7 +1552,7 @@ static bool trans_XXSPLTW(DisasContext *ctx, arg_XX2_uim2 *a)
     tofs = vsr_full_offset(a->xt);
     bofs = vsr_full_offset(a->xb);
     bofs += a->uim << MO_32;
-#ifndef HOST_WORDS_BIGENDIAN
+#if !HOST_BIG_ENDIAN
     bofs ^= 8 | 4;
 #endif
 
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index c069fe85fa..e1d976bdef 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -23,7 +23,7 @@
 #include "hw/core/cpu.h"
 #include "hw/registerfields.h"
 #include "exec/cpu-defs.h"
-#include "fpu/softfloat-types.h"
+#include "qemu/cpu-float.h"
 #include "qom/object.h"
 #include "qemu/int128.h"
 #include "cpu_bits.h"
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 4ea7e41e1a..8d675db9a2 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3293,7 +3293,7 @@ static void load_element(TCGv_i64 dest, TCGv_ptr base,
 /* offset of the idx element with base regsiter r */
 static uint32_t endian_ofs(DisasContext *s, int r, int idx)
 {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew);
 #else
     return vreg_ofs(s, r) + (idx << s->sew);
@@ -3303,7 +3303,7 @@ static uint32_t endian_ofs(DisasContext *s, int r, int idx)
 /* adjust the index according to the endian */
 static void endian_adjust(TCGv_i32 ofs, int sew)
 {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
 #endif
 }
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index e6b7cb6d4d..70b4cff06f 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -21,7 +21,6 @@
 
 #include <linux/kvm.h>
 
-#include "qemu-common.h"
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 3bd4aac9c9..7a6ce0a3bc 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -79,7 +79,7 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
  * Note that vector data is stored in host-endian 64-bit chunks,
  * so addressing units smaller than that needs a host-endian fixup.
  */
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 #define H1(x)   ((x) ^ 7)
 #define H1_2(x) ((x) ^ 6)
 #define H1_4(x) ((x) ^ 4)
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 25a4aa2976..fb30080ac4 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -20,7 +20,6 @@
 #include "qemu/qemu-print.h"
 #include "qapi/error.h"
 #include "cpu.h"
-#include "qemu-common.h"
 #include "migration/vmstate.h"
 #include "exec/exec-all.h"
 #include "hw/loader.h"
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index b4abd90ccd..1c267f83bf 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -24,6 +24,7 @@
 #include "cpu-qom.h"
 
 #include "exec/cpu-defs.h"
+#include "qemu/cpu-float.h"
 
 /* PSW define */
 REG32(PSW, 0)
diff --git a/target/rx/gdbstub.c b/target/rx/gdbstub.c
index c811d4810b..7eb2059a84 100644
--- a/target/rx/gdbstub.c
+++ b/target/rx/gdbstub.c
@@ -16,7 +16,6 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "cpu.h"
 #include "exec/gdbstub.h"
 
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index c49c8466e7..7d6d01325b 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -28,6 +28,7 @@
 #include "cpu-qom.h"
 #include "cpu_models.h"
 #include "exec/cpu-defs.h"
+#include "qemu/cpu-float.h"
 
 #define ELF_MACHINE_UNAME "S390X"
 
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 6acf14d5ec..53098bf541 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -24,7 +24,6 @@
 #include <linux/kvm.h>
 #include <asm/ptrace.h>
 
-#include "qemu-common.h"
 #include "cpu.h"
 #include "s390x-internal.h"
 #include "kvm_s390x.h"
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 5acfc0ff9b..80f1f0be9a 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -263,7 +263,7 @@ static inline int vec_reg_offset(uint8_t reg, uint8_t enr, MemOp es)
      * 16 byte operations to handle it in a special way.
      */
     g_assert(es <= MO_64);
-#ifndef HOST_WORDS_BIGENDIAN
+#if !HOST_BIG_ENDIAN
     offs ^= (8 - bytes);
 #endif
     return offs + vec_full_reg_offset(reg);
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
index 98eb7710a4..b829ce0c7c 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -175,7 +175,7 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
 
     /* convert it to an element offset relative to cpu_env (vec_reg_offset() */
     tcg_gen_shli_i64(tmp, tmp, es);
-#ifndef HOST_WORDS_BIGENDIAN
+#if !HOST_BIG_ENDIAN
     tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
 #endif
     tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));
diff --git a/target/s390x/tcg/vec.h b/target/s390x/tcg/vec.h
index a6e361869b..8d095efcfc 100644
--- a/target/s390x/tcg/vec.h
+++ b/target/s390x/tcg/vec.h
@@ -38,7 +38,7 @@ typedef union S390Vector {
  * W:  [             1][             0] - [             3][             2]
  * DW: [                             0] - [                             1]
  */
-#ifndef HOST_WORDS_BIGENDIAN
+#if !HOST_BIG_ENDIAN
 #define H1(x)  ((x) ^ 7)
 #define H2(x)  ((x) ^ 3)
 #define H4(x)  ((x) ^ 1)
diff --git a/target/s390x/tcg/vec_fpu_helper.c b/target/s390x/tcg/vec_fpu_helper.c
index 1a77993471..aa2cc8e4a6 100644
--- a/target/s390x/tcg/vec_fpu_helper.c
+++ b/target/s390x/tcg/vec_fpu_helper.c
@@ -10,7 +10,6 @@
  * See the COPYING file in the top-level directory.
  */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "cpu.h"
 #include "s390x-internal.h"
 #include "vec.h"
diff --git a/target/s390x/tcg/vec_int_helper.c b/target/s390x/tcg/vec_int_helper.c
index 5561b3ed90..b44859ee16 100644
--- a/target/s390x/tcg/vec_int_helper.c
+++ b/target/s390x/tcg/vec_int_helper.c
@@ -10,7 +10,6 @@
  * See the COPYING file in the top-level directory.
  */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "cpu.h"
 #include "vec.h"
 #include "exec/helper-proto.h"
diff --git a/target/s390x/tcg/vec_string_helper.c b/target/s390x/tcg/vec_string_helper.c
index ac315eb095..f8b54bba4a 100644
--- a/target/s390x/tcg/vec_string_helper.c
+++ b/target/s390x/tcg/vec_string_helper.c
@@ -10,7 +10,6 @@
  * See the COPYING file in the top-level directory.
  */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "cpu.h"
 #include "s390x-internal.h"
 #include "vec.h"
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index c72a30edfd..14d490ac47 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -22,6 +22,7 @@
 
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#include "qemu/cpu-float.h"
 
 /* CPU Subtypes */
 #define SH_CPU_SH7750  (1 << 0)
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index abb38db674..dd9e2f5cdb 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -4,6 +4,7 @@
 #include "qemu/bswap.h"
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#include "qemu/cpu-float.h"
 
 #if !defined(TARGET_SPARC64)
 #define TARGET_DPREGS 16
diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c
index f917e5992d..3afdc6975c 100644
--- a/target/sparc/vis_helper.c
+++ b/target/sparc/vis_helper.c
@@ -42,7 +42,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
         GET_FIELD_SP(pixel_addr, 11, 12);
 }
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 #define VIS_B64(n) b[7 - (n)]
 #define VIS_W64(n) w[3 - (n)]
 #define VIS_SW64(n) sw[3 - (n)]
@@ -470,7 +470,7 @@ uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
     uint32_t i, mask, host;
 
     /* Set up S such that we can index across all of the bytes.  */
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     s.ll[0] = src1;
     s.ll[1] = src2;
     host = 0;
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
index 108d6b8288..3b9c533a7c 100644
--- a/target/tricore/cpu.h
+++ b/target/tricore/cpu.h
@@ -22,6 +22,7 @@
 
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#include "qemu/cpu-float.h"
 #include "tricore-defs.h"
 
 struct tricore_boot_info;
diff --git a/target/tricore/gdbstub.c b/target/tricore/gdbstub.c
index 3ce55abb8e..ebf32defde 100644
--- a/target/tricore/gdbstub.c
+++ b/target/tricore/gdbstub.c
@@ -18,7 +18,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "exec/gdbstub.h"
 
 
diff --git a/target/xtensa/core-de233_fpu.c b/target/xtensa/core-de233_fpu.c
index c7cbeb1b48..41af8057fb 100644
--- a/target/xtensa/core-de233_fpu.c
+++ b/target/xtensa/core-de233_fpu.c
@@ -28,7 +28,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/gdbstub.h"
-#include "qemu-common.h"
 #include "qemu/host-utils.h"
 
 #include "core-de233_fpu/core-isa.h"
diff --git a/target/xtensa/core-dsp3400.c b/target/xtensa/core-dsp3400.c
index 4e0bc8a8c4..81e425c568 100644
--- a/target/xtensa/core-dsp3400.c
+++ b/target/xtensa/core-dsp3400.c
@@ -28,7 +28,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/gdbstub.h"
-#include "qemu-common.h"
 #include "qemu/host-utils.h"
 
 #include "core-dsp3400/core-isa.h"
diff --git a/target/xtensa/core-test_mmuhifi_c3.c b/target/xtensa/core-test_mmuhifi_c3.c
index 123c630b0d..c0e5d32d1e 100644
--- a/target/xtensa/core-test_mmuhifi_c3.c
+++ b/target/xtensa/core-test_mmuhifi_c3.c
@@ -28,7 +28,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/gdbstub.h"
-#include "qemu-common.h"
 #include "qemu/host-utils.h"
 
 #include "core-test_mmuhifi_c3/core-isa.h"
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 4515f682aa..71142ea8f4 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -29,6 +29,7 @@
 #define XTENSA_CPU_H
 
 #include "cpu-qom.h"
+#include "qemu/cpu-float.h"
 #include "exec/cpu-defs.h"
 #include "xtensa-isa.h"
 
@@ -494,7 +495,7 @@ typedef struct XtensaConfigList {
     struct XtensaConfigList *next;
 } XtensaConfigList;
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 enum {
     FP_F32_HIGH,
     FP_F32_LOW,
@@ -590,7 +591,7 @@ void xtensa_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
 #define XTENSA_CPU_TYPE_NAME(model) model XTENSA_CPU_TYPE_SUFFIX
 #define CPU_RESOLVING_TYPE TYPE_XTENSA_CPU
 
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
 #define XTENSA_DEFAULT_CPU_MODEL "fsf"
 #define XTENSA_DEFAULT_CPU_NOMMU_MODEL "fsf"
 #else
diff --git a/target/xtensa/import_core.sh b/target/xtensa/import_core.sh
index df66d09393..b4c15556c2 100755
--- a/target/xtensa/import_core.sh
+++ b/target/xtensa/import_core.sh
@@ -42,7 +42,6 @@ cat <<EOF > "${TARGET}.c"
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/gdbstub.h"
-#include "qemu-common.h"
 #include "qemu/host-utils.h"
 
 #include "core-$NAME/core-isa.h"
diff --git a/target/xtensa/overlay_tool.h b/target/xtensa/overlay_tool.h
index 78720734fe..701c00eed2 100644
--- a/target/xtensa/overlay_tool.h
+++ b/target/xtensa/overlay_tool.h
@@ -449,7 +449,7 @@
 
 #endif
 
-#if (defined(TARGET_WORDS_BIGENDIAN) != 0) == (XCHAL_HAVE_BE != 0)
+#if TARGET_BIG_ENDIAN == (XCHAL_HAVE_BE != 0)
 #define REGISTER_CORE(core) \
     static void __attribute__((constructor)) register_core(void) \
     { \
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index b1491ed625..fc4e9d2c9a 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -1471,14 +1471,14 @@ static void translate_b(DisasContext *dc, const OpcodeArg arg[],
 static void translate_bb(DisasContext *dc, const OpcodeArg arg[],
                          const uint32_t par[])
 {
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
     TCGv_i32 bit = tcg_const_i32(0x80000000u);
 #else
     TCGv_i32 bit = tcg_const_i32(0x00000001u);
 #endif
     TCGv_i32 tmp = tcg_temp_new_i32();
     tcg_gen_andi_i32(tmp, arg[1].in, 0x1f);
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
     tcg_gen_shr_i32(bit, bit, tmp);
 #else
     tcg_gen_shl_i32(bit, bit, tmp);
@@ -1493,7 +1493,7 @@ static void translate_bbi(DisasContext *dc, const OpcodeArg arg[],
                           const uint32_t par[])
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
     tcg_gen_andi_i32(tmp, arg[0].in, 0x80000000u >> arg[1].imm);
 #else
     tcg_gen_andi_i32(tmp, arg[0].in, 0x00000001u << arg[1].imm);