summary refs log tree commit diff stats
path: root/target
diff options
context:
space:
mode:
Diffstat (limited to 'target')
-rw-r--r--target/arm/arm-powerctl.c1
-rw-r--r--target/arm/arm_ldst.h1
-rw-r--r--target/arm/crypto_helper.c1
-rw-r--r--target/arm/gdbstub.c3
-rw-r--r--target/arm/helper-a64.c41
-rw-r--r--target/arm/helper.c90
-rw-r--r--target/arm/iwmmxt_helper.c1
-rw-r--r--target/arm/kvm.c3
-rw-r--r--target/arm/neon_helper.c1
-rw-r--r--target/arm/psci.c1
-rw-r--r--target/arm/vec_helper.c1
-rw-r--r--target/cris/cpu.c1
-rw-r--r--target/hppa/helper.c1
-rw-r--r--target/hppa/int_helper.c2
-rw-r--r--target/i386/hax-all.c2
-rw-r--r--target/i386/hax-mem.c1
-rw-r--r--target/i386/hax-windows.c1
-rw-r--r--target/i386/hvf/hvf.c2
-rw-r--r--target/i386/hvf/x86_task.c3
-rw-r--r--target/i386/kvm.c3
-rw-r--r--target/i386/whpx-all.c262
-rw-r--r--target/lm32/cpu.c1
-rw-r--r--target/lm32/op_helper.c4
-rw-r--r--target/m68k/cpu.c1
-rw-r--r--target/microblaze/cpu.c30
-rw-r--r--target/microblaze/cpu.h34
-rw-r--r--target/microblaze/helper.c32
-rw-r--r--target/microblaze/helper.h8
-rw-r--r--target/microblaze/mmu.c81
-rw-r--r--target/microblaze/mmu.h17
-rw-r--r--target/microblaze/op_helper.c30
-rw-r--r--target/microblaze/translate.c930
-rw-r--r--target/moxie/cpu.c1
-rw-r--r--target/moxie/mmu.c1
-rw-r--r--target/nios2/cpu.h1
-rw-r--r--target/nios2/op_helper.c1
-rw-r--r--target/openrisc/cpu.c1
-rw-r--r--target/ppc/cpu.h2
-rw-r--r--target/ppc/helper_regs.h1
-rw-r--r--target/ppc/int_helper.c1
-rw-r--r--target/ppc/machine.c8
-rw-r--r--target/ppc/mmu-hash64.c3
-rw-r--r--target/ppc/translate_init.inc.c2
-rw-r--r--target/riscv/helper.c2
-rw-r--r--target/s390x/cpu.c1
-rw-r--r--target/s390x/diag.c7
-rw-r--r--target/s390x/excp_helper.c3
-rw-r--r--target/s390x/helper.c1
-rw-r--r--target/s390x/kvm.c1
-rw-r--r--target/s390x/mem_helper.c1
-rw-r--r--target/s390x/misc_helper.c1
-rw-r--r--target/s390x/mmu_helper.c3
-rw-r--r--target/s390x/sigp.c3
-rw-r--r--target/sparc/mmu_helper.c1
-rw-r--r--target/tilegx/cpu.c1
-rw-r--r--target/xtensa/core-dc232b.c2
-rw-r--r--target/xtensa/core-dc233c.c1
-rw-r--r--target/xtensa/core-de212.c1
-rw-r--r--target/xtensa/core-fsf.c1
-rw-r--r--target/xtensa/core-sample_controller.c1
-rw-r--r--target/xtensa/cpu.c1
-rwxr-xr-xtarget/xtensa/import_core.sh1
-rw-r--r--target/xtensa/op_helper.c3
63 files changed, 880 insertions, 768 deletions
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index 25207cb850..ce55eeb682 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -15,7 +15,6 @@
 #include "arm-powerctl.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
-#include "exec/exec-all.h"
 
 #ifndef DEBUG_ARM_POWERCTL
 #define DEBUG_ARM_POWERCTL 0
diff --git a/target/arm/arm_ldst.h b/target/arm/arm_ldst.h
index 01587b3ebb..5e0ac8bef0 100644
--- a/target/arm/arm_ldst.h
+++ b/target/arm/arm_ldst.h
@@ -20,7 +20,6 @@
 #ifndef ARM_LDST_H
 #define ARM_LDST_H
 
-#include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/bswap.h"
 
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
index cc339ea7e0..f800266727 100644
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -12,7 +12,6 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 #include "crypto/aes.h"
 
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index e80cfb47c7..0c64c0292e 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -157,8 +157,7 @@ int arm_gen_dynamic_xml(CPUState *cs)
     RegisterSysregXmlParam param = {cs, s};
 
     cpu->dyn_xml.num_cpregs = 0;
-    cpu->dyn_xml.cpregs_keys = g_malloc(sizeof(uint32_t *) *
-                                        g_hash_table_size(cpu->cp_regs));
+    cpu->dyn_xml.cpregs_keys = g_new(uint32_t, g_hash_table_size(cpu->cp_regs));
     g_string_printf(s, "<?xml version=\"1.0\"?>");
     g_string_append_printf(s, "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">");
     g_string_append_printf(s, "<feature name=\"org.qemu.gdb.arm.sys.regs\">");
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index f92bdea732..7f6ad3000b 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -85,12 +85,12 @@ static inline uint32_t float_rel_to_flags(int res)
     return flags;
 }
 
-uint64_t HELPER(vfp_cmph_a64)(float16 x, float16 y, void *fp_status)
+uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
 {
     return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
 }
 
-uint64_t HELPER(vfp_cmpeh_a64)(float16 x, float16 y, void *fp_status)
+uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
 {
     return float_rel_to_flags(float16_compare(x, y, fp_status));
 }
@@ -214,7 +214,7 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
 #define float64_three make_float64(0x4008000000000000ULL)
 #define float64_one_point_five make_float64(0x3FF8000000000000ULL)
 
-float16 HELPER(recpsf_f16)(float16 a, float16 b, void *fpstp)
+uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
 {
     float_status *fpst = fpstp;
 
@@ -259,7 +259,7 @@ float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
     return float64_muladd(a, b, float64_two, 0, fpst);
 }
 
-float16 HELPER(rsqrtsf_f16)(float16 a, float16 b, void *fpstp)
+uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
 {
     float_status *fpst = fpstp;
 
@@ -366,7 +366,7 @@ uint64_t HELPER(neon_addlp_u16)(uint64_t a)
 }
 
 /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
-float16 HELPER(frecpx_f16)(float16 a, void *fpstp)
+uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
 {
     float_status *fpst = fpstp;
     uint16_t val16, sbit;
@@ -384,6 +384,8 @@ float16 HELPER(frecpx_f16)(float16 a, void *fpstp)
         return nan;
     }
 
+    a = float16_squash_input_denormal(a, fpst);
+
     val16 = float16_val(a);
     sbit = 0x8000 & val16;
     exp = extract32(val16, 10, 5);
@@ -413,6 +415,8 @@ float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
         return nan;
     }
 
+    a = float32_squash_input_denormal(a, fpst);
+
     val32 = float32_val(a);
     sbit = 0x80000000ULL & val32;
     exp = extract32(val32, 23, 8);
@@ -442,6 +446,8 @@ float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
         return nan;
     }
 
+    a = float64_squash_input_denormal(a, fpst);
+
     val64 = float64_val(a);
     sbit = 0x8000000000000000ULL & val64;
     exp = extract64(float64_val(a), 52, 11);
@@ -695,7 +701,7 @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
 #define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))
 
 #define ADVSIMD_HALFOP(name) \
-float16 ADVSIMD_HELPER(name, h)(float16 a, float16 b, void *fpstp) \
+uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
 { \
     float_status *fpst = fpstp; \
     return float16_ ## name(a, b, fpst);    \
@@ -755,7 +761,8 @@ ADVSIMD_HALFOP(mulx)
 ADVSIMD_TWOHALFOP(mulx)
 
 /* fused multiply-accumulate */
-float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
+uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
+                                 void *fpstp)
 {
     float_status *fpst = fpstp;
     return float16_muladd(a, b, c, 0, fpst);
@@ -786,14 +793,14 @@ uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
 
 #define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
 
-uint32_t HELPER(advsimd_ceq_f16)(float16 a, float16 b, void *fpstp)
+uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
 {
     float_status *fpst = fpstp;
     int compare = float16_compare_quiet(a, b, fpst);
     return ADVSIMD_CMPRES(compare == float_relation_equal);
 }
 
-uint32_t HELPER(advsimd_cge_f16)(float16 a, float16 b, void *fpstp)
+uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
 {
     float_status *fpst = fpstp;
     int compare = float16_compare(a, b, fpst);
@@ -801,14 +808,14 @@ uint32_t HELPER(advsimd_cge_f16)(float16 a, float16 b, void *fpstp)
                           compare == float_relation_equal);
 }
 
-uint32_t HELPER(advsimd_cgt_f16)(float16 a, float16 b, void *fpstp)
+uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
 {
     float_status *fpst = fpstp;
     int compare = float16_compare(a, b, fpst);
     return ADVSIMD_CMPRES(compare == float_relation_greater);
 }
 
-uint32_t HELPER(advsimd_acge_f16)(float16 a, float16 b, void *fpstp)
+uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
 {
     float_status *fpst = fpstp;
     float16 f0 = float16_abs(a);
@@ -818,7 +825,7 @@ uint32_t HELPER(advsimd_acge_f16)(float16 a, float16 b, void *fpstp)
                           compare == float_relation_equal);
 }
 
-uint32_t HELPER(advsimd_acgt_f16)(float16 a, float16 b, void *fpstp)
+uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
 {
     float_status *fpst = fpstp;
     float16 f0 = float16_abs(a);
@@ -828,12 +835,12 @@ uint32_t HELPER(advsimd_acgt_f16)(float16 a, float16 b, void *fpstp)
 }
 
 /* round to integral */
-float16 HELPER(advsimd_rinth_exact)(float16 x, void *fp_status)
+uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
 {
     return float16_round_to_int(x, fp_status);
 }
 
-float16 HELPER(advsimd_rinth)(float16 x, void *fp_status)
+uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
 {
     int old_flags = get_float_exception_flags(fp_status), new_flags;
     float16 ret;
@@ -857,7 +864,7 @@ float16 HELPER(advsimd_rinth)(float16 x, void *fp_status)
  * setting the mode appropriately before calling the helper.
  */
 
-uint32_t HELPER(advsimd_f16tosinth)(float16 a, void *fpstp)
+uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
 {
     float_status *fpst = fpstp;
 
@@ -869,7 +876,7 @@ uint32_t HELPER(advsimd_f16tosinth)(float16 a, void *fpstp)
     return float16_to_int16(a, fpst);
 }
 
-uint32_t HELPER(advsimd_f16touinth)(float16 a, void *fpstp)
+uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
 {
     float_status *fpst = fpstp;
 
@@ -885,7 +892,7 @@ uint32_t HELPER(advsimd_f16touinth)(float16 a, void *fpstp)
  * Square Root and Reciprocal square root
  */
 
-float16 HELPER(sqrt_f16)(float16 a, void *fpstp)
+uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
 {
     float_status *s = fpstp;
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index c0f739972e..f75aa6e9ca 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -863,6 +863,14 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     env->cp15.cpacr_el1 = value;
 }
 
+static void cpacr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* Call cpacr_write() so that we reset with the correct RAO bits set
+     * for our CPU features.
+     */
+    cpacr_write(env, ri, 0);
+}
+
 static CPAccessResult cpacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
                                    bool isread)
 {
@@ -920,7 +928,7 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
     { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
       .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1),
-      .resetvalue = 0, .writefn = cpacr_write },
+      .resetfn = cpacr_reset, .writefn = cpacr_write },
     REGINFO_SENTINEL
 };
 
@@ -11344,35 +11352,35 @@ DO_VFP_cmp(d, float64)
 
 /* Integer to float and float to integer conversions */
 
-#define CONV_ITOF(name, fsz, sign) \
-    float##fsz HELPER(name)(uint32_t x, void *fpstp) \
-{ \
-    float_status *fpst = fpstp; \
-    return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
+#define CONV_ITOF(name, ftype, fsz, sign)                           \
+ftype HELPER(name)(uint32_t x, void *fpstp)                         \
+{                                                                   \
+    float_status *fpst = fpstp;                                     \
+    return sign##int32_to_##float##fsz((sign##int32_t)x, fpst);     \
 }
 
-#define CONV_FTOI(name, fsz, sign, round) \
-uint32_t HELPER(name)(float##fsz x, void *fpstp) \
-{ \
-    float_status *fpst = fpstp; \
-    if (float##fsz##_is_any_nan(x)) { \
-        float_raise(float_flag_invalid, fpst); \
-        return 0; \
-    } \
-    return float##fsz##_to_##sign##int32##round(x, fpst); \
+#define CONV_FTOI(name, ftype, fsz, sign, round)                \
+uint32_t HELPER(name)(ftype x, void *fpstp)                     \
+{                                                               \
+    float_status *fpst = fpstp;                                 \
+    if (float##fsz##_is_any_nan(x)) {                           \
+        float_raise(float_flag_invalid, fpst);                  \
+        return 0;                                               \
+    }                                                           \
+    return float##fsz##_to_##sign##int32##round(x, fpst);       \
 }
 
-#define FLOAT_CONVS(name, p, fsz, sign) \
-CONV_ITOF(vfp_##name##to##p, fsz, sign) \
-CONV_FTOI(vfp_to##name##p, fsz, sign, ) \
-CONV_FTOI(vfp_to##name##z##p, fsz, sign, _round_to_zero)
+#define FLOAT_CONVS(name, p, ftype, fsz, sign)            \
+    CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign)        \
+    CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, )        \
+    CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
 
-FLOAT_CONVS(si, h, 16, )
-FLOAT_CONVS(si, s, 32, )
-FLOAT_CONVS(si, d, 64, )
-FLOAT_CONVS(ui, h, 16, u)
-FLOAT_CONVS(ui, s, 32, u)
-FLOAT_CONVS(ui, d, 64, u)
+FLOAT_CONVS(si, h, uint32_t, 16, )
+FLOAT_CONVS(si, s, float32, 32, )
+FLOAT_CONVS(si, d, float64, 64, )
+FLOAT_CONVS(ui, h, uint32_t, 16, u)
+FLOAT_CONVS(ui, s, float32, 32, u)
+FLOAT_CONVS(ui, d, float64, 64, u)
 
 #undef CONV_ITOF
 #undef CONV_FTOI
@@ -11465,22 +11473,22 @@ static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst)
     return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst);
 }
 
-float16 HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
+uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
 {
     return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst);
 }
 
-float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
+uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
 {
     return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
 }
 
-float16 HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
+uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
 {
     return do_postscale_fp16(int64_to_float64(x, fpst), shift, fpst);
 }
 
-float16 HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
+uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
 {
     return do_postscale_fp16(uint64_to_float64(x, fpst), shift, fpst);
 }
@@ -11504,32 +11512,32 @@ static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst)
     }
 }
 
-uint32_t HELPER(vfp_toshh)(float16 x, uint32_t shift, void *fpst)
+uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
 {
     return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst);
 }
 
-uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst)
+uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
 {
     return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
 }
 
-uint32_t HELPER(vfp_toslh)(float16 x, uint32_t shift, void *fpst)
+uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
 {
     return float64_to_int32(do_prescale_fp16(x, shift, fpst), fpst);
 }
 
-uint32_t HELPER(vfp_toulh)(float16 x, uint32_t shift, void *fpst)
+uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
 {
     return float64_to_uint32(do_prescale_fp16(x, shift, fpst), fpst);
 }
 
-uint64_t HELPER(vfp_tosqh)(float16 x, uint32_t shift, void *fpst)
+uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
 {
     return float64_to_int64(do_prescale_fp16(x, shift, fpst), fpst);
 }
 
-uint64_t HELPER(vfp_touqh)(float16 x, uint32_t shift, void *fpst)
+uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
 {
     return float64_to_uint64(do_prescale_fp16(x, shift, fpst), fpst);
 }
@@ -11565,7 +11573,7 @@ uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
 }
 
 /* Half precision conversions.  */
-float32 HELPER(vfp_fcvt_f16_to_f32)(float16 a, void *fpstp, uint32_t ahp_mode)
+float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
 {
     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
      * it would affect flushing input denormals.
@@ -11578,7 +11586,7 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(float16 a, void *fpstp, uint32_t ahp_mode)
     return r;
 }
 
-float16 HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
+uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
 {
     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
      * it would affect flushing output denormals.
@@ -11591,7 +11599,7 @@ float16 HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
     return r;
 }
 
-float64 HELPER(vfp_fcvt_f16_to_f64)(float16 a, void *fpstp, uint32_t ahp_mode)
+float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
 {
     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
      * it would affect flushing input denormals.
@@ -11604,7 +11612,7 @@ float64 HELPER(vfp_fcvt_f16_to_f64)(float16 a, void *fpstp, uint32_t ahp_mode)
     return r;
 }
 
-float16 HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
+uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
 {
     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
      * it would affect flushing output denormals.
@@ -11742,7 +11750,7 @@ static bool round_to_inf(float_status *fpst, bool sign_bit)
     g_assert_not_reached();
 }
 
-float16 HELPER(recpe_f16)(float16 input, void *fpstp)
+uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
 {
     float_status *fpst = fpstp;
     float16 f16 = float16_squash_input_denormal(input, fpst);
@@ -11937,7 +11945,7 @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
     return extract64(estimate, 0, 8) << 44;
 }
 
-float16 HELPER(rsqrte_f16)(float16 input, void *fpstp)
+uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
 {
     float_status *s = fpstp;
     float16 f16 = float16_squash_input_denormal(input, s);
diff --git a/target/arm/iwmmxt_helper.c b/target/arm/iwmmxt_helper.c
index 7d87e1a0a8..f6a4fc5b7f 100644
--- a/target/arm/iwmmxt_helper.c
+++ b/target/arm/iwmmxt_helper.c
@@ -22,7 +22,6 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 
 /* iwMMXt macros extracted from GNU gdb.  */
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 5141d0adc5..98f5006323 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -664,7 +664,8 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
     /* MSI doorbell address is translated by an IOMMU */
 
     rcu_read_lock();
-    mr = address_space_translate(as, address, &xlat, &len, true);
+    mr = address_space_translate(as, address, &xlat, &len, true,
+                                 MEMTXATTRS_UNSPECIFIED);
     if (!mr) {
         goto unlock;
     }
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index a1ec6537eb..c2c6491a83 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -9,7 +9,6 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 #include "fpu/softfloat.h"
 
diff --git a/target/arm/psci.c b/target/arm/psci.c
index eb7b88e926..a74d78802a 100644
--- a/target/arm/psci.c
+++ b/target/arm/psci.c
@@ -22,7 +22,6 @@
 #include "sysemu/sysemu.h"
 #include "internals.h"
 #include "arm-powerctl.h"
-#include "exec/exec-all.h"
 
 bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
 {
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index ec705cfca5..25e209da31 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -19,7 +19,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 #include "tcg/tcg-gvec-desc.h"
 #include "fpu/softfloat.h"
diff --git a/target/cris/cpu.c b/target/cris/cpu.c
index db8d0884a1..a23aba2688 100644
--- a/target/cris/cpu.c
+++ b/target/cris/cpu.c
@@ -26,7 +26,6 @@
 #include "cpu.h"
 #include "qemu-common.h"
 #include "mmu.h"
-#include "exec/exec-all.h"
 
 
 static void cris_cpu_set_pc(CPUState *cs, vaddr value)
diff --git a/target/hppa/helper.c b/target/hppa/helper.c
index 858ec205b6..6539061e52 100644
--- a/target/hppa/helper.c
+++ b/target/hppa/helper.c
@@ -20,7 +20,6 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "fpu/softfloat.h"
 #include "exec/helper-proto.h"
 
diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c
index 787f3d6357..8d5edd3a20 100644
--- a/target/hppa/int_helper.c
+++ b/target/hppa/int_helper.c
@@ -19,8 +19,8 @@
 
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
+#include "qemu/log.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 #include "qom/cpu.h"
 
diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c
index cad7531406..d2e512856b 100644
--- a/target/i386/hax-all.c
+++ b/target/i386/hax-all.c
@@ -26,8 +26,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/address-spaces.h"
-#include "exec/exec-all.h"
-#include "exec/ioport.h"
 
 #include "qemu-common.h"
 #include "hax-i386.h"
diff --git a/target/i386/hax-mem.c b/target/i386/hax-mem.c
index f46e85544d..5c37e94caa 100644
--- a/target/i386/hax-mem.c
+++ b/target/i386/hax-mem.c
@@ -11,7 +11,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/address-spaces.h"
-#include "exec/exec-all.h"
 #include "qemu/error-report.h"
 
 #include "target/i386/hax-i386.h"
diff --git a/target/i386/hax-windows.c b/target/i386/hax-windows.c
index b1ac737ae4..5729ad9b48 100644
--- a/target/i386/hax-windows.c
+++ b/target/i386/hax-windows.c
@@ -12,7 +12,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "hax-i386.h"
 
 /*
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index c36753954b..df69e6d0a7 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -65,8 +65,6 @@
 #include <Hypervisor/hv_vmx.h>
 
 #include "exec/address-spaces.h"
-#include "exec/exec-all.h"
-#include "exec/ioport.h"
 #include "hw/i386/apic_internal.h"
 #include "hw/boards.h"
 #include "qemu/main-loop.h"
diff --git a/target/i386/hvf/x86_task.c b/target/i386/hvf/x86_task.c
index 4abf3db25e..7099335e89 100644
--- a/target/i386/hvf/x86_task.c
+++ b/target/i386/hvf/x86_task.c
@@ -26,9 +26,6 @@
 #include <Hypervisor/hv.h>
 #include <Hypervisor/hv_vmx.h>
 
-#include "exec/address-spaces.h"
-#include "exec/exec-all.h"
-#include "exec/ioport.h"
 #include "hw/i386/apic_internal.h"
 #include "hw/boards.h"
 #include "qemu/main-loop.h"
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 6511329d11..44f70733e7 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -40,7 +40,6 @@
 #include "hw/i386/intel_iommu.h"
 #include "hw/i386/x86-iommu.h"
 
-#include "exec/ioport.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
@@ -387,7 +386,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
             ret &= ~(1U << KVM_FEATURE_PV_UNHALT);
         }
     } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) {
-        ret |= 1U << KVM_HINTS_DEDICATED;
+        ret |= 1U << KVM_HINTS_REALTIME;
         found = 1;
     }
 
diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
index 58435178a4..6b42096698 100644
--- a/target/i386/whpx-all.c
+++ b/target/i386/whpx-all.c
@@ -11,7 +11,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/address-spaces.h"
-#include "exec/exec-all.h"
 #include "exec/ioport.h"
 #include "qemu-common.h"
 #include "strings.h"
@@ -25,6 +24,7 @@
 #include "qemu/queue.h"
 #include "qapi/error.h"
 #include "migration/blocker.h"
+#include "whp-dispatch.h"
 
 #include <WinHvPlatform.h>
 #include <WinHvEmulation.h>
@@ -160,8 +160,11 @@ struct whpx_vcpu {
 };
 
 static bool whpx_allowed;
+static bool whp_dispatch_initialized;
+static HMODULE hWinHvPlatform, hWinHvEmulation;
 
 struct whpx_state whpx_global;
+struct WHPDispatch whp_dispatch;
 
 
 /*
@@ -220,24 +223,31 @@ static void whpx_set_registers(CPUState *cpu)
     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
     X86CPU *x86_cpu = X86_CPU(cpu);
-    struct whpx_register_set vcxt = {0};
+    struct whpx_register_set vcxt;
     HRESULT hr;
-    int idx = 0;
+    int idx;
+    int idx_next;
     int i;
     int v86, r86;
 
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
+    memset(&vcxt, 0, sizeof(struct whpx_register_set));
+
     v86 = (env->eflags & VM_MASK);
     r86 = !(env->cr[0] & CR0_PE_MASK);
 
     vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
     vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);
 
+    idx = 0;
+
     /* Indexes for first 16 registers match between HV and QEMU definitions */
-    for (idx = 0; idx < CPU_NB_REGS64; idx += 1) {
-        vcxt.values[idx].Reg64 = env->regs[idx];
+    idx_next = 16;
+    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
+        vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
     }
+    idx = idx_next;
 
     /* Same goes for RIP and RFLAGS */
     assert(whpx_register_names[idx] == WHvX64RegisterRip);
@@ -284,10 +294,12 @@ static void whpx_set_registers(CPUState *cpu)
 
     /* 16 XMM registers */
     assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
-    for (i = 0; i < 16; i += 1, idx += 1) {
+    idx_next = idx + 16;
+    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
         vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
         vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
     }
+    idx = idx_next;
 
     /* 8 FP registers */
     assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
@@ -355,10 +367,11 @@ static void whpx_set_registers(CPUState *cpu)
 
     assert(idx == RTL_NUMBER_OF(whpx_register_names));
 
-    hr = WHvSetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index,
-                                         whpx_register_names,
-                                         RTL_NUMBER_OF(whpx_register_names),
-                                         &vcxt.values[0]);
+    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+        whpx->partition, cpu->cpu_index,
+        whpx_register_names,
+        RTL_NUMBER_OF(whpx_register_names),
+        &vcxt.values[0]);
 
     if (FAILED(hr)) {
         error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
@@ -377,24 +390,30 @@ static void whpx_get_registers(CPUState *cpu)
     struct whpx_register_set vcxt;
     uint64_t tpr, apic_base;
     HRESULT hr;
-    int idx = 0;
+    int idx;
+    int idx_next;
     int i;
 
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
-    hr = WHvGetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index,
-                                         whpx_register_names,
-                                         RTL_NUMBER_OF(whpx_register_names),
-                                         &vcxt.values[0]);
+    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+        whpx->partition, cpu->cpu_index,
+        whpx_register_names,
+        RTL_NUMBER_OF(whpx_register_names),
+        &vcxt.values[0]);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
                      hr);
     }
 
+    idx = 0;
+
     /* Indexes for first 16 registers match between HV and QEMU definitions */
-    for (idx = 0; idx < CPU_NB_REGS64; idx += 1) {
+    idx_next = 16;
+    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
         env->regs[idx] = vcxt.values[idx].Reg64;
     }
+    idx = idx_next;
 
     /* Same goes for RIP and RFLAGS */
     assert(whpx_register_names[idx] == WHvX64RegisterRip);
@@ -441,10 +460,12 @@ static void whpx_get_registers(CPUState *cpu)
 
     /* 16 XMM registers */
     assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
-    for (i = 0; i < 16; i += 1, idx += 1) {
+    idx_next = idx + 16;
+    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
         env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
         env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
     }
+    idx = idx_next;
 
     /* 8 FP registers */
     assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
@@ -545,9 +566,10 @@ static HRESULT CALLBACK whpx_emu_getreg_callback(
     struct whpx_state *whpx = &whpx_global;
     CPUState *cpu = (CPUState *)ctx;
 
-    hr = WHvGetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index,
-                                         RegisterNames, RegisterCount,
-                                         RegisterValues);
+    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+        whpx->partition, cpu->cpu_index,
+        RegisterNames, RegisterCount,
+        RegisterValues);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to get virtual processor registers,"
                      " hr=%08lx", hr);
@@ -566,9 +588,10 @@ static HRESULT CALLBACK whpx_emu_setreg_callback(
     struct whpx_state *whpx = &whpx_global;
     CPUState *cpu = (CPUState *)ctx;
 
-    hr = WHvSetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index,
-                                         RegisterNames, RegisterCount,
-                                         RegisterValues);
+    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+        whpx->partition, cpu->cpu_index,
+        RegisterNames, RegisterCount,
+        RegisterValues);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to set virtual processor registers,"
                      " hr=%08lx", hr);
@@ -595,8 +618,8 @@ static HRESULT CALLBACK whpx_emu_translate_callback(
     CPUState *cpu = (CPUState *)ctx;
     WHV_TRANSLATE_GVA_RESULT res;
 
-    hr = WHvTranslateGva(whpx->partition, cpu->cpu_index,
-                         Gva, TranslateFlags, &res, Gpa);
+    hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
+                                      Gva, TranslateFlags, &res, Gpa);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
     } else {
@@ -621,16 +644,18 @@ static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
     WHV_EMULATOR_STATUS emu_status;
 
-    hr = WHvEmulatorTryMmioEmulation(vcpu->emulator, cpu,
-                                     &vcpu->exit_ctx.VpContext, ctx,
-                                     &emu_status);
+    hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
+        vcpu->emulator, cpu,
+        &vcpu->exit_ctx.VpContext, ctx,
+        &emu_status);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
         return -1;
     }
 
     if (!emu_status.EmulationSuccessful) {
-        error_report("WHPX: Failed to emulate MMIO access");
+        error_report("WHPX: Failed to emulate MMIO access with"
+                     " EmulatorReturnStatus: %u", emu_status.AsUINT32);
         return -1;
     }
 
@@ -644,16 +669,18 @@ static int whpx_handle_portio(CPUState *cpu,
     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
     WHV_EMULATOR_STATUS emu_status;
 
-    hr = WHvEmulatorTryIoEmulation(vcpu->emulator, cpu,
-                                   &vcpu->exit_ctx.VpContext, ctx,
-                                   &emu_status);
+    hr = whp_dispatch.WHvEmulatorTryIoEmulation(
+        vcpu->emulator, cpu,
+        &vcpu->exit_ctx.VpContext, ctx,
+        &emu_status);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
         return -1;
     }
 
     if (!emu_status.EmulationSuccessful) {
-        error_report("WHPX: Failed to emulate PortMMIO access");
+        error_report("WHPX: Failed to emulate PortIO access with"
+                     " EmulatorReturnStatus: %u", emu_status.AsUINT32);
         return -1;
     }
 
@@ -687,11 +714,14 @@ static void whpx_vcpu_pre_run(CPUState *cpu)
     X86CPU *x86_cpu = X86_CPU(cpu);
     int irq;
     uint8_t tpr;
-    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int = {0};
+    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
     UINT32 reg_count = 0;
-    WHV_REGISTER_VALUE reg_values[3] = {0};
+    WHV_REGISTER_VALUE reg_values[3];
     WHV_REGISTER_NAME reg_names[3];
 
+    memset(&new_int, 0, sizeof(new_int));
+    memset(reg_values, 0, sizeof(reg_values));
+
     qemu_mutex_lock_iothread();
 
     /* Inject NMI */
@@ -768,8 +798,9 @@ static void whpx_vcpu_pre_run(CPUState *cpu)
     qemu_mutex_unlock_iothread();
 
     if (reg_count) {
-        hr = WHvSetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index,
-                                             reg_names, reg_count, reg_values);
+        hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+            whpx->partition, cpu->cpu_index,
+            reg_names, reg_count, reg_values);
         if (FAILED(hr)) {
             error_report("WHPX: Failed to set interrupt state registers,"
                          " hr=%08lx", hr);
@@ -877,8 +908,9 @@ static int whpx_vcpu_run(CPUState *cpu)
             whpx_vcpu_kick(cpu);
         }
 
-        hr = WHvRunVirtualProcessor(whpx->partition, cpu->cpu_index,
-                                    &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
+        hr = whp_dispatch.WHvRunVirtualProcessor(
+            whpx->partition, cpu->cpu_index,
+            &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
 
         if (FAILED(hr)) {
             error_report("WHPX: Failed to exec a virtual processor,"
@@ -912,11 +944,13 @@ static int whpx_vcpu_run(CPUState *cpu)
             break;
 
         case WHvRunVpExitReasonX64Cpuid: {
-            WHV_REGISTER_VALUE reg_values[5] = {0};
+            WHV_REGISTER_VALUE reg_values[5];
             WHV_REGISTER_NAME reg_names[5];
             UINT32 reg_count = 5;
             UINT64 rip, rax, rcx, rdx, rbx;
 
+            memset(reg_values, 0, sizeof(reg_values));
+
             rip = vcpu->exit_ctx.VpContext.Rip +
                   vcpu->exit_ctx.VpContext.InstructionLength;
             switch (vcpu->exit_ctx.CpuidAccess.Rax) {
@@ -949,11 +983,11 @@ static int whpx_vcpu_run(CPUState *cpu)
             reg_values[3].Reg64 = rdx;
             reg_values[4].Reg64 = rbx;
 
-            hr = WHvSetVirtualProcessorRegisters(whpx->partition,
-                                                 cpu->cpu_index,
-                                                 reg_names,
-                                                 reg_count,
-                                                 reg_values);
+            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+                whpx->partition, cpu->cpu_index,
+                reg_names,
+                reg_count,
+                reg_values);
 
             if (FAILED(hr)) {
                 error_report("WHPX: Failed to set CpuidAccess state registers,"
@@ -1065,8 +1099,8 @@ int whpx_init_vcpu(CPUState *cpu)
         (void)migrate_add_blocker(whpx_migration_blocker, &local_error);
         if (local_error) {
             error_report_err(local_error);
-            error_free(whpx_migration_blocker);
             migrate_del_blocker(whpx_migration_blocker);
+            error_free(whpx_migration_blocker);
             return -EINVAL;
         }
     }
@@ -1078,7 +1112,9 @@ int whpx_init_vcpu(CPUState *cpu)
         return -ENOMEM;
     }
 
-    hr = WHvEmulatorCreateEmulator(&whpx_emu_callbacks, &vcpu->emulator);
+    hr = whp_dispatch.WHvEmulatorCreateEmulator(
+        &whpx_emu_callbacks,
+        &vcpu->emulator);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to setup instruction completion support,"
                      " hr=%08lx", hr);
@@ -1086,11 +1122,12 @@ int whpx_init_vcpu(CPUState *cpu)
         return -EINVAL;
     }
 
-    hr = WHvCreateVirtualProcessor(whpx->partition, cpu->cpu_index, 0);
+    hr = whp_dispatch.WHvCreateVirtualProcessor(
+        whpx->partition, cpu->cpu_index, 0);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to create a virtual processor,"
                      " hr=%08lx", hr);
-        WHvEmulatorDestroyEmulator(vcpu->emulator);
+        whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
         g_free(vcpu);
         return -EINVAL;
     }
@@ -1131,8 +1168,8 @@ void whpx_destroy_vcpu(CPUState *cpu)
     struct whpx_state *whpx = &whpx_global;
     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
 
-    WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
-    WHvEmulatorDestroyEmulator(vcpu->emulator);
+    whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
+    whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
     g_free(cpu->hax_vcpu);
     return;
 }
@@ -1140,7 +1177,8 @@ void whpx_destroy_vcpu(CPUState *cpu)
 void whpx_vcpu_kick(CPUState *cpu)
 {
     struct whpx_state *whpx = &whpx_global;
-    WHvCancelRunVirtualProcessor(whpx->partition, cpu->cpu_index, 0);
+    whp_dispatch.WHvCancelRunVirtualProcessor(
+        whpx->partition, cpu->cpu_index, 0);
 }
 
 /*
@@ -1166,24 +1204,24 @@ static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
     */
 
     if (add) {
-        hr = WHvMapGpaRange(whpx->partition,
-                            host_va,
-                            start_pa,
-                            size,
-                            (WHvMapGpaRangeFlagRead |
-                             WHvMapGpaRangeFlagExecute |
-                             (rom ? 0 : WHvMapGpaRangeFlagWrite)));
+        hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
+                                         host_va,
+                                         start_pa,
+                                         size,
+                                         (WHvMapGpaRangeFlagRead |
+                                          WHvMapGpaRangeFlagExecute |
+                                          (rom ? 0 : WHvMapGpaRangeFlagWrite)));
     } else {
-        hr = WHvUnmapGpaRange(whpx->partition,
-                              start_pa,
-                              size);
+        hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
+                                           start_pa,
+                                           size);
     }
 
     if (FAILED(hr)) {
         error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
                      " Host:%p, hr=%08lx",
                      (add ? "MAP" : "UNMAP"), name,
-                     (void *)start_pa, (void *)size, host_va, hr);
+                     (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
     }
 }
 
@@ -1214,8 +1252,8 @@ static void whpx_process_section(MemoryRegionSection *section, int add)
     host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
             + section->offset_within_region + delta;
 
-    whpx_update_mapping(start_pa, size, (void *)host_va, add,
-                       memory_region_is_rom(mr), mr->name);
+    whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
+                        memory_region_is_rom(mr), mr->name);
 }
 
 static void whpx_region_add(MemoryListener *listener,
@@ -1290,18 +1328,24 @@ static int whpx_accel_init(MachineState *ms)
 
     whpx = &whpx_global;
 
+    if (!init_whp_dispatch()) {
+        ret = -ENOSYS;
+        goto error;
+    }
+
     memset(whpx, 0, sizeof(struct whpx_state));
     whpx->mem_quota = ms->ram_size;
 
-    hr = WHvGetCapability(WHvCapabilityCodeHypervisorPresent, &whpx_cap,
-                          sizeof(whpx_cap), &whpx_cap_size);
+    hr = whp_dispatch.WHvGetCapability(
+        WHvCapabilityCodeHypervisorPresent, &whpx_cap,
+        sizeof(whpx_cap), &whpx_cap_size);
     if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
         error_report("WHPX: No accelerator found, hr=%08lx", hr);
         ret = -ENOSPC;
         goto error;
     }
 
-    hr = WHvCreatePartition(&whpx->partition);
+    hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to create partition, hr=%08lx", hr);
         ret = -EINVAL;
@@ -1310,10 +1354,11 @@ static int whpx_accel_init(MachineState *ms)
 
     memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
     prop.ProcessorCount = smp_cpus;
-    hr = WHvSetPartitionProperty(whpx->partition,
-                                 WHvPartitionPropertyCodeProcessorCount,
-                                 &prop,
-                                 sizeof(WHV_PARTITION_PROPERTY));
+    hr = whp_dispatch.WHvSetPartitionProperty(
+        whpx->partition,
+        WHvPartitionPropertyCodeProcessorCount,
+        &prop,
+        sizeof(WHV_PARTITION_PROPERTY));
 
     if (FAILED(hr)) {
         error_report("WHPX: Failed to set partition core count to %d,"
@@ -1324,10 +1369,11 @@ static int whpx_accel_init(MachineState *ms)
 
     memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
     prop.ExtendedVmExits.X64CpuidExit = 1;
-    hr = WHvSetPartitionProperty(whpx->partition,
-                                 WHvPartitionPropertyCodeExtendedVmExits,
-                                 &prop,
-                                 sizeof(WHV_PARTITION_PROPERTY));
+    hr = whp_dispatch.WHvSetPartitionProperty(
+        whpx->partition,
+        WHvPartitionPropertyCodeExtendedVmExits,
+        &prop,
+        sizeof(WHV_PARTITION_PROPERTY));
 
     if (FAILED(hr)) {
         error_report("WHPX: Failed to enable partition extended X64CpuidExit"
@@ -1337,11 +1383,11 @@ static int whpx_accel_init(MachineState *ms)
     }
 
     UINT32 cpuidExitList[] = {1};
-    hr = WHvSetPartitionProperty(whpx->partition,
-                                 WHvPartitionPropertyCodeCpuidExitList,
-                                 cpuidExitList,
-                                 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
-
+    hr = whp_dispatch.WHvSetPartitionProperty(
+        whpx->partition,
+        WHvPartitionPropertyCodeCpuidExitList,
+        cpuidExitList,
+        RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
     if (FAILED(hr)) {
         error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
                      hr);
@@ -1349,7 +1395,7 @@ static int whpx_accel_init(MachineState *ms)
         goto error;
     }
 
-    hr = WHvSetupPartition(whpx->partition);
+    hr = whp_dispatch.WHvSetupPartition(whpx->partition);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
         ret = -EINVAL;
@@ -1366,7 +1412,7 @@ static int whpx_accel_init(MachineState *ms)
   error:
 
     if (NULL != whpx->partition) {
-        WHvDeletePartition(whpx->partition);
+        whp_dispatch.WHvDeletePartition(whpx->partition);
         whpx->partition = NULL;
     }
 
@@ -1398,4 +1444,54 @@ static void whpx_type_init(void)
     type_register_static(&whpx_accel_type);
 }
 
+bool init_whp_dispatch(void)
+{
+    const char *lib_name;
+    HMODULE hLib;
+
+    if (whp_dispatch_initialized) {
+        return true;
+    }
+
+    #define WHP_LOAD_FIELD(return_type, function_name, signature) \
+        whp_dispatch.function_name = \
+            (function_name ## _t)GetProcAddress(hLib, #function_name); \
+        if (!whp_dispatch.function_name) { \
+            error_report("Could not load function %s from library %s.", \
+                         #function_name, lib_name); \
+            goto error; \
+        } \
+
+    lib_name = "WinHvPlatform.dll";
+    hWinHvPlatform = LoadLibrary(lib_name);
+    if (!hWinHvPlatform) {
+        error_report("Could not load library %s.", lib_name);
+        goto error;
+    }
+    hLib = hWinHvPlatform;
+    LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
+
+    lib_name = "WinHvEmulation.dll";
+    hWinHvEmulation = LoadLibrary(lib_name);
+    if (!hWinHvEmulation) {
+        error_report("Could not load library %s.", lib_name);
+        goto error;
+    }
+    hLib = hWinHvEmulation;
+    LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
+
+    whp_dispatch_initialized = true;
+    return true;
+
+    error:
+
+    if (hWinHvPlatform) {
+        FreeLibrary(hWinHvPlatform);
+    }
+    if (hWinHvEmulation) {
+        FreeLibrary(hWinHvEmulation);
+    }
+    return false;
+}
+
 type_init(whpx_type_init);
diff --git a/target/lm32/cpu.c b/target/lm32/cpu.c
index 0003152469..b7499cb627 100644
--- a/target/lm32/cpu.c
+++ b/target/lm32/cpu.c
@@ -22,7 +22,6 @@
 #include "qapi/error.h"
 #include "cpu.h"
 #include "qemu-common.h"
-#include "exec/exec-all.h"
 
 
 static void lm32_cpu_set_pc(CPUState *cs, vaddr value)
diff --git a/target/lm32/op_helper.c b/target/lm32/op_helper.c
index 577f8306e3..234d55e056 100644
--- a/target/lm32/op_helper.c
+++ b/target/lm32/op_helper.c
@@ -102,12 +102,16 @@ void HELPER(wcsr_dc)(CPULM32State *env, uint32_t dc)
 
 void HELPER(wcsr_im)(CPULM32State *env, uint32_t im)
 {
+    qemu_mutex_lock_iothread();
     lm32_pic_set_im(env->pic_state, im);
+    qemu_mutex_unlock_iothread();
 }
 
 void HELPER(wcsr_ip)(CPULM32State *env, uint32_t im)
 {
+    qemu_mutex_lock_iothread();
     lm32_pic_set_ip(env->pic_state, im);
+    qemu_mutex_unlock_iothread();
 }
 
 void HELPER(wcsr_jtx)(CPULM32State *env, uint32_t jtx)
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index a4ed8770aa..582e3a73b3 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -23,7 +23,6 @@
 #include "cpu.h"
 #include "qemu-common.h"
 #include "migration/vmstate.h"
-#include "exec/exec-all.h"
 #include "fpu/softfloat.h"
 
 static void m68k_cpu_set_pc(CPUState *cs, vaddr value)
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index 4dc1404800..9b546a2c18 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -72,6 +72,9 @@ static const struct {
     {NULL, 0},
 };
 
+/* If no specific version gets selected, default to the following.  */
+#define DEFAULT_CPU_VERSION "10.0"
+
 static void mb_cpu_set_pc(CPUState *cs, vaddr value)
 {
     MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
@@ -125,6 +128,7 @@ static void mb_cpu_reset(CPUState *s)
     env->mmu.c_mmu = 3;
     env->mmu.c_mmu_tlb_access = 3;
     env->mmu.c_mmu_zones = 16;
+    env->mmu.c_addr_mask = MAKE_64BIT_MASK(0, cpu->cfg.addr_size);
 #endif
 }
 
@@ -141,6 +145,7 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp)
     MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
     CPUMBState *env = &cpu->env;
     uint8_t version_code = 0;
+    const char *version;
     int i = 0;
     Error *local_err = NULL;
 
@@ -150,6 +155,12 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp)
         return;
     }
 
+    if (cpu->cfg.addr_size < 32 || cpu->cfg.addr_size > 64) {
+        error_setg(errp, "addr-size %d is out of range (32 - 64)",
+                   cpu->cfg.addr_size);
+        return;
+    }
+
     qemu_init_vcpu(cs);
 
     env->pvr.regs[0] = PVR0_USE_EXC_MASK \
@@ -162,8 +173,9 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp)
                         | PVR2_FPU_EXC_MASK \
                         | 0;
 
-    for (i = 0; mb_cpu_lookup[i].name && cpu->cfg.version; i++) {
-        if (strcmp(mb_cpu_lookup[i].name, cpu->cfg.version) == 0) {
+    version = cpu->cfg.version ? cpu->cfg.version : DEFAULT_CPU_VERSION;
+    for (i = 0; mb_cpu_lookup[i].name && version; i++) {
+        if (strcmp(mb_cpu_lookup[i].name, version) == 0) {
             version_code = mb_cpu_lookup[i].version_id;
             break;
         }
@@ -195,8 +207,10 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp)
     env->pvr.regs[5] |= cpu->cfg.dcache_writeback ?
                                         PVR5_DCACHE_WRITEBACK_MASK : 0;
 
-    env->pvr.regs[10] = 0x0c000000; /* Default to spartan 3a dsp family.  */
-    env->pvr.regs[11] = PVR11_USE_MMU | (16 << 17);
+    env->pvr.regs[10] = 0x0c000000 | /* Default to spartan 3a dsp family.  */
+                        (cpu->cfg.addr_size - 32) << PVR10_ASIZE_SHIFT;
+    env->pvr.regs[11] = (cpu->cfg.use_mmu ? PVR11_USE_MMU : 0) |
+                        16 << 17;
 
     mcc->parent_realize(dev, errp);
 }
@@ -226,6 +240,14 @@ static Property mb_properties[] = {
     DEFINE_PROP_UINT32("base-vectors", MicroBlazeCPU, cfg.base_vectors, 0),
     DEFINE_PROP_BOOL("use-stack-protection", MicroBlazeCPU, cfg.stackprot,
                      false),
+    /*
+     * This is the C_ADDR_SIZE synth-time configuration option of the
+     * MicroBlaze cores. Supported values range between 32 and 64.
+     *
+     * When set to > 32, 32bit MicroBlaze can emit load/stores
+     * with extended addressing.
+     */
+    DEFINE_PROP_UINT8("addr-size", MicroBlazeCPU, cfg.addr_size, 32),
     /* If use-fpu > 0 - FPU is enabled
      * If use-fpu = 2 - Floating point conversion and square root instructions
      *                  are enabled
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index 5be71bc320..3c4e0ba80a 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -23,7 +23,7 @@
 #include "qemu-common.h"
 #include "cpu-qom.h"
 
-#define TARGET_LONG_BITS 32
+#define TARGET_LONG_BITS 64
 
 #define CPUArchState struct CPUMBState
 
@@ -203,6 +203,7 @@ typedef struct CPUMBState CPUMBState;
 
 /* Target family PVR mask */
 #define PVR10_TARGET_FAMILY_MASK        0xFF000000
+#define PVR10_ASIZE_SHIFT               18
 
 /* MMU descrtiption */
 #define PVR11_USE_MMU                   0xC0000000
@@ -238,19 +239,19 @@ typedef struct CPUMBState CPUMBState;
 struct CPUMBState {
     uint32_t debug;
     uint32_t btaken;
-    uint32_t btarget;
+    uint64_t btarget;
     uint32_t bimm;
 
     uint32_t imm;
-    uint32_t regs[33];
-    uint32_t sregs[24];
+    uint32_t regs[32];
+    uint64_t sregs[14];
     float_status fp_status;
     /* Stack protectors. Yes, it's a hw feature.  */
     uint32_t slr, shr;
 
     /* lwx/swx reserved address */
 #define RES_ADDR_NONE 0xffffffff /* Use 0xffffffff to indicate no reservation */
-    uint32_t res_addr;
+    target_ulong res_addr;
     uint32_t res_val;
 
     /* Internal flags.  */
@@ -277,7 +278,7 @@ struct CPUMBState {
     /* These fields are preserved on reset.  */
 
     struct {
-        uint32_t regs[16];
+        uint32_t regs[13];
     } pvr;
 };
 
@@ -297,6 +298,7 @@ struct MicroBlazeCPU {
     struct {
         bool stackprot;
         uint32_t base_vectors;
+        uint8_t addr_size;
         uint8_t use_fpu;
         uint8_t use_hw_mul;
         bool use_barrel;
@@ -340,8 +342,8 @@ int cpu_mb_signal_handler(int host_signum, void *pinfo,
 /* FIXME: MB uses variable pages down to 1K but linux only uses 4k.  */
 #define TARGET_PAGE_BITS 12
 
-#define TARGET_PHYS_ADDR_SPACE_BITS 32
-#define TARGET_VIRT_ADDR_SPACE_BITS 32
+#define TARGET_PHYS_ADDR_SPACE_BITS 64
+#define TARGET_VIRT_ADDR_SPACE_BITS 64
 
 #define CPU_RESOLVING_TYPE TYPE_MICROBLAZE_CPU
 
@@ -358,13 +360,17 @@ int cpu_mb_signal_handler(int host_signum, void *pinfo,
 
 static inline int cpu_mmu_index (CPUMBState *env, bool ifetch)
 {
-        /* Are we in nommu mode?.  */
-        if (!(env->sregs[SR_MSR] & MSR_VM))
-            return MMU_NOMMU_IDX;
+    MicroBlazeCPU *cpu = mb_env_get_cpu(env);
 
-	if (env->sregs[SR_MSR] & MSR_UM)
-            return MMU_USER_IDX;
-        return MMU_KERNEL_IDX;
+    /* Are we in nommu mode?.  */
+    if (!(env->sregs[SR_MSR] & MSR_VM) || !cpu->cfg.use_mmu) {
+        return MMU_NOMMU_IDX;
+    }
+
+    if (env->sregs[SR_MSR] & MSR_UM) {
+        return MMU_USER_IDX;
+    }
+    return MMU_KERNEL_IDX;
 }
 
 int mb_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int size, int rw,
diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
index fac6ee9263..bc753793ec 100644
--- a/target/microblaze/helper.c
+++ b/target/microblaze/helper.c
@@ -54,22 +54,12 @@ int mb_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw,
     MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
     CPUMBState *env = &cpu->env;
     unsigned int hit;
-    unsigned int mmu_available;
     int r = 1;
     int prot;
 
-    mmu_available = 0;
-    if (cpu->cfg.use_mmu) {
-        mmu_available = 1;
-        if ((cpu->cfg.pvr == C_PVR_FULL) &&
-            (env->pvr.regs[11] & PVR11_USE_MMU) != PVR11_USE_MMU) {
-            mmu_available = 0;
-        }
-    }
-
     /* Translate if the MMU is available and enabled.  */
-    if (mmu_available && (env->sregs[SR_MSR] & MSR_VM)) {
-        target_ulong vaddr, paddr;
+    if (mmu_idx != MMU_NOMMU_IDX) {
+        uint32_t vaddr, paddr;
         struct microblaze_mmu_lookup lu;
 
         hit = mmu_translate(&env->mmu, &lu, address, rw, mmu_idx);
@@ -152,7 +142,8 @@ void mb_cpu_do_interrupt(CPUState *cs)
             env->sregs[SR_MSR] |= MSR_EIP;
 
             qemu_log_mask(CPU_LOG_INT,
-                          "hw exception at pc=%x ear=%x esr=%x iflags=%x\n",
+                          "hw exception at pc=%" PRIx64 " ear=%" PRIx64 " "
+                          "esr=%" PRIx64 " iflags=%x\n",
                           env->sregs[SR_PC], env->sregs[SR_EAR],
                           env->sregs[SR_ESR], env->iflags);
             log_cpu_state_mask(CPU_LOG_INT, cs, 0);
@@ -175,7 +166,8 @@ void mb_cpu_do_interrupt(CPUState *cs)
                 /* was the branch immprefixed?.  */
                 if (env->bimm) {
                     qemu_log_mask(CPU_LOG_INT,
-                                  "bimm exception at pc=%x iflags=%x\n",
+                                  "bimm exception at pc=%" PRIx64 " "
+                                  "iflags=%x\n",
                                   env->sregs[SR_PC], env->iflags);
                     env->regs[17] -= 4;
                     log_cpu_state_mask(CPU_LOG_INT, cs, 0);
@@ -193,7 +185,8 @@ void mb_cpu_do_interrupt(CPUState *cs)
             env->sregs[SR_MSR] |= MSR_EIP;
 
             qemu_log_mask(CPU_LOG_INT,
-                          "exception at pc=%x ear=%x iflags=%x\n",
+                          "exception at pc=%" PRIx64 " ear=%" PRIx64 " "
+                          "iflags=%x\n",
                           env->sregs[SR_PC], env->sregs[SR_EAR], env->iflags);
             log_cpu_state_mask(CPU_LOG_INT, cs, 0);
             env->iflags &= ~(IMM_FLAG | D_FLAG);
@@ -230,7 +223,8 @@ void mb_cpu_do_interrupt(CPUState *cs)
             }
 #endif
             qemu_log_mask(CPU_LOG_INT,
-                         "interrupt at pc=%x msr=%x %x iflags=%x\n",
+                         "interrupt at pc=%" PRIx64 " msr=%" PRIx64 " %x "
+                         "iflags=%x\n",
                          env->sregs[SR_PC], env->sregs[SR_MSR], t, env->iflags);
 
             env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM \
@@ -248,7 +242,8 @@ void mb_cpu_do_interrupt(CPUState *cs)
             assert(!(env->iflags & D_FLAG));
             t = (env->sregs[SR_MSR] & (MSR_VM | MSR_UM)) << 1;
             qemu_log_mask(CPU_LOG_INT,
-                        "break at pc=%x msr=%x %x iflags=%x\n",
+                        "break at pc=%" PRIx64 " msr=%" PRIx64 " %x "
+                        "iflags=%x\n",
                         env->sregs[SR_PC], env->sregs[SR_MSR], t, env->iflags);
             log_cpu_state_mask(CPU_LOG_INT, cs, 0);
             env->sregs[SR_MSR] &= ~(MSR_VMS | MSR_UMS | MSR_VM | MSR_UM);
@@ -274,9 +269,10 @@ hwaddr mb_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     CPUMBState *env = &cpu->env;
     target_ulong vaddr, paddr = 0;
     struct microblaze_mmu_lookup lu;
+    int mmu_idx = cpu_mmu_index(env, false);
     unsigned int hit;
 
-    if (env->sregs[SR_MSR] & MSR_VM) {
+    if (mmu_idx != MMU_NOMMU_IDX) {
         hit = mmu_translate(&env->mmu, &lu, addr, 0, 0);
         if (hit) {
             vaddr = addr & TARGET_PAGE_MASK;
diff --git a/target/microblaze/helper.h b/target/microblaze/helper.h
index 71a6c0858d..2f8bdea22b 100644
--- a/target/microblaze/helper.h
+++ b/target/microblaze/helper.h
@@ -25,12 +25,12 @@ DEF_HELPER_3(fcmp_ge, i32, env, i32, i32)
 
 DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 #if !defined(CONFIG_USER_ONLY)
-DEF_HELPER_2(mmu_read, i32, env, i32)
-DEF_HELPER_3(mmu_write, void, env, i32, i32)
+DEF_HELPER_3(mmu_read, i32, env, i32, i32)
+DEF_HELPER_4(mmu_write, void, env, i32, i32, i32)
 #endif
 
-DEF_HELPER_5(memalign, void, env, i32, i32, i32, i32)
-DEF_HELPER_2(stackprot, void, env, i32)
+DEF_HELPER_5(memalign, void, env, tl, i32, i32, i32)
+DEF_HELPER_2(stackprot, void, env, tl)
 
 DEF_HELPER_2(get, i32, i32, i32)
 DEF_HELPER_3(put, void, i32, i32, i32)
diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c
index 9d5e6aa8a5..f4ceaea520 100644
--- a/target/microblaze/mmu.c
+++ b/target/microblaze/mmu.c
@@ -22,8 +22,6 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 
-#define D(x)
-
 static unsigned int tlb_decode_size(unsigned int f)
 {
     static const unsigned int sizes[] = {
@@ -81,34 +79,29 @@ unsigned int mmu_translate(struct microblaze_mmu *mmu,
 {
     unsigned int i, hit = 0;
     unsigned int tlb_ex = 0, tlb_wr = 0, tlb_zsel;
-    unsigned int tlb_size;
-    uint32_t tlb_tag, tlb_rpn, mask, t0;
+    uint64_t tlb_tag, tlb_rpn, mask;
+    uint32_t tlb_size, t0;
 
     lu->err = ERR_MISS;
     for (i = 0; i < ARRAY_SIZE(mmu->rams[RAM_TAG]); i++) {
-        uint32_t t, d;
+        uint64_t t, d;
 
         /* Lookup and decode.  */
         t = mmu->rams[RAM_TAG][i];
-        D(qemu_log("TLB %d valid=%d\n", i, t & TLB_VALID));
         if (t & TLB_VALID) {
             tlb_size = tlb_decode_size((t & TLB_PAGESZ_MASK) >> 7);
             if (tlb_size < TARGET_PAGE_SIZE) {
-                qemu_log("%d pages not supported\n", tlb_size);
+                qemu_log_mask(LOG_UNIMP, "%d pages not supported\n", tlb_size);
                 abort();
             }
 
-            mask = ~(tlb_size - 1);
+            mask = ~((uint64_t)tlb_size - 1);
             tlb_tag = t & TLB_EPN_MASK;
             if ((vaddr & mask) != (tlb_tag & mask)) {
-                D(qemu_log("TLB %d vaddr=%x != tag=%x\n",
-                           i, vaddr & mask, tlb_tag & mask));
                 continue;
             }
             if (mmu->tids[i]
                 && ((mmu->regs[MMU_R_PID] & 0xff) != mmu->tids[i])) {
-                D(qemu_log("TLB %d pid=%x != tid=%x\n",
-                           i, mmu->regs[MMU_R_PID], mmu->tids[i]));
                 continue;
             }
 
@@ -123,7 +116,8 @@ unsigned int mmu_translate(struct microblaze_mmu *mmu,
             t0 &= 0x3;
 
             if (tlb_zsel > mmu->c_mmu_zones) {
-                qemu_log_mask(LOG_GUEST_ERROR, "tlb zone select out of range! %d\n", tlb_zsel);
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "tlb zone select out of range! %d\n", tlb_zsel);
                 t0 = 1; /* Ignore.  */
             }
 
@@ -164,6 +158,7 @@ unsigned int mmu_translate(struct microblaze_mmu *mmu,
             tlb_rpn = d & TLB_RPN_MASK;
 
             lu->vaddr = tlb_tag;
+            lu->paddr = tlb_rpn & mmu->c_addr_mask;
             lu->paddr = tlb_rpn;
             lu->size = tlb_size;
             lu->err = ERR_HIT;
@@ -173,13 +168,14 @@ unsigned int mmu_translate(struct microblaze_mmu *mmu,
         }
     }
 done:
-    D(qemu_log("MMU vaddr=%x rw=%d tlb_wr=%d tlb_ex=%d hit=%d\n",
-              vaddr, rw, tlb_wr, tlb_ex, hit));
+    qemu_log_mask(CPU_LOG_MMU,
+                  "MMU vaddr=%" PRIx64 " rw=%d tlb_wr=%d tlb_ex=%d hit=%d\n",
+                  vaddr, rw, tlb_wr, tlb_ex, hit);
     return hit;
 }
 
 /* Writes/reads to the MMU's special regs end up here.  */
-uint32_t mmu_read(CPUMBState *env, uint32_t rn)
+uint32_t mmu_read(CPUMBState *env, bool ext, uint32_t rn)
 {
     unsigned int i;
     uint32_t r = 0;
@@ -188,50 +184,65 @@ uint32_t mmu_read(CPUMBState *env, uint32_t rn)
         qemu_log_mask(LOG_GUEST_ERROR, "MMU access on MMU-less system\n");
         return 0;
     }
+    if (ext && rn != MMU_R_TLBLO) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Extended access only to TLBLO.\n");
+        return 0;
+    }
 
     switch (rn) {
         /* Reads to HI/LO trig reads from the mmu rams.  */
         case MMU_R_TLBLO:
         case MMU_R_TLBHI:
             if (!(env->mmu.c_mmu_tlb_access & 1)) {
-                qemu_log_mask(LOG_GUEST_ERROR, "Invalid access to MMU reg %d\n", rn);
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Invalid access to MMU reg %d\n", rn);
                 return 0;
             }
 
             i = env->mmu.regs[MMU_R_TLBX] & 0xff;
-            r = env->mmu.rams[rn & 1][i];
+            r = extract64(env->mmu.rams[rn & 1][i], ext * 32, 32);
             if (rn == MMU_R_TLBHI)
                 env->mmu.regs[MMU_R_PID] = env->mmu.tids[i];
             break;
         case MMU_R_PID:
         case MMU_R_ZPR:
             if (!(env->mmu.c_mmu_tlb_access & 1)) {
-                qemu_log_mask(LOG_GUEST_ERROR, "Invalid access to MMU reg %d\n", rn);
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Invalid access to MMU reg %d\n", rn);
                 return 0;
             }
             r = env->mmu.regs[rn];
             break;
+        case MMU_R_TLBX:
+            r = env->mmu.regs[rn];
+            break;
         case MMU_R_TLBSX:
             qemu_log_mask(LOG_GUEST_ERROR, "TLBSX is write-only.\n");
             break;
         default:
-            r = env->mmu.regs[rn];
+            qemu_log_mask(LOG_GUEST_ERROR, "Invalid MMU register %d.\n", rn);
             break;
     }
-    D(qemu_log("%s rn=%d=%x\n", __func__, rn, r));
+    qemu_log_mask(CPU_LOG_MMU, "%s rn=%d=%x\n", __func__, rn, r);
     return r;
 }
 
-void mmu_write(CPUMBState *env, uint32_t rn, uint32_t v)
+void mmu_write(CPUMBState *env, bool ext, uint32_t rn, uint32_t v)
 {
     MicroBlazeCPU *cpu = mb_env_get_cpu(env);
+    uint64_t tmp64;
     unsigned int i;
-    D(qemu_log("%s rn=%d=%x old=%x\n", __func__, rn, v, env->mmu.regs[rn]));
+    qemu_log_mask(CPU_LOG_MMU,
+                  "%s rn=%d=%x old=%x\n", __func__, rn, v, env->mmu.regs[rn]);
 
     if (env->mmu.c_mmu < 2 || !env->mmu.c_mmu_tlb_access) {
         qemu_log_mask(LOG_GUEST_ERROR, "MMU access on MMU-less system\n");
         return;
     }
+    if (ext && rn != MMU_R_TLBLO) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Extended access only to TLBLO.\n");
+        return;
+    }
 
     switch (rn) {
         /* Writes to HI/LO trig writes to the mmu rams.  */
@@ -240,18 +251,19 @@ void mmu_write(CPUMBState *env, uint32_t rn, uint32_t v)
             i = env->mmu.regs[MMU_R_TLBX] & 0xff;
             if (rn == MMU_R_TLBHI) {
                 if (i < 3 && !(v & TLB_VALID) && qemu_loglevel_mask(~0))
-                    qemu_log_mask(LOG_GUEST_ERROR, "invalidating index %x at pc=%x\n",
+                    qemu_log_mask(LOG_GUEST_ERROR,
+                             "invalidating index %x at pc=%" PRIx64 "\n",
                              i, env->sregs[SR_PC]);
                 env->mmu.tids[i] = env->mmu.regs[MMU_R_PID] & 0xff;
                 mmu_flush_idx(env, i);
             }
-            env->mmu.rams[rn & 1][i] = v;
-
-            D(qemu_log("%s ram[%d][%d]=%x\n", __func__, rn & 1, i, v));
+            tmp64 = env->mmu.rams[rn & 1][i];
+            env->mmu.rams[rn & 1][i] = deposit64(tmp64, ext * 32, 32, v);
             break;
         case MMU_R_ZPR:
             if (env->mmu.c_mmu_tlb_access <= 1) {
-                qemu_log_mask(LOG_GUEST_ERROR, "Invalid access to MMU reg %d\n", rn);
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Invalid access to MMU reg %d\n", rn);
                 return;
             }
 
@@ -264,7 +276,8 @@ void mmu_write(CPUMBState *env, uint32_t rn, uint32_t v)
             break;
         case MMU_R_PID:
             if (env->mmu.c_mmu_tlb_access <= 1) {
-                qemu_log_mask(LOG_GUEST_ERROR, "Invalid access to MMU reg %d\n", rn);
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Invalid access to MMU reg %d\n", rn);
                 return;
             }
 
@@ -283,7 +296,8 @@ void mmu_write(CPUMBState *env, uint32_t rn, uint32_t v)
             int hit;
 
             if (env->mmu.c_mmu_tlb_access <= 1) {
-                qemu_log_mask(LOG_GUEST_ERROR, "Invalid access to MMU reg %d\n", rn);
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Invalid access to MMU reg %d\n", rn);
                 return;
             }
 
@@ -291,12 +305,13 @@ void mmu_write(CPUMBState *env, uint32_t rn, uint32_t v)
                                 v & TLB_EPN_MASK, 0, cpu_mmu_index(env, false));
             if (hit) {
                 env->mmu.regs[MMU_R_TLBX] = lu.idx;
-            } else
-                env->mmu.regs[MMU_R_TLBX] |= 0x80000000;
+            } else {
+                env->mmu.regs[MMU_R_TLBX] |= R_TBLX_MISS_MASK;
+            }
             break;
         }
         default:
-            env->mmu.regs[rn] = v;
+            qemu_log_mask(LOG_GUEST_ERROR, "Invalid MMU register %d.\n", rn);
             break;
    }
 }
diff --git a/target/microblaze/mmu.h b/target/microblaze/mmu.h
index 3b7a9983d5..a4272b6356 100644
--- a/target/microblaze/mmu.h
+++ b/target/microblaze/mmu.h
@@ -28,7 +28,7 @@
 #define RAM_TAG      0
 
 /* Tag portion */
-#define TLB_EPN_MASK          0xFFFFFC00 /* Effective Page Number */
+#define TLB_EPN_MASK          MAKE_64BIT_MASK(10, 64 - 10)
 #define TLB_PAGESZ_MASK       0x00000380
 #define TLB_PAGESZ(x)         (((x) & 0x7) << 7)
 #define PAGESZ_1K             0
@@ -42,7 +42,7 @@
 #define TLB_VALID             0x00000040 /* Entry is valid */
 
 /* Data portion */
-#define TLB_RPN_MASK          0xFFFFFC00 /* Real Page Number */
+#define TLB_RPN_MASK          MAKE_64BIT_MASK(10, 64 - 10)
 #define TLB_PERM_MASK         0x00000300
 #define TLB_EX                0x00000200 /* Instruction execution allowed */
 #define TLB_WR                0x00000100 /* Writes permitted */
@@ -54,20 +54,25 @@
 #define TLB_M                 0x00000002 /* Memory is coherent */
 #define TLB_G                 0x00000001 /* Memory is guarded from prefetch */
 
+/* TLBX  */
+#define R_TBLX_MISS_SHIFT 31
+#define R_TBLX_MISS_MASK (1U << R_TBLX_MISS_SHIFT)
+
 #define TLB_ENTRIES    64
 
 struct microblaze_mmu
 {
     /* Data and tag brams.  */
-    uint32_t rams[2][TLB_ENTRIES];
+    uint64_t rams[2][TLB_ENTRIES];
     /* We keep a separate ram for the tids to avoid the 48 bit tag width.  */
     uint8_t tids[TLB_ENTRIES];
     /* Control flops.  */
-    uint32_t regs[8];
+    uint32_t regs[3];
 
     int c_mmu;
     int c_mmu_tlb_access;
     int c_mmu_zones;
+    uint64_t c_addr_mask; /* Mask to apply to physical addresses.  */
 };
 
 struct microblaze_mmu_lookup
@@ -85,6 +90,6 @@ struct microblaze_mmu_lookup
 unsigned int mmu_translate(struct microblaze_mmu *mmu,
                            struct microblaze_mmu_lookup *lu,
                            target_ulong vaddr, int rw, int mmu_idx);
-uint32_t mmu_read(CPUMBState *env, uint32_t rn);
-void mmu_write(CPUMBState *env, uint32_t rn, uint32_t v);
+uint32_t mmu_read(CPUMBState *env, bool ea, uint32_t rn);
+void mmu_write(CPUMBState *env, bool ea, uint32_t rn, uint32_t v);
 void mmu_init(struct microblaze_mmu *mmu);
diff --git a/target/microblaze/op_helper.c b/target/microblaze/op_helper.c
index 1b4fe796e7..7cdbbcccae 100644
--- a/target/microblaze/op_helper.c
+++ b/target/microblaze/op_helper.c
@@ -94,16 +94,17 @@ void helper_debug(CPUMBState *env)
 {
     int i;
 
-    qemu_log("PC=%8.8x\n", env->sregs[SR_PC]);
-    qemu_log("rmsr=%x resr=%x rear=%x debug[%x] imm=%x iflags=%x\n",
+    qemu_log("PC=%" PRIx64 "\n", env->sregs[SR_PC]);
+    qemu_log("rmsr=%" PRIx64 " resr=%" PRIx64 " rear=%" PRIx64 " "
+             "debug[%x] imm=%x iflags=%x\n",
              env->sregs[SR_MSR], env->sregs[SR_ESR], env->sregs[SR_EAR],
              env->debug, env->imm, env->iflags);
-    qemu_log("btaken=%d btarget=%x mode=%s(saved=%s) eip=%d ie=%d\n",
+    qemu_log("btaken=%d btarget=%" PRIx64 " mode=%s(saved=%s) eip=%d ie=%d\n",
              env->btaken, env->btarget,
              (env->sregs[SR_MSR] & MSR_UM) ? "user" : "kernel",
              (env->sregs[SR_MSR] & MSR_UMS) ? "user" : "kernel",
-             (env->sregs[SR_MSR] & MSR_EIP),
-             (env->sregs[SR_MSR] & MSR_IE));
+             (bool)(env->sregs[SR_MSR] & MSR_EIP),
+             (bool)(env->sregs[SR_MSR] & MSR_IE));
     for (i = 0; i < 32; i++) {
         qemu_log("r%2.2d=%8.8x ", i, env->regs[i]);
         if ((i + 1) % 4 == 0)
@@ -439,12 +440,14 @@ uint32_t helper_pcmpbf(uint32_t a, uint32_t b)
     return 0;
 }
 
-void helper_memalign(CPUMBState *env, uint32_t addr, uint32_t dr, uint32_t wr,
+void helper_memalign(CPUMBState *env, target_ulong addr,
+                     uint32_t dr, uint32_t wr,
                      uint32_t mask)
 {
     if (addr & mask) {
             qemu_log_mask(CPU_LOG_INT,
-                          "unaligned access addr=%x mask=%x, wr=%d dr=r%d\n",
+                          "unaligned access addr=" TARGET_FMT_lx
+                          " mask=%x, wr=%d dr=r%d\n",
                           addr, mask, wr, dr);
             env->sregs[SR_EAR] = addr;
             env->sregs[SR_ESR] = ESR_EC_UNALIGNED_DATA | (wr << 10) \
@@ -459,10 +462,11 @@ void helper_memalign(CPUMBState *env, uint32_t addr, uint32_t dr, uint32_t wr,
     }
 }
 
-void helper_stackprot(CPUMBState *env, uint32_t addr)
+void helper_stackprot(CPUMBState *env, target_ulong addr)
 {
     if (addr < env->slr || addr > env->shr) {
-        qemu_log_mask(CPU_LOG_INT, "Stack protector violation at %x %x %x\n",
+        qemu_log_mask(CPU_LOG_INT, "Stack protector violation at "
+                      TARGET_FMT_lx " %x %x\n",
                       addr, env->slr, env->shr);
         env->sregs[SR_EAR] = addr;
         env->sregs[SR_ESR] = ESR_EC_STACKPROT;
@@ -472,14 +476,14 @@ void helper_stackprot(CPUMBState *env, uint32_t addr)
 
 #if !defined(CONFIG_USER_ONLY)
 /* Writes/reads to the MMU's special regs end up here.  */
-uint32_t helper_mmu_read(CPUMBState *env, uint32_t rn)
+uint32_t helper_mmu_read(CPUMBState *env, uint32_t ext, uint32_t rn)
 {
-    return mmu_read(env, rn);
+    return mmu_read(env, ext, rn);
 }
 
-void helper_mmu_write(CPUMBState *env, uint32_t rn, uint32_t v)
+void helper_mmu_write(CPUMBState *env, uint32_t ext, uint32_t rn, uint32_t v)
 {
-    mmu_write(env, rn, v);
+    mmu_write(env, ext, rn, v);
 }
 
 void mb_cpu_unassigned_access(CPUState *cs, hwaddr addr,
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index 0872dc9ded..b79600cba5 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -52,22 +52,22 @@
 #define DISAS_UPDATE  DISAS_TARGET_1 /* cpu state was modified dynamically */
 #define DISAS_TB_JUMP DISAS_TARGET_2 /* only pc was modified statically */
 
-static TCGv env_debug;
-static TCGv cpu_R[32];
-static TCGv cpu_SR[18];
-static TCGv env_imm;
-static TCGv env_btaken;
-static TCGv env_btarget;
-static TCGv env_iflags;
+static TCGv_i32 env_debug;
+static TCGv_i32 cpu_R[32];
+static TCGv_i64 cpu_SR[14];
+static TCGv_i32 env_imm;
+static TCGv_i32 env_btaken;
+static TCGv_i64 env_btarget;
+static TCGv_i32 env_iflags;
 static TCGv env_res_addr;
-static TCGv env_res_val;
+static TCGv_i32 env_res_val;
 
 #include "exec/gen-icount.h"
 
 /* This is the state at translation time.  */
 typedef struct DisasContext {
     MicroBlazeCPU *cpu;
-    target_ulong pc;
+    uint32_t pc;
 
     /* Decoder.  */
     int type_b;
@@ -105,16 +105,15 @@ static const char *regnames[] =
 
 static const char *special_regnames[] =
 {
-    "rpc", "rmsr", "sr2", "sr3", "sr4", "sr5", "sr6", "sr7",
-    "sr8", "sr9", "sr10", "sr11", "sr12", "sr13", "sr14", "sr15",
-    "sr16", "sr17", "sr18"
+    "rpc", "rmsr", "sr2", "rear", "sr4", "resr", "sr6", "rfsr",
+    "sr8", "sr9", "sr10", "rbtr", "sr12", "redr"
 };
 
 static inline void t_sync_flags(DisasContext *dc)
 {
     /* Synch the tb dependent flags between translator and runtime.  */
     if (dc->tb_flags != dc->synced_flags) {
-        tcg_gen_movi_tl(env_iflags, dc->tb_flags);
+        tcg_gen_movi_i32(env_iflags, dc->tb_flags);
         dc->synced_flags = dc->tb_flags;
     }
 }
@@ -124,7 +123,7 @@ static inline void t_gen_raise_exception(DisasContext *dc, uint32_t index)
     TCGv_i32 tmp = tcg_const_i32(index);
 
     t_sync_flags(dc);
-    tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
+    tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc);
     gen_helper_raise_exception(cpu_env, tmp);
     tcg_temp_free_i32(tmp);
     dc->is_jmp = DISAS_UPDATE;
@@ -143,41 +142,70 @@ static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
 {
     if (use_goto_tb(dc, dest)) {
         tcg_gen_goto_tb(n);
-        tcg_gen_movi_tl(cpu_SR[SR_PC], dest);
+        tcg_gen_movi_i64(cpu_SR[SR_PC], dest);
         tcg_gen_exit_tb((uintptr_t)dc->tb + n);
     } else {
-        tcg_gen_movi_tl(cpu_SR[SR_PC], dest);
+        tcg_gen_movi_i64(cpu_SR[SR_PC], dest);
         tcg_gen_exit_tb(0);
     }
 }
 
-static void read_carry(DisasContext *dc, TCGv d)
+static void read_carry(DisasContext *dc, TCGv_i32 d)
 {
-    tcg_gen_shri_tl(d, cpu_SR[SR_MSR], 31);
+    tcg_gen_extrl_i64_i32(d, cpu_SR[SR_MSR]);
+    tcg_gen_shri_i32(d, d, 31);
 }
 
 /*
  * write_carry sets the carry bits in MSR based on bit 0 of v.
  * v[31:1] are ignored.
  */
-static void write_carry(DisasContext *dc, TCGv v)
+static void write_carry(DisasContext *dc, TCGv_i32 v)
 {
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_shli_tl(t0, v, 31);
-    tcg_gen_sari_tl(t0, t0, 31);
-    tcg_gen_andi_tl(t0, t0, (MSR_C | MSR_CC));
-    tcg_gen_andi_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR],
-                    ~(MSR_C | MSR_CC));
-    tcg_gen_or_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t0);
-    tcg_temp_free(t0);
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(t0, v);
+    /* Deposit bit 0 into MSR_C and the alias MSR_CC.  */
+    tcg_gen_deposit_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t0, 2, 1);
+    tcg_gen_deposit_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t0, 31, 1);
+    tcg_temp_free_i64(t0);
 }
 
 static void write_carryi(DisasContext *dc, bool carry)
 {
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_movi_tl(t0, carry);
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    tcg_gen_movi_i32(t0, carry);
     write_carry(dc, t0);
-    tcg_temp_free(t0);
+    tcg_temp_free_i32(t0);
+}
+
+/*
+ * Returns true if the insn an illegal operation.
+ * If exceptions are enabled, an exception is raised.
+ */
+static bool trap_illegal(DisasContext *dc, bool cond)
+{
+    if (cond && (dc->tb_flags & MSR_EE_FLAG)
+        && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)) {
+        tcg_gen_movi_i64(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
+        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    }
+    return cond;
+}
+
+/*
+ * Returns true if the insn is illegal in userspace.
+ * If exceptions are enabled, an exception is raised.
+ */
+static bool trap_userspace(DisasContext *dc, bool cond)
+{
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
+    bool cond_user = cond && mem_index == MMU_USER_IDX;
+
+    if (cond_user && (dc->tb_flags & MSR_EE_FLAG)) {
+        tcg_gen_movi_i64(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
+        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    }
+    return cond_user;
 }
 
 /* True if ALU operand b is a small immediate that may deserve
@@ -188,13 +216,13 @@ static inline int dec_alu_op_b_is_small_imm(DisasContext *dc)
     return dc->type_b && !(dc->tb_flags & IMM_FLAG);
 }
 
-static inline TCGv *dec_alu_op_b(DisasContext *dc)
+static inline TCGv_i32 *dec_alu_op_b(DisasContext *dc)
 {
     if (dc->type_b) {
         if (dc->tb_flags & IMM_FLAG)
-            tcg_gen_ori_tl(env_imm, env_imm, dc->imm);
+            tcg_gen_ori_i32(env_imm, env_imm, dc->imm);
         else
-            tcg_gen_movi_tl(env_imm, (int32_t)((int16_t)dc->imm));
+            tcg_gen_movi_i32(env_imm, (int32_t)((int16_t)dc->imm));
         return &env_imm;
     } else
         return &cpu_R[dc->rb];
@@ -203,7 +231,7 @@ static inline TCGv *dec_alu_op_b(DisasContext *dc)
 static void dec_add(DisasContext *dc)
 {
     unsigned int k, c;
-    TCGv cf;
+    TCGv_i32 cf;
 
     k = dc->opcode & 4;
     c = dc->opcode & 2;
@@ -217,15 +245,15 @@ static void dec_add(DisasContext *dc)
         /* k - keep carry, no need to update MSR.  */
         /* If rd == r0, it's a nop.  */
         if (dc->rd) {
-            tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+            tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
 
             if (c) {
                 /* c - Add carry into the result.  */
-                cf = tcg_temp_new();
+                cf = tcg_temp_new_i32();
 
                 read_carry(dc, cf);
-                tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->rd], cf);
-                tcg_temp_free(cf);
+                tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf);
+                tcg_temp_free_i32(cf);
             }
         }
         return;
@@ -233,31 +261,31 @@ static void dec_add(DisasContext *dc)
 
     /* From now on, we can assume k is zero.  So we need to update MSR.  */
     /* Extract carry.  */
-    cf = tcg_temp_new();
+    cf = tcg_temp_new_i32();
     if (c) {
         read_carry(dc, cf);
     } else {
-        tcg_gen_movi_tl(cf, 0);
+        tcg_gen_movi_i32(cf, 0);
     }
 
     if (dc->rd) {
-        TCGv ncf = tcg_temp_new();
+        TCGv_i32 ncf = tcg_temp_new_i32();
         gen_helper_carry(ncf, cpu_R[dc->ra], *(dec_alu_op_b(dc)), cf);
-        tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
-        tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->rd], cf);
+        tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+        tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf);
         write_carry(dc, ncf);
-        tcg_temp_free(ncf);
+        tcg_temp_free_i32(ncf);
     } else {
         gen_helper_carry(cf, cpu_R[dc->ra], *(dec_alu_op_b(dc)), cf);
         write_carry(dc, cf);
     }
-    tcg_temp_free(cf);
+    tcg_temp_free_i32(cf);
 }
 
 static void dec_sub(DisasContext *dc)
 {
     unsigned int u, cmp, k, c;
-    TCGv cf, na;
+    TCGv_i32 cf, na;
 
     u = dc->imm & 2;
     k = dc->opcode & 4;
@@ -283,15 +311,15 @@ static void dec_sub(DisasContext *dc)
         /* k - keep carry, no need to update MSR.  */
         /* If rd == r0, it's a nop.  */
         if (dc->rd) {
-            tcg_gen_sub_tl(cpu_R[dc->rd], *(dec_alu_op_b(dc)), cpu_R[dc->ra]);
+            tcg_gen_sub_i32(cpu_R[dc->rd], *(dec_alu_op_b(dc)), cpu_R[dc->ra]);
 
             if (c) {
                 /* c - Add carry into the result.  */
-                cf = tcg_temp_new();
+                cf = tcg_temp_new_i32();
 
                 read_carry(dc, cf);
-                tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->rd], cf);
-                tcg_temp_free(cf);
+                tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf);
+                tcg_temp_free_i32(cf);
             }
         }
         return;
@@ -299,41 +327,38 @@ static void dec_sub(DisasContext *dc)
 
     /* From now on, we can assume k is zero.  So we need to update MSR.  */
     /* Extract carry. And complement a into na.  */
-    cf = tcg_temp_new();
-    na = tcg_temp_new();
+    cf = tcg_temp_new_i32();
+    na = tcg_temp_new_i32();
     if (c) {
         read_carry(dc, cf);
     } else {
-        tcg_gen_movi_tl(cf, 1);
+        tcg_gen_movi_i32(cf, 1);
     }
 
     /* d = b + ~a + c. carry defaults to 1.  */
-    tcg_gen_not_tl(na, cpu_R[dc->ra]);
+    tcg_gen_not_i32(na, cpu_R[dc->ra]);
 
     if (dc->rd) {
-        TCGv ncf = tcg_temp_new();
+        TCGv_i32 ncf = tcg_temp_new_i32();
         gen_helper_carry(ncf, na, *(dec_alu_op_b(dc)), cf);
-        tcg_gen_add_tl(cpu_R[dc->rd], na, *(dec_alu_op_b(dc)));
-        tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->rd], cf);
+        tcg_gen_add_i32(cpu_R[dc->rd], na, *(dec_alu_op_b(dc)));
+        tcg_gen_add_i32(cpu_R[dc->rd], cpu_R[dc->rd], cf);
         write_carry(dc, ncf);
-        tcg_temp_free(ncf);
+        tcg_temp_free_i32(ncf);
     } else {
         gen_helper_carry(cf, na, *(dec_alu_op_b(dc)), cf);
         write_carry(dc, cf);
     }
-    tcg_temp_free(cf);
-    tcg_temp_free(na);
+    tcg_temp_free_i32(cf);
+    tcg_temp_free_i32(na);
 }
 
 static void dec_pattern(DisasContext *dc)
 {
     unsigned int mode;
 
-    if ((dc->tb_flags & MSR_EE_FLAG)
-          && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)
-          && !dc->cpu->cfg.use_pcmp_instr) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    if (trap_illegal(dc, !dc->cpu->cfg.use_pcmp_instr)) {
+        return;
     }
 
     mode = dc->opcode & 3;
@@ -347,14 +372,14 @@ static void dec_pattern(DisasContext *dc)
         case 2:
             LOG_DIS("pcmpeq r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
             if (dc->rd) {
-                tcg_gen_setcond_tl(TCG_COND_EQ, cpu_R[dc->rd],
+                tcg_gen_setcond_i32(TCG_COND_EQ, cpu_R[dc->rd],
                                    cpu_R[dc->ra], cpu_R[dc->rb]);
             }
             break;
         case 3:
             LOG_DIS("pcmpne r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
             if (dc->rd) {
-                tcg_gen_setcond_tl(TCG_COND_NE, cpu_R[dc->rd],
+                tcg_gen_setcond_i32(TCG_COND_NE, cpu_R[dc->rd],
                                    cpu_R[dc->ra], cpu_R[dc->rb]);
             }
             break;
@@ -381,9 +406,9 @@ static void dec_and(DisasContext *dc)
         return;
 
     if (not) {
-        tcg_gen_andc_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+        tcg_gen_andc_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
     } else
-        tcg_gen_and_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+        tcg_gen_and_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
 }
 
 static void dec_or(DisasContext *dc)
@@ -395,7 +420,7 @@ static void dec_or(DisasContext *dc)
 
     LOG_DIS("or r%d r%d r%d imm=%x\n", dc->rd, dc->ra, dc->rb, dc->imm);
     if (dc->rd)
-        tcg_gen_or_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+        tcg_gen_or_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
 }
 
 static void dec_xor(DisasContext *dc)
@@ -407,43 +432,54 @@ static void dec_xor(DisasContext *dc)
 
     LOG_DIS("xor r%d\n", dc->rd);
     if (dc->rd)
-        tcg_gen_xor_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+        tcg_gen_xor_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
 }
 
-static inline void msr_read(DisasContext *dc, TCGv d)
+static inline void msr_read(DisasContext *dc, TCGv_i32 d)
 {
-    tcg_gen_mov_tl(d, cpu_SR[SR_MSR]);
+    tcg_gen_extrl_i64_i32(d, cpu_SR[SR_MSR]);
 }
 
-static inline void msr_write(DisasContext *dc, TCGv v)
+static inline void msr_write(DisasContext *dc, TCGv_i32 v)
 {
-    TCGv t;
+    TCGv_i64 t;
 
-    t = tcg_temp_new();
+    t = tcg_temp_new_i64();
     dc->cpustate_changed = 1;
     /* PVR bit is not writable.  */
-    tcg_gen_andi_tl(t, v, ~MSR_PVR);
-    tcg_gen_andi_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR], MSR_PVR);
-    tcg_gen_or_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t);
-    tcg_temp_free(t);
+    tcg_gen_extu_i32_i64(t, v);
+    tcg_gen_andi_i64(t, t, ~MSR_PVR);
+    tcg_gen_andi_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], MSR_PVR);
+    tcg_gen_or_i64(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t);
+    tcg_temp_free_i64(t);
 }
 
 static void dec_msr(DisasContext *dc)
 {
     CPUState *cs = CPU(dc->cpu);
-    TCGv t0, t1;
-    unsigned int sr, to, rn;
-    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
+    TCGv_i32 t0, t1;
+    unsigned int sr, rn;
+    bool to, clrset, extended = false;
 
-    sr = dc->imm & ((1 << 14) - 1);
-    to = dc->imm & (1 << 14);
+    sr = extract32(dc->imm, 0, 14);
+    to = extract32(dc->imm, 14, 1);
+    clrset = extract32(dc->imm, 15, 1) == 0;
     dc->type_b = 1;
-    if (to)
+    if (to) {
         dc->cpustate_changed = 1;
+    }
+
+    /* Extended MSRs are only available if addr_size > 32.  */
+    if (dc->cpu->cfg.addr_size > 32) {
+        /* The E-bit is encoded differently for To/From MSR.  */
+        static const unsigned int e_bit[] = { 19, 24 };
+
+        extended = extract32(dc->imm, e_bit[to], 1);
+    }
 
     /* msrclr and msrset.  */
-    if (!(dc->imm & (1 << 15))) {
-        unsigned int clr = dc->ir & (1 << 16);
+    if (clrset) {
+        bool clr = extract32(dc->ir, 16, 1);
 
         LOG_DIS("msr%s r%d imm=%x\n", clr ? "clr" : "set",
                 dc->rd, dc->imm);
@@ -453,52 +489,51 @@ static void dec_msr(DisasContext *dc)
             return;
         }
 
-        if ((dc->tb_flags & MSR_EE_FLAG)
-            && mem_index == MMU_USER_IDX && (dc->imm != 4 && dc->imm != 0)) {
-            tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
-            t_gen_raise_exception(dc, EXCP_HW_EXCP);
+        if (trap_userspace(dc, dc->imm != 4 && dc->imm != 0)) {
             return;
         }
 
         if (dc->rd)
             msr_read(dc, cpu_R[dc->rd]);
 
-        t0 = tcg_temp_new();
-        t1 = tcg_temp_new();
+        t0 = tcg_temp_new_i32();
+        t1 = tcg_temp_new_i32();
         msr_read(dc, t0);
-        tcg_gen_mov_tl(t1, *(dec_alu_op_b(dc)));
+        tcg_gen_mov_i32(t1, *(dec_alu_op_b(dc)));
 
         if (clr) {
-            tcg_gen_not_tl(t1, t1);
-            tcg_gen_and_tl(t0, t0, t1);
+            tcg_gen_not_i32(t1, t1);
+            tcg_gen_and_i32(t0, t0, t1);
         } else
-            tcg_gen_or_tl(t0, t0, t1);
+            tcg_gen_or_i32(t0, t0, t1);
         msr_write(dc, t0);
-        tcg_temp_free(t0);
-        tcg_temp_free(t1);
-	tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc + 4);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+        tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc + 4);
         dc->is_jmp = DISAS_UPDATE;
         return;
     }
 
-    if (to) {
-        if ((dc->tb_flags & MSR_EE_FLAG)
-             && mem_index == MMU_USER_IDX) {
-            tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
-            t_gen_raise_exception(dc, EXCP_HW_EXCP);
-            return;
-        }
+    if (trap_userspace(dc, to)) {
+        return;
     }
 
 #if !defined(CONFIG_USER_ONLY)
     /* Catch read/writes to the mmu block.  */
     if ((sr & ~0xff) == 0x1000) {
+        TCGv_i32 tmp_ext = tcg_const_i32(extended);
+        TCGv_i32 tmp_sr;
+
         sr &= 7;
+        tmp_sr = tcg_const_i32(sr);
         LOG_DIS("m%ss sr%d r%d imm=%x\n", to ? "t" : "f", sr, dc->ra, dc->imm);
-        if (to)
-            gen_helper_mmu_write(cpu_env, tcg_const_tl(sr), cpu_R[dc->ra]);
-        else
-            gen_helper_mmu_read(cpu_R[dc->rd], cpu_env, tcg_const_tl(sr));
+        if (to) {
+            gen_helper_mmu_write(cpu_env, tmp_ext, tmp_sr, cpu_R[dc->ra]);
+        } else {
+            gen_helper_mmu_read(cpu_R[dc->rd], cpu_env, tmp_ext, tmp_sr);
+        }
+        tcg_temp_free_i32(tmp_sr);
+        tcg_temp_free_i32(tmp_ext);
         return;
     }
 #endif
@@ -511,20 +546,18 @@ static void dec_msr(DisasContext *dc)
             case 1:
                 msr_write(dc, cpu_R[dc->ra]);
                 break;
-            case 0x3:
-                tcg_gen_mov_tl(cpu_SR[SR_EAR], cpu_R[dc->ra]);
-                break;
-            case 0x5:
-                tcg_gen_mov_tl(cpu_SR[SR_ESR], cpu_R[dc->ra]);
-                break;
-            case 0x7:
-                tcg_gen_andi_tl(cpu_SR[SR_FSR], cpu_R[dc->ra], 31);
+            case SR_EAR:
+            case SR_ESR:
+            case SR_FSR:
+                tcg_gen_extu_i32_i64(cpu_SR[sr], cpu_R[dc->ra]);
                 break;
             case 0x800:
-                tcg_gen_st_tl(cpu_R[dc->ra], cpu_env, offsetof(CPUMBState, slr));
+                tcg_gen_st_i32(cpu_R[dc->ra],
+                               cpu_env, offsetof(CPUMBState, slr));
                 break;
             case 0x802:
-                tcg_gen_st_tl(cpu_R[dc->ra], cpu_env, offsetof(CPUMBState, shr));
+                tcg_gen_st_i32(cpu_R[dc->ra],
+                               cpu_env, offsetof(CPUMBState, shr));
                 break;
             default:
                 cpu_abort(CPU(dc->cpu), "unknown mts reg %x\n", sr);
@@ -535,44 +568,32 @@ static void dec_msr(DisasContext *dc)
 
         switch (sr) {
             case 0:
-                tcg_gen_movi_tl(cpu_R[dc->rd], dc->pc);
+                tcg_gen_movi_i32(cpu_R[dc->rd], dc->pc);
                 break;
             case 1:
                 msr_read(dc, cpu_R[dc->rd]);
                 break;
-            case 0x3:
-                tcg_gen_mov_tl(cpu_R[dc->rd], cpu_SR[SR_EAR]);
-                break;
-            case 0x5:
-                tcg_gen_mov_tl(cpu_R[dc->rd], cpu_SR[SR_ESR]);
-                break;
-             case 0x7:
-                tcg_gen_mov_tl(cpu_R[dc->rd], cpu_SR[SR_FSR]);
-                break;
-            case 0xb:
-                tcg_gen_mov_tl(cpu_R[dc->rd], cpu_SR[SR_BTR]);
+            case SR_EAR:
+                if (extended) {
+                    tcg_gen_extrh_i64_i32(cpu_R[dc->rd], cpu_SR[sr]);
+                    break;
+                }
+            case SR_ESR:
+            case SR_FSR:
+            case SR_BTR:
+                tcg_gen_extrl_i64_i32(cpu_R[dc->rd], cpu_SR[sr]);
                 break;
             case 0x800:
-                tcg_gen_ld_tl(cpu_R[dc->rd], cpu_env, offsetof(CPUMBState, slr));
+                tcg_gen_ld_i32(cpu_R[dc->rd],
+                               cpu_env, offsetof(CPUMBState, slr));
                 break;
             case 0x802:
-                tcg_gen_ld_tl(cpu_R[dc->rd], cpu_env, offsetof(CPUMBState, shr));
+                tcg_gen_ld_i32(cpu_R[dc->rd],
+                               cpu_env, offsetof(CPUMBState, shr));
                 break;
-            case 0x2000:
-            case 0x2001:
-            case 0x2002:
-            case 0x2003:
-            case 0x2004:
-            case 0x2005:
-            case 0x2006:
-            case 0x2007:
-            case 0x2008:
-            case 0x2009:
-            case 0x200a:
-            case 0x200b:
-            case 0x200c:
+            case 0x2000 ... 0x200c:
                 rn = sr & 0xf;
-                tcg_gen_ld_tl(cpu_R[dc->rd],
+                tcg_gen_ld_i32(cpu_R[dc->rd],
                               cpu_env, offsetof(CPUMBState, pvr.regs[rn]));
                 break;
             default:
@@ -582,21 +603,17 @@ static void dec_msr(DisasContext *dc)
     }
 
     if (dc->rd == 0) {
-        tcg_gen_movi_tl(cpu_R[0], 0);
+        tcg_gen_movi_i32(cpu_R[0], 0);
     }
 }
 
 /* Multiplier unit.  */
 static void dec_mul(DisasContext *dc)
 {
-    TCGv tmp;
+    TCGv_i32 tmp;
     unsigned int subcode;
 
-    if ((dc->tb_flags & MSR_EE_FLAG)
-         && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)
-         && !dc->cpu->cfg.use_hw_mul) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    if (trap_illegal(dc, !dc->cpu->cfg.use_hw_mul)) {
         return;
     }
 
@@ -604,7 +621,7 @@ static void dec_mul(DisasContext *dc)
 
     if (dc->type_b) {
         LOG_DIS("muli r%d r%d %x\n", dc->rd, dc->ra, dc->imm);
-        tcg_gen_mul_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+        tcg_gen_mul_i32(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
         return;
     }
 
@@ -613,29 +630,31 @@ static void dec_mul(DisasContext *dc)
         /* nop??? */
     }
 
-    tmp = tcg_temp_new();
+    tmp = tcg_temp_new_i32();
     switch (subcode) {
         case 0:
             LOG_DIS("mul r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
-            tcg_gen_mul_tl(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            tcg_gen_mul_i32(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
             break;
         case 1:
             LOG_DIS("mulh r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
-            tcg_gen_muls2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            tcg_gen_muls2_i32(tmp, cpu_R[dc->rd],
+                              cpu_R[dc->ra], cpu_R[dc->rb]);
             break;
         case 2:
             LOG_DIS("mulhsu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
-            tcg_gen_mulsu2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            tcg_gen_mulsu2_i32(tmp, cpu_R[dc->rd],
+                               cpu_R[dc->ra], cpu_R[dc->rb]);
             break;
         case 3:
             LOG_DIS("mulhu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
-            tcg_gen_mulu2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            tcg_gen_mulu2_i32(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
             break;
         default:
             cpu_abort(CPU(dc->cpu), "unknown MUL insn %x\n", subcode);
             break;
     }
-    tcg_temp_free(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 /* Div unit.  */
@@ -646,10 +665,8 @@ static void dec_div(DisasContext *dc)
     u = dc->imm & 2; 
     LOG_DIS("div\n");
 
-    if ((dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)
-          && !dc->cpu->cfg.use_div) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    if (trap_illegal(dc, !dc->cpu->cfg.use_div)) {
+        return;
     }
 
     if (u)
@@ -659,20 +676,16 @@ static void dec_div(DisasContext *dc)
         gen_helper_divs(cpu_R[dc->rd], cpu_env, *(dec_alu_op_b(dc)),
                         cpu_R[dc->ra]);
     if (!dc->rd)
-        tcg_gen_movi_tl(cpu_R[dc->rd], 0);
+        tcg_gen_movi_i32(cpu_R[dc->rd], 0);
 }
 
 static void dec_barrel(DisasContext *dc)
 {
-    TCGv t0;
+    TCGv_i32 t0;
     unsigned int imm_w, imm_s;
     bool s, t, e = false, i = false;
 
-    if ((dc->tb_flags & MSR_EE_FLAG)
-          && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)
-          && !dc->cpu->cfg.use_barrel) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    if (trap_illegal(dc, !dc->cpu->cfg.use_barrel)) {
         return;
     }
 
@@ -710,45 +723,45 @@ static void dec_barrel(DisasContext *dc)
                                 imm_s, width);
         }
     } else {
-        t0 = tcg_temp_new();
+        t0 = tcg_temp_new_i32();
 
-        tcg_gen_mov_tl(t0, *(dec_alu_op_b(dc)));
-        tcg_gen_andi_tl(t0, t0, 31);
+        tcg_gen_mov_i32(t0, *(dec_alu_op_b(dc)));
+        tcg_gen_andi_i32(t0, t0, 31);
 
         if (s) {
-            tcg_gen_shl_tl(cpu_R[dc->rd], cpu_R[dc->ra], t0);
+            tcg_gen_shl_i32(cpu_R[dc->rd], cpu_R[dc->ra], t0);
         } else {
             if (t) {
-                tcg_gen_sar_tl(cpu_R[dc->rd], cpu_R[dc->ra], t0);
+                tcg_gen_sar_i32(cpu_R[dc->rd], cpu_R[dc->ra], t0);
             } else {
-                tcg_gen_shr_tl(cpu_R[dc->rd], cpu_R[dc->ra], t0);
+                tcg_gen_shr_i32(cpu_R[dc->rd], cpu_R[dc->ra], t0);
             }
         }
-        tcg_temp_free(t0);
+        tcg_temp_free_i32(t0);
     }
 }
 
 static void dec_bit(DisasContext *dc)
 {
     CPUState *cs = CPU(dc->cpu);
-    TCGv t0;
+    TCGv_i32 t0;
     unsigned int op;
-    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     op = dc->ir & ((1 << 9) - 1);
     switch (op) {
         case 0x21:
             /* src.  */
-            t0 = tcg_temp_new();
+            t0 = tcg_temp_new_i32();
 
             LOG_DIS("src r%d r%d\n", dc->rd, dc->ra);
-            tcg_gen_andi_tl(t0, cpu_SR[SR_MSR], MSR_CC);
+            tcg_gen_extrl_i64_i32(t0, cpu_SR[SR_MSR]);
+            tcg_gen_andi_i32(t0, t0, MSR_CC);
             write_carry(dc, cpu_R[dc->ra]);
             if (dc->rd) {
-                tcg_gen_shri_tl(cpu_R[dc->rd], cpu_R[dc->ra], 1);
-                tcg_gen_or_tl(cpu_R[dc->rd], cpu_R[dc->rd], t0);
+                tcg_gen_shri_i32(cpu_R[dc->rd], cpu_R[dc->ra], 1);
+                tcg_gen_or_i32(cpu_R[dc->rd], cpu_R[dc->rd], t0);
             }
-            tcg_temp_free(t0);
+            tcg_temp_free_i32(t0);
             break;
 
         case 0x1:
@@ -760,9 +773,9 @@ static void dec_bit(DisasContext *dc)
             write_carry(dc, cpu_R[dc->ra]);
             if (dc->rd) {
                 if (op == 0x41)
-                    tcg_gen_shri_tl(cpu_R[dc->rd], cpu_R[dc->ra], 1);
+                    tcg_gen_shri_i32(cpu_R[dc->rd], cpu_R[dc->ra], 1);
                 else
-                    tcg_gen_sari_tl(cpu_R[dc->rd], cpu_R[dc->ra], 1);
+                    tcg_gen_sari_i32(cpu_R[dc->rd], cpu_R[dc->ra], 1);
             }
             break;
         case 0x60:
@@ -779,29 +792,16 @@ static void dec_bit(DisasContext *dc)
         case 0x76:
             /* wdc.  */
             LOG_DIS("wdc r%d\n", dc->ra);
-            if ((dc->tb_flags & MSR_EE_FLAG)
-                 && mem_index == MMU_USER_IDX) {
-                tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
-                t_gen_raise_exception(dc, EXCP_HW_EXCP);
-                return;
-            }
+            trap_userspace(dc, true);
             break;
         case 0x68:
             /* wic.  */
             LOG_DIS("wic r%d\n", dc->ra);
-            if ((dc->tb_flags & MSR_EE_FLAG)
-                 && mem_index == MMU_USER_IDX) {
-                tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
-                t_gen_raise_exception(dc, EXCP_HW_EXCP);
-                return;
-            }
+            trap_userspace(dc, true);
             break;
         case 0xe0:
-            if ((dc->tb_flags & MSR_EE_FLAG)
-                && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)
-                && !dc->cpu->cfg.use_pcmp_instr) {
-                tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-                t_gen_raise_exception(dc, EXCP_HW_EXCP);
+            if (trap_illegal(dc, !dc->cpu->cfg.use_pcmp_instr)) {
+                return;
             }
             if (dc->cpu->cfg.use_pcmp_instr) {
                 tcg_gen_clzi_i32(cpu_R[dc->rd], cpu_R[dc->ra], 32);
@@ -828,101 +828,128 @@ static inline void sync_jmpstate(DisasContext *dc)
 {
     if (dc->jmp == JMP_DIRECT || dc->jmp == JMP_DIRECT_CC) {
         if (dc->jmp == JMP_DIRECT) {
-            tcg_gen_movi_tl(env_btaken, 1);
+            tcg_gen_movi_i32(env_btaken, 1);
         }
         dc->jmp = JMP_INDIRECT;
-        tcg_gen_movi_tl(env_btarget, dc->jmp_pc);
+        tcg_gen_movi_i64(env_btarget, dc->jmp_pc);
     }
 }
 
 static void dec_imm(DisasContext *dc)
 {
     LOG_DIS("imm %x\n", dc->imm << 16);
-    tcg_gen_movi_tl(env_imm, (dc->imm << 16));
+    tcg_gen_movi_i32(env_imm, (dc->imm << 16));
     dc->tb_flags |= IMM_FLAG;
     dc->clear_imm = 0;
 }
 
-static inline TCGv *compute_ldst_addr(DisasContext *dc, TCGv *t)
+static inline void compute_ldst_addr(DisasContext *dc, bool ea, TCGv t)
 {
-    unsigned int extimm = dc->tb_flags & IMM_FLAG;
-    /* Should be set to one if r1 is used by loadstores.  */
-    int stackprot = 0;
+    bool extimm = dc->tb_flags & IMM_FLAG;
+    /* Should be set to true if r1 is used by loadstores.  */
+    bool stackprot = false;
+    TCGv_i32 t32;
 
     /* All load/stores use ra.  */
     if (dc->ra == 1 && dc->cpu->cfg.stackprot) {
-        stackprot = 1;
+        stackprot = true;
     }
 
     /* Treat the common cases first.  */
     if (!dc->type_b) {
-        /* If any of the regs is r0, return a ptr to the other.  */
+        if (ea) {
+            int addr_size = dc->cpu->cfg.addr_size;
+
+            if (addr_size == 32) {
+                tcg_gen_extu_i32_tl(t, cpu_R[dc->rb]);
+                return;
+            }
+
+            tcg_gen_concat_i32_i64(t, cpu_R[dc->rb], cpu_R[dc->ra]);
+            if (addr_size < 64) {
+                /* Mask off out of range bits.  */
+                tcg_gen_andi_i64(t, t, MAKE_64BIT_MASK(0, addr_size));
+            }
+            return;
+        }
+
+        /* If any of the regs is r0, set t to the value of the other reg.  */
         if (dc->ra == 0) {
-            return &cpu_R[dc->rb];
+            tcg_gen_extu_i32_tl(t, cpu_R[dc->rb]);
+            return;
         } else if (dc->rb == 0) {
-            return &cpu_R[dc->ra];
+            tcg_gen_extu_i32_tl(t, cpu_R[dc->ra]);
+            return;
         }
 
         if (dc->rb == 1 && dc->cpu->cfg.stackprot) {
-            stackprot = 1;
+            stackprot = true;
         }
 
-        *t = tcg_temp_new();
-        tcg_gen_add_tl(*t, cpu_R[dc->ra], cpu_R[dc->rb]);
+        t32 = tcg_temp_new_i32();
+        tcg_gen_add_i32(t32, cpu_R[dc->ra], cpu_R[dc->rb]);
+        tcg_gen_extu_i32_tl(t, t32);
+        tcg_temp_free_i32(t32);
 
         if (stackprot) {
-            gen_helper_stackprot(cpu_env, *t);
+            gen_helper_stackprot(cpu_env, t);
         }
-        return t;
+        return;
     }
     /* Immediate.  */
+    t32 = tcg_temp_new_i32();
     if (!extimm) {
-        if (dc->imm == 0) {
-            return &cpu_R[dc->ra];
-        }
-        *t = tcg_temp_new();
-        tcg_gen_movi_tl(*t, (int32_t)((int16_t)dc->imm));
-        tcg_gen_add_tl(*t, cpu_R[dc->ra], *t);
+        tcg_gen_addi_i32(t32, cpu_R[dc->ra], (int16_t)dc->imm);
     } else {
-        *t = tcg_temp_new();
-        tcg_gen_add_tl(*t, cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+        tcg_gen_add_i32(t32, cpu_R[dc->ra], *(dec_alu_op_b(dc)));
     }
+    tcg_gen_extu_i32_tl(t, t32);
+    tcg_temp_free_i32(t32);
 
     if (stackprot) {
-        gen_helper_stackprot(cpu_env, *t);
+        gen_helper_stackprot(cpu_env, t);
     }
-    return t;
+    return;
 }
 
 static void dec_load(DisasContext *dc)
 {
-    TCGv t, v, *addr;
-    unsigned int size, rev = 0, ex = 0;
+    TCGv_i32 v;
+    TCGv addr;
+    unsigned int size;
+    bool rev = false, ex = false, ea = false;
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
     TCGMemOp mop;
 
     mop = dc->opcode & 3;
     size = 1 << mop;
     if (!dc->type_b) {
-        rev = (dc->ir >> 9) & 1;
-        ex = (dc->ir >> 10) & 1;
+        ea = extract32(dc->ir, 7, 1);
+        rev = extract32(dc->ir, 9, 1);
+        ex = extract32(dc->ir, 10, 1);
     }
     mop |= MO_TE;
     if (rev) {
         mop ^= MO_BSWAP;
     }
 
-    if (size > 4 && (dc->tb_flags & MSR_EE_FLAG)
-          && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    if (trap_illegal(dc, size > 4)) {
         return;
     }
 
-    LOG_DIS("l%d%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
-                                                        ex ? "x" : "");
+    if (trap_userspace(dc, ea)) {
+        return;
+    }
+
+    LOG_DIS("l%d%s%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
+                                                        ex ? "x" : "",
+                                                        ea ? "ea" : "");
 
     t_sync_flags(dc);
-    addr = compute_ldst_addr(dc, &t);
+    addr = tcg_temp_new();
+    compute_ldst_addr(dc, ea, addr);
+    /* Extended addressing bypasses the MMU.  */
+    mem_index = ea ? MMU_NOMMU_IDX : mem_index;
 
     /*
      * When doing reverse accesses we need to do two things.
@@ -941,17 +968,10 @@ static void dec_load(DisasContext *dc)
                    11 -> 00 */
                 TCGv low = tcg_temp_new();
 
-                /* Force addr into the temp.  */
-                if (addr != &t) {
-                    t = tcg_temp_new();
-                    tcg_gen_mov_tl(t, *addr);
-                    addr = &t;
-                }
-
-                tcg_gen_andi_tl(low, t, 3);
+                tcg_gen_andi_tl(low, addr, 3);
                 tcg_gen_sub_tl(low, tcg_const_tl(3), low);
-                tcg_gen_andi_tl(t, t, ~3);
-                tcg_gen_or_tl(t, t, low);
+                tcg_gen_andi_tl(addr, addr, ~3);
+                tcg_gen_or_tl(addr, addr, low);
                 tcg_temp_free(low);
                 break;
             }
@@ -959,14 +979,7 @@ static void dec_load(DisasContext *dc)
             case 2:
                 /* 00 -> 10
                    10 -> 00.  */
-                /* Force addr into the temp.  */
-                if (addr != &t) {
-                    t = tcg_temp_new();
-                    tcg_gen_xori_tl(t, *addr, 2);
-                    addr = &t;
-                } else {
-                    tcg_gen_xori_tl(t, t, 2);
-                }
+                tcg_gen_xori_tl(addr, addr, 2);
                 break;
             default:
                 cpu_abort(CPU(dc->cpu), "Invalid reverse size\n");
@@ -976,13 +989,7 @@ static void dec_load(DisasContext *dc)
 
     /* lwx does not throw unaligned access errors, so force alignment */
     if (ex) {
-        /* Force addr into the temp.  */
-        if (addr != &t) {
-            t = tcg_temp_new();
-            tcg_gen_mov_tl(t, *addr);
-            addr = &t;
-        }
-        tcg_gen_andi_tl(t, t, ~3);
+        tcg_gen_andi_tl(addr, addr, ~3);
     }
 
     /* If we get a fault on a dslot, the jmpstate better be in sync.  */
@@ -995,90 +1002,92 @@ static void dec_load(DisasContext *dc)
      * into v. If the load succeeds, we verify alignment of the
      * address and if that succeeds we write into the destination reg.
      */
-    v = tcg_temp_new();
-    tcg_gen_qemu_ld_tl(v, *addr, cpu_mmu_index(&dc->cpu->env, false), mop);
+    v = tcg_temp_new_i32();
+    tcg_gen_qemu_ld_i32(v, addr, mem_index, mop);
 
     if ((dc->cpu->env.pvr.regs[2] & PVR2_UNALIGNED_EXC_MASK) && size > 1) {
-        tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
-        gen_helper_memalign(cpu_env, *addr, tcg_const_tl(dc->rd),
-                            tcg_const_tl(0), tcg_const_tl(size - 1));
+        tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc);
+        gen_helper_memalign(cpu_env, addr, tcg_const_i32(dc->rd),
+                            tcg_const_i32(0), tcg_const_i32(size - 1));
     }
 
     if (ex) {
-        tcg_gen_mov_tl(env_res_addr, *addr);
-        tcg_gen_mov_tl(env_res_val, v);
+        tcg_gen_mov_tl(env_res_addr, addr);
+        tcg_gen_mov_i32(env_res_val, v);
     }
     if (dc->rd) {
-        tcg_gen_mov_tl(cpu_R[dc->rd], v);
+        tcg_gen_mov_i32(cpu_R[dc->rd], v);
     }
-    tcg_temp_free(v);
+    tcg_temp_free_i32(v);
 
     if (ex) { /* lwx */
         /* no support for AXI exclusive so always clear C */
         write_carryi(dc, 0);
     }
 
-    if (addr == &t)
-        tcg_temp_free(t);
+    tcg_temp_free(addr);
 }
 
 static void dec_store(DisasContext *dc)
 {
-    TCGv t, *addr, swx_addr;
+    TCGv addr;
     TCGLabel *swx_skip = NULL;
-    unsigned int size, rev = 0, ex = 0;
+    unsigned int size;
+    bool rev = false, ex = false, ea = false;
+    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
     TCGMemOp mop;
 
     mop = dc->opcode & 3;
     size = 1 << mop;
     if (!dc->type_b) {
-        rev = (dc->ir >> 9) & 1;
-        ex = (dc->ir >> 10) & 1;
+        ea = extract32(dc->ir, 7, 1);
+        rev = extract32(dc->ir, 9, 1);
+        ex = extract32(dc->ir, 10, 1);
     }
     mop |= MO_TE;
     if (rev) {
         mop ^= MO_BSWAP;
     }
 
-    if (size > 4 && (dc->tb_flags & MSR_EE_FLAG)
-          && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    if (trap_illegal(dc, size > 4)) {
         return;
     }
 
-    LOG_DIS("s%d%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
-                                                        ex ? "x" : "");
+    trap_userspace(dc, ea);
+
+    LOG_DIS("s%d%s%s%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "",
+                                                        ex ? "x" : "",
+                                                        ea ? "ea" : "");
     t_sync_flags(dc);
     /* If we get a fault on a dslot, the jmpstate better be in sync.  */
     sync_jmpstate(dc);
-    addr = compute_ldst_addr(dc, &t);
+    /* SWX needs a temp_local.  */
+    addr = ex ? tcg_temp_local_new() : tcg_temp_new();
+    compute_ldst_addr(dc, ea, addr);
+    /* Extended addressing bypasses the MMU.  */
+    mem_index = ea ? MMU_NOMMU_IDX : mem_index;
 
-    swx_addr = tcg_temp_local_new();
     if (ex) { /* swx */
-        TCGv tval;
+        TCGv_i32 tval;
 
-        /* Force addr into the swx_addr. */
-        tcg_gen_mov_tl(swx_addr, *addr);
-        addr = &swx_addr;
         /* swx does not throw unaligned access errors, so force alignment */
-        tcg_gen_andi_tl(swx_addr, swx_addr, ~3);
+        tcg_gen_andi_tl(addr, addr, ~3);
 
         write_carryi(dc, 1);
         swx_skip = gen_new_label();
-        tcg_gen_brcond_tl(TCG_COND_NE, env_res_addr, swx_addr, swx_skip);
+        tcg_gen_brcond_tl(TCG_COND_NE, env_res_addr, addr, swx_skip);
 
         /* Compare the value loaded at lwx with current contents of
            the reserved location.
            FIXME: This only works for system emulation where we can expect
            this compare and the following write to be atomic. For user
            emulation we need to add atomicity between threads.  */
-        tval = tcg_temp_new();
-        tcg_gen_qemu_ld_tl(tval, swx_addr, cpu_mmu_index(&dc->cpu->env, false),
-                           MO_TEUL);
-        tcg_gen_brcond_tl(TCG_COND_NE, env_res_val, tval, swx_skip);
+        tval = tcg_temp_new_i32();
+        tcg_gen_qemu_ld_i32(tval, addr, cpu_mmu_index(&dc->cpu->env, false),
+                            MO_TEUL);
+        tcg_gen_brcond_i32(TCG_COND_NE, env_res_val, tval, swx_skip);
         write_carryi(dc, 0);
-        tcg_temp_free(tval);
+        tcg_temp_free_i32(tval);
     }
 
     if (rev && size != 4) {
@@ -1092,17 +1101,10 @@ static void dec_store(DisasContext *dc)
                    11 -> 00 */
                 TCGv low = tcg_temp_new();
 
-                /* Force addr into the temp.  */
-                if (addr != &t) {
-                    t = tcg_temp_new();
-                    tcg_gen_mov_tl(t, *addr);
-                    addr = &t;
-                }
-
-                tcg_gen_andi_tl(low, t, 3);
+                tcg_gen_andi_tl(low, addr, 3);
                 tcg_gen_sub_tl(low, tcg_const_tl(3), low);
-                tcg_gen_andi_tl(t, t, ~3);
-                tcg_gen_or_tl(t, t, low);
+                tcg_gen_andi_tl(addr, addr, ~3);
+                tcg_gen_or_tl(addr, addr, low);
                 tcg_temp_free(low);
                 break;
             }
@@ -1111,79 +1113,74 @@ static void dec_store(DisasContext *dc)
                 /* 00 -> 10
                    10 -> 00.  */
                 /* Force addr into the temp.  */
-                if (addr != &t) {
-                    t = tcg_temp_new();
-                    tcg_gen_xori_tl(t, *addr, 2);
-                    addr = &t;
-                } else {
-                    tcg_gen_xori_tl(t, t, 2);
-                }
+                tcg_gen_xori_tl(addr, addr, 2);
                 break;
             default:
                 cpu_abort(CPU(dc->cpu), "Invalid reverse size\n");
                 break;
         }
     }
-    tcg_gen_qemu_st_tl(cpu_R[dc->rd], *addr, cpu_mmu_index(&dc->cpu->env, false), mop);
+    tcg_gen_qemu_st_i32(cpu_R[dc->rd], addr, mem_index, mop);
 
     /* Verify alignment if needed.  */
     if ((dc->cpu->env.pvr.regs[2] & PVR2_UNALIGNED_EXC_MASK) && size > 1) {
-        tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
+        tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc);
         /* FIXME: if the alignment is wrong, we should restore the value
          *        in memory. One possible way to achieve this is to probe
          *        the MMU prior to the memaccess, thay way we could put
          *        the alignment checks in between the probe and the mem
          *        access.
          */
-        gen_helper_memalign(cpu_env, *addr, tcg_const_tl(dc->rd),
-                            tcg_const_tl(1), tcg_const_tl(size - 1));
+        gen_helper_memalign(cpu_env, addr, tcg_const_i32(dc->rd),
+                            tcg_const_i32(1), tcg_const_i32(size - 1));
     }
 
     if (ex) {
         gen_set_label(swx_skip);
     }
-    tcg_temp_free(swx_addr);
 
-    if (addr == &t)
-        tcg_temp_free(t);
+    tcg_temp_free(addr);
 }
 
 static inline void eval_cc(DisasContext *dc, unsigned int cc,
-                           TCGv d, TCGv a, TCGv b)
+                           TCGv_i32 d, TCGv_i32 a)
 {
+    static const int mb_to_tcg_cc[] = {
+        [CC_EQ] = TCG_COND_EQ,
+        [CC_NE] = TCG_COND_NE,
+        [CC_LT] = TCG_COND_LT,
+        [CC_LE] = TCG_COND_LE,
+        [CC_GE] = TCG_COND_GE,
+        [CC_GT] = TCG_COND_GT,
+    };
+
     switch (cc) {
-        case CC_EQ:
-            tcg_gen_setcond_tl(TCG_COND_EQ, d, a, b);
-            break;
-        case CC_NE:
-            tcg_gen_setcond_tl(TCG_COND_NE, d, a, b);
-            break;
-        case CC_LT:
-            tcg_gen_setcond_tl(TCG_COND_LT, d, a, b);
-            break;
-        case CC_LE:
-            tcg_gen_setcond_tl(TCG_COND_LE, d, a, b);
-            break;
-        case CC_GE:
-            tcg_gen_setcond_tl(TCG_COND_GE, d, a, b);
-            break;
-        case CC_GT:
-            tcg_gen_setcond_tl(TCG_COND_GT, d, a, b);
-            break;
-        default:
-            cpu_abort(CPU(dc->cpu), "Unknown condition code %x.\n", cc);
-            break;
+    case CC_EQ:
+    case CC_NE:
+    case CC_LT:
+    case CC_LE:
+    case CC_GE:
+    case CC_GT:
+        tcg_gen_setcondi_i32(mb_to_tcg_cc[cc], d, a, 0);
+        break;
+    default:
+        cpu_abort(CPU(dc->cpu), "Unknown condition code %x.\n", cc);
+        break;
     }
 }
 
-static void eval_cond_jmp(DisasContext *dc, TCGv pc_true, TCGv pc_false)
+static void eval_cond_jmp(DisasContext *dc, TCGv_i64 pc_true, TCGv_i64 pc_false)
 {
-    TCGLabel *l1 = gen_new_label();
-    /* Conditional jmp.  */
-    tcg_gen_mov_tl(cpu_SR[SR_PC], pc_false);
-    tcg_gen_brcondi_tl(TCG_COND_EQ, env_btaken, 0, l1);
-    tcg_gen_mov_tl(cpu_SR[SR_PC], pc_true);
-    gen_set_label(l1);
+    TCGv_i64 tmp_btaken = tcg_temp_new_i64();
+    TCGv_i64 tmp_zero = tcg_const_i64(0);
+
+    tcg_gen_extu_i32_i64(tmp_btaken, env_btaken);
+    tcg_gen_movcond_i64(TCG_COND_NE, cpu_SR[SR_PC],
+                        tmp_btaken, tmp_zero,
+                        pc_true, pc_false);
+
+    tcg_temp_free_i64(tmp_btaken);
+    tcg_temp_free_i64(tmp_zero);
 }
 
 static void dec_bcc(DisasContext *dc)
@@ -1199,28 +1196,28 @@ static void dec_bcc(DisasContext *dc)
     if (dslot) {
         dc->delayed_branch = 2;
         dc->tb_flags |= D_FLAG;
-        tcg_gen_st_tl(tcg_const_tl(dc->type_b && (dc->tb_flags & IMM_FLAG)),
+        tcg_gen_st_i32(tcg_const_i32(dc->type_b && (dc->tb_flags & IMM_FLAG)),
                       cpu_env, offsetof(CPUMBState, bimm));
     }
 
     if (dec_alu_op_b_is_small_imm(dc)) {
         int32_t offset = (int32_t)((int16_t)dc->imm); /* sign-extend.  */
 
-        tcg_gen_movi_tl(env_btarget, dc->pc + offset);
+        tcg_gen_movi_i64(env_btarget, dc->pc + offset);
         dc->jmp = JMP_DIRECT_CC;
         dc->jmp_pc = dc->pc + offset;
     } else {
         dc->jmp = JMP_INDIRECT;
-        tcg_gen_movi_tl(env_btarget, dc->pc);
-        tcg_gen_add_tl(env_btarget, env_btarget, *(dec_alu_op_b(dc)));
+        tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc)));
+        tcg_gen_addi_i64(env_btarget, env_btarget, dc->pc);
+        tcg_gen_andi_i64(env_btarget, env_btarget, UINT32_MAX);
     }
-    eval_cc(dc, cc, env_btaken, cpu_R[dc->ra], tcg_const_tl(0));
+    eval_cc(dc, cc, env_btaken, cpu_R[dc->ra]);
 }
 
 static void dec_br(DisasContext *dc)
 {
     unsigned int dslot, link, abs, mbar;
-    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
 
     dslot = dc->ir & (1 << 20);
     abs = dc->ir & (1 << 19);
@@ -1240,7 +1237,7 @@ static void dec_br(DisasContext *dc)
             tcg_gen_st_i32(tmp_1, cpu_env,
                            -offsetof(MicroBlazeCPU, env)
                            +offsetof(CPUState, halted));
-            tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc + 4);
+            tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc + 4);
             gen_helper_raise_exception(cpu_env, tmp_hlt);
             tcg_temp_free_i32(tmp_hlt);
             tcg_temp_free_i32(tmp_1);
@@ -1261,23 +1258,21 @@ static void dec_br(DisasContext *dc)
     if (dslot) {
         dc->delayed_branch = 2;
         dc->tb_flags |= D_FLAG;
-        tcg_gen_st_tl(tcg_const_tl(dc->type_b && (dc->tb_flags & IMM_FLAG)),
+        tcg_gen_st_i32(tcg_const_i32(dc->type_b && (dc->tb_flags & IMM_FLAG)),
                       cpu_env, offsetof(CPUMBState, bimm));
     }
     if (link && dc->rd)
-        tcg_gen_movi_tl(cpu_R[dc->rd], dc->pc);
+        tcg_gen_movi_i32(cpu_R[dc->rd], dc->pc);
 
     dc->jmp = JMP_INDIRECT;
     if (abs) {
-        tcg_gen_movi_tl(env_btaken, 1);
-        tcg_gen_mov_tl(env_btarget, *(dec_alu_op_b(dc)));
+        tcg_gen_movi_i32(env_btaken, 1);
+        tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc)));
         if (link && !dslot) {
             if (!(dc->tb_flags & IMM_FLAG) && (dc->imm == 8 || dc->imm == 0x18))
                 t_gen_raise_exception(dc, EXCP_BREAK);
             if (dc->imm == 0) {
-                if ((dc->tb_flags & MSR_EE_FLAG) && mem_index == MMU_USER_IDX) {
-                    tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
-                    t_gen_raise_exception(dc, EXCP_HW_EXCP);
+                if (trap_userspace(dc, true)) {
                     return;
                 }
 
@@ -1289,116 +1284,115 @@ static void dec_br(DisasContext *dc)
             dc->jmp = JMP_DIRECT;
             dc->jmp_pc = dc->pc + (int32_t)((int16_t)dc->imm);
         } else {
-            tcg_gen_movi_tl(env_btaken, 1);
-            tcg_gen_movi_tl(env_btarget, dc->pc);
-            tcg_gen_add_tl(env_btarget, env_btarget, *(dec_alu_op_b(dc)));
+            tcg_gen_movi_i32(env_btaken, 1);
+            tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc)));
+            tcg_gen_addi_i64(env_btarget, env_btarget, dc->pc);
+            tcg_gen_andi_i64(env_btarget, env_btarget, UINT32_MAX);
         }
     }
 }
 
 static inline void do_rti(DisasContext *dc)
 {
-    TCGv t0, t1;
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-    tcg_gen_shri_tl(t0, cpu_SR[SR_MSR], 1);
-    tcg_gen_ori_tl(t1, cpu_SR[SR_MSR], MSR_IE);
-    tcg_gen_andi_tl(t0, t0, (MSR_VM | MSR_UM));
-
-    tcg_gen_andi_tl(t1, t1, ~(MSR_VM | MSR_UM));
-    tcg_gen_or_tl(t1, t1, t0);
+    TCGv_i32 t0, t1;
+    t0 = tcg_temp_new_i32();
+    t1 = tcg_temp_new_i32();
+    tcg_gen_extrl_i64_i32(t1, cpu_SR[SR_MSR]);
+    tcg_gen_shri_i32(t0, t1, 1);
+    tcg_gen_ori_i32(t1, t1, MSR_IE);
+    tcg_gen_andi_i32(t0, t0, (MSR_VM | MSR_UM));
+
+    tcg_gen_andi_i32(t1, t1, ~(MSR_VM | MSR_UM));
+    tcg_gen_or_i32(t1, t1, t0);
     msr_write(dc, t1);
-    tcg_temp_free(t1);
-    tcg_temp_free(t0);
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t0);
     dc->tb_flags &= ~DRTI_FLAG;
 }
 
 static inline void do_rtb(DisasContext *dc)
 {
-    TCGv t0, t1;
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-    tcg_gen_andi_tl(t1, cpu_SR[SR_MSR], ~MSR_BIP);
-    tcg_gen_shri_tl(t0, t1, 1);
-    tcg_gen_andi_tl(t0, t0, (MSR_VM | MSR_UM));
-
-    tcg_gen_andi_tl(t1, t1, ~(MSR_VM | MSR_UM));
-    tcg_gen_or_tl(t1, t1, t0);
+    TCGv_i32 t0, t1;
+    t0 = tcg_temp_new_i32();
+    t1 = tcg_temp_new_i32();
+    tcg_gen_extrl_i64_i32(t1, cpu_SR[SR_MSR]);
+    tcg_gen_andi_i32(t1, t1, ~MSR_BIP);
+    tcg_gen_shri_i32(t0, t1, 1);
+    tcg_gen_andi_i32(t0, t0, (MSR_VM | MSR_UM));
+
+    tcg_gen_andi_i32(t1, t1, ~(MSR_VM | MSR_UM));
+    tcg_gen_or_i32(t1, t1, t0);
     msr_write(dc, t1);
-    tcg_temp_free(t1);
-    tcg_temp_free(t0);
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t0);
     dc->tb_flags &= ~DRTB_FLAG;
 }
 
 static inline void do_rte(DisasContext *dc)
 {
-    TCGv t0, t1;
-    t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
-
-    tcg_gen_ori_tl(t1, cpu_SR[SR_MSR], MSR_EE);
-    tcg_gen_andi_tl(t1, t1, ~MSR_EIP);
-    tcg_gen_shri_tl(t0, t1, 1);
-    tcg_gen_andi_tl(t0, t0, (MSR_VM | MSR_UM));
-
-    tcg_gen_andi_tl(t1, t1, ~(MSR_VM | MSR_UM));
-    tcg_gen_or_tl(t1, t1, t0);
+    TCGv_i32 t0, t1;
+    t0 = tcg_temp_new_i32();
+    t1 = tcg_temp_new_i32();
+
+    tcg_gen_extrl_i64_i32(t1, cpu_SR[SR_MSR]);
+    tcg_gen_ori_i32(t1, t1, MSR_EE);
+    tcg_gen_andi_i32(t1, t1, ~MSR_EIP);
+    tcg_gen_shri_i32(t0, t1, 1);
+    tcg_gen_andi_i32(t0, t0, (MSR_VM | MSR_UM));
+
+    tcg_gen_andi_i32(t1, t1, ~(MSR_VM | MSR_UM));
+    tcg_gen_or_i32(t1, t1, t0);
     msr_write(dc, t1);
-    tcg_temp_free(t1);
-    tcg_temp_free(t0);
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t0);
     dc->tb_flags &= ~DRTE_FLAG;
 }
 
 static void dec_rts(DisasContext *dc)
 {
     unsigned int b_bit, i_bit, e_bit;
-    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
+    TCGv_i64 tmp64;
 
     i_bit = dc->ir & (1 << 21);
     b_bit = dc->ir & (1 << 22);
     e_bit = dc->ir & (1 << 23);
 
+    if (trap_userspace(dc, i_bit || b_bit || e_bit)) {
+        return;
+    }
+
     dc->delayed_branch = 2;
     dc->tb_flags |= D_FLAG;
-    tcg_gen_st_tl(tcg_const_tl(dc->type_b && (dc->tb_flags & IMM_FLAG)),
+    tcg_gen_st_i32(tcg_const_i32(dc->type_b && (dc->tb_flags & IMM_FLAG)),
                   cpu_env, offsetof(CPUMBState, bimm));
 
     if (i_bit) {
         LOG_DIS("rtid ir=%x\n", dc->ir);
-        if ((dc->tb_flags & MSR_EE_FLAG)
-             && mem_index == MMU_USER_IDX) {
-            tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
-            t_gen_raise_exception(dc, EXCP_HW_EXCP);
-        }
         dc->tb_flags |= DRTI_FLAG;
     } else if (b_bit) {
         LOG_DIS("rtbd ir=%x\n", dc->ir);
-        if ((dc->tb_flags & MSR_EE_FLAG)
-             && mem_index == MMU_USER_IDX) {
-            tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
-            t_gen_raise_exception(dc, EXCP_HW_EXCP);
-        }
         dc->tb_flags |= DRTB_FLAG;
     } else if (e_bit) {
         LOG_DIS("rted ir=%x\n", dc->ir);
-        if ((dc->tb_flags & MSR_EE_FLAG)
-             && mem_index == MMU_USER_IDX) {
-            tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
-            t_gen_raise_exception(dc, EXCP_HW_EXCP);
-        }
         dc->tb_flags |= DRTE_FLAG;
     } else
         LOG_DIS("rts ir=%x\n", dc->ir);
 
     dc->jmp = JMP_INDIRECT;
-    tcg_gen_movi_tl(env_btaken, 1);
-    tcg_gen_add_tl(env_btarget, cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+    tcg_gen_movi_i32(env_btaken, 1);
+
+    tmp64 = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(env_btarget, *(dec_alu_op_b(dc)));
+    tcg_gen_extu_i32_i64(tmp64, cpu_R[dc->ra]);
+    tcg_gen_add_i64(env_btarget, env_btarget, tmp64);
+    tcg_gen_andi_i64(env_btarget, env_btarget, UINT32_MAX);
+    tcg_temp_free_i64(tmp64);
 }
 
 static int dec_check_fpuv2(DisasContext *dc)
 {
     if ((dc->cpu->cfg.use_fpu != 2) && (dc->tb_flags & MSR_EE_FLAG)) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_FPU);
+        tcg_gen_movi_i64(cpu_SR[SR_ESR], ESR_EC_FPU);
         t_gen_raise_exception(dc, EXCP_HW_EXCP);
     }
     return (dc->cpu->cfg.use_fpu == 2) ? 0 : PVR2_USE_FPU2_MASK;
@@ -1408,11 +1402,7 @@ static void dec_fpu(DisasContext *dc)
 {
     unsigned int fpu_insn;
 
-    if ((dc->tb_flags & MSR_EE_FLAG)
-          && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)
-          && !dc->cpu->cfg.use_fpu) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    if (trap_illegal(dc, !dc->cpu->cfg.use_fpu)) {
         return;
     }
 
@@ -1511,10 +1501,7 @@ static void dec_fpu(DisasContext *dc)
 
 static void dec_null(DisasContext *dc)
 {
-    if ((dc->tb_flags & MSR_EE_FLAG)
-          && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    if (trap_illegal(dc, true)) {
         return;
     }
     qemu_log_mask(LOG_GUEST_ERROR, "unknown insn pc=%x opc=%x\n", dc->pc, dc->opcode);
@@ -1524,37 +1511,34 @@ static void dec_null(DisasContext *dc)
 /* Insns connected to FSL or AXI stream attached devices.  */
 static void dec_stream(DisasContext *dc)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
     TCGv_i32 t_id, t_ctrl;
     int ctrl;
 
     LOG_DIS("%s%s imm=%x\n", dc->rd ? "get" : "put",
             dc->type_b ? "" : "d", dc->imm);
 
-    if ((dc->tb_flags & MSR_EE_FLAG) && (mem_index == MMU_USER_IDX)) {
-        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
-        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+    if (trap_userspace(dc, true)) {
         return;
     }
 
-    t_id = tcg_temp_new();
+    t_id = tcg_temp_new_i32();
     if (dc->type_b) {
-        tcg_gen_movi_tl(t_id, dc->imm & 0xf);
+        tcg_gen_movi_i32(t_id, dc->imm & 0xf);
         ctrl = dc->imm >> 10;
     } else {
-        tcg_gen_andi_tl(t_id, cpu_R[dc->rb], 0xf);
+        tcg_gen_andi_i32(t_id, cpu_R[dc->rb], 0xf);
         ctrl = dc->imm >> 5;
     }
 
-    t_ctrl = tcg_const_tl(ctrl);
+    t_ctrl = tcg_const_i32(ctrl);
 
     if (dc->rd == 0) {
         gen_helper_put(t_id, t_ctrl, cpu_R[dc->ra]);
     } else {
         gen_helper_get(cpu_R[dc->rd], t_id, t_ctrl);
     }
-    tcg_temp_free(t_id);
-    tcg_temp_free(t_ctrl);
+    tcg_temp_free_i32(t_id);
+    tcg_temp_free_i32(t_ctrl);
 }
 
 static struct decoder_info {
@@ -1595,13 +1579,7 @@ static inline void decode(DisasContext *dc, uint32_t ir)
     if (dc->ir)
         dc->nr_nops = 0;
     else {
-        if ((dc->tb_flags & MSR_EE_FLAG)
-              && (dc->cpu->env.pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)
-              && (dc->cpu->env.pvr.regs[2] & PVR2_OPCODE_0x0_ILL_MASK)) {
-            tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
-            t_gen_raise_exception(dc, EXCP_HW_EXCP);
-            return;
-        }
+        trap_illegal(dc, dc->cpu->env.pvr.regs[2] & PVR2_OPCODE_0x0_ILL_MASK);
 
         LOG_DIS("nr_nops=%d\t", dc->nr_nops);
         dc->nr_nops++;
@@ -1636,7 +1614,7 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
     struct DisasContext ctx;
     struct DisasContext *dc = &ctx;
     uint32_t page_start, org_flags;
-    target_ulong npc;
+    uint32_t npc;
     int num_insns;
     int max_insns;
 
@@ -1679,7 +1657,7 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
 
 #if SIM_COMPAT
         if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
-            tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
+            tcg_gen_movi_i64(cpu_SR[SR_PC], dc->pc);
             gen_helper_debug();
         }
 #endif
@@ -1721,7 +1699,7 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
                 dc->tb_flags &= ~D_FLAG;
                 /* If it is a direct jump, try direct chaining.  */
                 if (dc->jmp == JMP_INDIRECT) {
-                    eval_cond_jmp(dc, env_btarget, tcg_const_tl(dc->pc));
+                    eval_cond_jmp(dc, env_btarget, tcg_const_i64(dc->pc));
                     dc->is_jmp = DISAS_JUMP;
                 } else if (dc->jmp == JMP_DIRECT) {
                     t_sync_flags(dc);
@@ -1731,7 +1709,7 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
                     TCGLabel *l1 = gen_new_label();
                     t_sync_flags(dc);
                     /* Conditional jmp.  */
-                    tcg_gen_brcondi_tl(TCG_COND_NE, env_btaken, 0, l1);
+                    tcg_gen_brcondi_i32(TCG_COND_NE, env_btaken, 0, l1);
                     gen_goto_tb(dc, 1, dc->pc);
                     gen_set_label(l1);
                     gen_goto_tb(dc, 0, dc->jmp_pc);
@@ -1754,7 +1732,7 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
     if (dc->jmp == JMP_DIRECT || dc->jmp == JMP_DIRECT_CC) {
         if (dc->tb_flags & D_FLAG) {
             dc->is_jmp = DISAS_UPDATE;
-            tcg_gen_movi_tl(cpu_SR[SR_PC], npc);
+            tcg_gen_movi_i64(cpu_SR[SR_PC], npc);
             sync_jmpstate(dc);
         } else
             npc = dc->jmp_pc;
@@ -1766,7 +1744,7 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
     if (dc->is_jmp == DISAS_NEXT
         && (dc->cpustate_changed || org_flags != dc->tb_flags)) {
         dc->is_jmp = DISAS_UPDATE;
-        tcg_gen_movi_tl(cpu_SR[SR_PC], npc);
+        tcg_gen_movi_i64(cpu_SR[SR_PC], npc);
     }
     t_sync_flags(dc);
 
@@ -1774,7 +1752,7 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
         TCGv_i32 tmp = tcg_const_i32(EXCP_DEBUG);
 
         if (dc->is_jmp != DISAS_JUMP) {
-            tcg_gen_movi_tl(cpu_SR[SR_PC], npc);
+            tcg_gen_movi_i64(cpu_SR[SR_PC], npc);
         }
         gen_helper_raise_exception(cpu_env, tmp);
         tcg_temp_free_i32(tmp);
@@ -1824,17 +1802,19 @@ void mb_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
     if (!env || !f)
         return;
 
-    cpu_fprintf(f, "IN: PC=%x %s\n",
+    cpu_fprintf(f, "IN: PC=%" PRIx64 " %s\n",
                 env->sregs[SR_PC], lookup_symbol(env->sregs[SR_PC]));
-    cpu_fprintf(f, "rmsr=%x resr=%x rear=%x debug=%x imm=%x iflags=%x fsr=%x\n",
+    cpu_fprintf(f, "rmsr=%" PRIx64 " resr=%" PRIx64 " rear=%" PRIx64 " "
+                   "debug=%x imm=%x iflags=%x fsr=%" PRIx64 "\n",
              env->sregs[SR_MSR], env->sregs[SR_ESR], env->sregs[SR_EAR],
              env->debug, env->imm, env->iflags, env->sregs[SR_FSR]);
-    cpu_fprintf(f, "btaken=%d btarget=%x mode=%s(saved=%s) eip=%d ie=%d\n",
+    cpu_fprintf(f, "btaken=%d btarget=%" PRIx64 " mode=%s(saved=%s) "
+                   "eip=%d ie=%d\n",
              env->btaken, env->btarget,
              (env->sregs[SR_MSR] & MSR_UM) ? "user" : "kernel",
              (env->sregs[SR_MSR] & MSR_UMS) ? "user" : "kernel",
-             (env->sregs[SR_MSR] & MSR_EIP),
-             (env->sregs[SR_MSR] & MSR_IE));
+             (bool)(env->sregs[SR_MSR] & MSR_EIP),
+             (bool)(env->sregs[SR_MSR] & MSR_IE));
 
     for (i = 0; i < 32; i++) {
         cpu_fprintf(f, "r%2.2d=%8.8x ", i, env->regs[i]);
@@ -1848,34 +1828,34 @@ void mb_tcg_init(void)
 {
     int i;
 
-    env_debug = tcg_global_mem_new(cpu_env,
+    env_debug = tcg_global_mem_new_i32(cpu_env,
                     offsetof(CPUMBState, debug),
                     "debug0");
-    env_iflags = tcg_global_mem_new(cpu_env,
+    env_iflags = tcg_global_mem_new_i32(cpu_env,
                     offsetof(CPUMBState, iflags),
                     "iflags");
-    env_imm = tcg_global_mem_new(cpu_env,
+    env_imm = tcg_global_mem_new_i32(cpu_env,
                     offsetof(CPUMBState, imm),
                     "imm");
-    env_btarget = tcg_global_mem_new(cpu_env,
+    env_btarget = tcg_global_mem_new_i64(cpu_env,
                      offsetof(CPUMBState, btarget),
                      "btarget");
-    env_btaken = tcg_global_mem_new(cpu_env,
+    env_btaken = tcg_global_mem_new_i32(cpu_env,
                      offsetof(CPUMBState, btaken),
                      "btaken");
     env_res_addr = tcg_global_mem_new(cpu_env,
                      offsetof(CPUMBState, res_addr),
                      "res_addr");
-    env_res_val = tcg_global_mem_new(cpu_env,
+    env_res_val = tcg_global_mem_new_i32(cpu_env,
                      offsetof(CPUMBState, res_val),
                      "res_val");
     for (i = 0; i < ARRAY_SIZE(cpu_R); i++) {
-        cpu_R[i] = tcg_global_mem_new(cpu_env,
+        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
                           offsetof(CPUMBState, regs[i]),
                           regnames[i]);
     }
     for (i = 0; i < ARRAY_SIZE(cpu_SR); i++) {
-        cpu_SR[i] = tcg_global_mem_new(cpu_env,
+        cpu_SR[i] = tcg_global_mem_new_i64(cpu_env,
                           offsetof(CPUMBState, sregs[i]),
                           special_regnames[i]);
     }
diff --git a/target/moxie/cpu.c b/target/moxie/cpu.c
index 4170284da6..8d67eb6727 100644
--- a/target/moxie/cpu.c
+++ b/target/moxie/cpu.c
@@ -23,7 +23,6 @@
 #include "qemu-common.h"
 #include "migration/vmstate.h"
 #include "machine.h"
-#include "exec/exec-all.h"
 
 static void moxie_cpu_set_pc(CPUState *cs, vaddr value)
 {
diff --git a/target/moxie/mmu.c b/target/moxie/mmu.c
index 9203330b3b..bd90b1eebc 100644
--- a/target/moxie/mmu.c
+++ b/target/moxie/mmu.c
@@ -21,7 +21,6 @@
 
 #include "cpu.h"
 #include "mmu.h"
-#include "exec/exec-all.h"
 
 int moxie_mmu_translate(MoxieMMUResult *res,
                        CPUMoxieState *env, uint32_t vaddr,
diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
index 145796e8ce..047f3764b7 100644
--- a/target/nios2/cpu.h
+++ b/target/nios2/cpu.h
@@ -260,7 +260,6 @@ static inline int cpu_interrupts_enabled(CPUNios2State *env)
 }
 
 #include "exec/cpu-all.h"
-#include "exec/exec-all.h"
 
 static inline void cpu_get_tb_cpu_state(CPUNios2State *env, target_ulong *pc,
                                         target_ulong *cs_base, uint32_t *flags)
diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c
index c853aeae02..529ec6ac0e 100644
--- a/target/nios2/op_helper.c
+++ b/target/nios2/op_helper.c
@@ -22,6 +22,7 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
 #include "qemu/main-loop.h"
 
 #if !defined(CONFIG_USER_ONLY)
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index 20b115afae..a692a98ec0 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -21,7 +21,6 @@
 #include "qapi/error.h"
 #include "cpu.h"
 #include "qemu-common.h"
-#include "exec/exec-all.h"
 
 static void openrisc_cpu_set_pc(CPUState *cs, vaddr value)
 {
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 7ccd2f460e..0247c1f04c 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1215,7 +1215,7 @@ struct PowerPCCPU {
     uint64_t mig_insns_flags2;
     uint32_t mig_nb_BATs;
     bool pre_2_10_migration;
-    bool pre_2_13_migration;
+    bool pre_3_0_migration;
     int32_t mig_slb_nr;
 };
 
diff --git a/target/ppc/helper_regs.h b/target/ppc/helper_regs.h
index 84fd30c2db..5efd18049e 100644
--- a/target/ppc/helper_regs.h
+++ b/target/ppc/helper_regs.h
@@ -21,6 +21,7 @@
 #define HELPER_REGS_H
 
 #include "qemu/main-loop.h"
+#include "exec/exec-all.h"
 
 /* Swap temporary saved registers with GPRs */
 static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 1607a7a42b..03d37da79f 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -19,7 +19,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "internal.h"
-#include "exec/exec-all.h"
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
 #include "crypto/aes.h"
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index ba1b9e531f..b2745ec4e5 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -150,11 +150,11 @@ static bool cpu_pre_2_8_migration(void *opaque, int version_id)
 }
 
 #if defined(TARGET_PPC64)
-static bool cpu_pre_2_13_migration(void *opaque, int version_id)
+static bool cpu_pre_3_0_migration(void *opaque, int version_id)
 {
     PowerPCCPU *cpu = opaque;
 
-    return cpu->pre_2_13_migration;
+    return cpu->pre_3_0_migration;
 }
 #endif
 
@@ -220,7 +220,7 @@ static int cpu_pre_save(void *opaque)
         cpu->mig_insns_flags2 = env->insns_flags2 & insns_compat_mask2;
         cpu->mig_nb_BATs = env->nb_BATs;
     }
-    if (cpu->pre_2_13_migration) {
+    if (cpu->pre_3_0_migration) {
         if (cpu->hash64_opts) {
             cpu->mig_slb_nr = cpu->hash64_opts->slb_size;
         }
@@ -517,7 +517,7 @@ static const VMStateDescription vmstate_slb = {
     .needed = slb_needed,
     .post_load = slb_post_load,
     .fields = (VMStateField[]) {
-        VMSTATE_INT32_TEST(mig_slb_nr, PowerPCCPU, cpu_pre_2_13_migration),
+        VMSTATE_INT32_TEST(mig_slb_nr, PowerPCCPU, cpu_pre_3_0_migration),
         VMSTATE_SLB_ARRAY(env.slb, PowerPCCPU, MAX_SLB_ENTRIES),
         VMSTATE_END_OF_LIST()
     }
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index a1db20e3a8..aa200cba4c 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -431,7 +431,8 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
         return NULL;
     }
 
-    hptes = address_space_map(CPU(cpu)->as, base + pte_offset, &plen, false);
+    hptes = address_space_map(CPU(cpu)->as, base + pte_offset, &plen, false,
+                              MEMTXATTRS_UNSPECIFIED);
     if (plen < (n * HASH_PTE_SIZE_64)) {
         hw_error("%s: Unable to map all requested HPTEs\n", __func__);
     }
diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
index a0b3f184b2..ab782cb32a 100644
--- a/target/ppc/translate_init.inc.c
+++ b/target/ppc/translate_init.inc.c
@@ -10427,7 +10427,7 @@ static Property ppc_cpu_properties[] = {
     DEFINE_PROP_BOOL("pre-2.8-migration", PowerPCCPU, pre_2_8_migration, false),
     DEFINE_PROP_BOOL("pre-2.10-migration", PowerPCCPU, pre_2_10_migration,
                      false),
-    DEFINE_PROP_BOOL("pre-2.13-migration", PowerPCCPU, pre_2_13_migration,
+    DEFINE_PROP_BOOL("pre-3.0-migration", PowerPCCPU, pre_3_0_migration,
                      false),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/target/riscv/helper.c b/target/riscv/helper.c
index 95889f23b9..29e1a603dc 100644
--- a/target/riscv/helper.c
+++ b/target/riscv/helper.c
@@ -210,7 +210,7 @@ restart:
                 MemoryRegion *mr;
                 hwaddr l = sizeof(target_ulong), addr1;
                 mr = address_space_translate(cs->as, pte_addr,
-                    &addr1, &l, false);
+                    &addr1, &l, false, MEMTXATTRS_UNSPECIFIED);
                 if (memory_access_is_direct(mr, true)) {
                     target_ulong *pte_pa =
                         qemu_map_ram_ptr(mr->ram_block, addr1);
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index c2b775f4eb..c268065887 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -38,7 +38,6 @@
 #include "qapi/qapi-visit-misc.h"
 #include "qapi/qapi-visit-run-state.h"
 #include "sysemu/hw_accel.h"
-#include "exec/exec-all.h"
 #include "hw/qdev-properties.h"
 #ifndef CONFIG_USER_ONLY
 #include "hw/hw.h"
diff --git a/target/s390x/diag.c b/target/s390x/diag.c
index ac2c40f363..acb0f3d4af 100644
--- a/target/s390x/diag.c
+++ b/target/s390x/diag.c
@@ -16,7 +16,6 @@
 #include "cpu.h"
 #include "internal.h"
 #include "exec/address-spaces.h"
-#include "exec/exec-all.h"
 #include "hw/watchdog/wdt_diag288.h"
 #include "sysemu/cpus.h"
 #include "hw/s390x/ipl.h"
@@ -87,7 +86,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra)
             return;
         }
         if (!address_space_access_valid(&address_space_memory, addr,
-                                        sizeof(IplParameterBlock), false)) {
+                                        sizeof(IplParameterBlock), false,
+                                        MEMTXATTRS_UNSPECIFIED)) {
             s390_program_interrupt(env, PGM_ADDRESSING, ILEN_AUTO, ra);
             return;
         }
@@ -116,7 +116,8 @@ out:
             return;
         }
         if (!address_space_access_valid(&address_space_memory, addr,
-                                        sizeof(IplParameterBlock), true)) {
+                                        sizeof(IplParameterBlock), true,
+                                        MEMTXATTRS_UNSPECIFIED)) {
             s390_program_interrupt(env, PGM_ADDRESSING, ILEN_AUTO, ra);
             return;
         }
diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
index dfee221111..f0ce60cff2 100644
--- a/target/s390x/excp_helper.c
+++ b/target/s390x/excp_helper.c
@@ -120,7 +120,8 @@ int s390_cpu_handle_mmu_fault(CPUState *cs, vaddr orig_vaddr, int size,
 
     /* check out of RAM access */
     if (!address_space_access_valid(&address_space_memory, raddr,
-                                    TARGET_PAGE_SIZE, rw)) {
+                                    TARGET_PAGE_SIZE, rw,
+                                    MEMTXATTRS_UNSPECIFIED)) {
         DPRINTF("%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n", __func__,
                 (uint64_t)raddr, (uint64_t)ram_size);
         trigger_pgm_exception(env, PGM_ADDRESSING, ILEN_AUTO);
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index fd5791f134..254631693d 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -23,7 +23,6 @@
 #include "internal.h"
 #include "exec/gdbstub.h"
 #include "qemu/timer.h"
-#include "exec/exec-all.h"
 #include "hw/s390x/ioinst.h"
 #include "sysemu/hw_accel.h"
 #ifndef CONFIG_USER_ONLY
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 58e4380ae3..ac370da281 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -39,7 +39,6 @@
 #include "hw/hw.h"
 #include "sysemu/device_tree.h"
 #include "exec/gdbstub.h"
-#include "exec/address-spaces.h"
 #include "trace.h"
 #include "hw/s390x/s390-pci-inst.h"
 #include "hw/s390x/s390-pci-bus.h"
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index a0e28bd124..e21a47fb4d 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -21,7 +21,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "internal.h"
-#include "exec/address-spaces.h"
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index 1f834f35ef..de1ced2082 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -26,7 +26,6 @@
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
 #include "qemu/timer.h"
-#include "exec/address-spaces.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 
diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c
index a25deef5dd..145b62a7ef 100644
--- a/target/s390x/mmu_helper.c
+++ b/target/s390x/mmu_helper.c
@@ -461,7 +461,8 @@ static int translate_pages(S390CPU *cpu, vaddr addr, int nr_pages,
             return ret;
         }
         if (!address_space_access_valid(&address_space_memory, pages[i],
-                                        TARGET_PAGE_SIZE, is_write)) {
+                                        TARGET_PAGE_SIZE, is_write,
+                                        MEMTXATTRS_UNSPECIFIED)) {
             trigger_access_exception(env, PGM_ADDRESSING, ILEN_AUTO, 0);
             return -EFAULT;
         }
diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c
index aff1530c82..c1f9245797 100644
--- a/target/s390x/sigp.c
+++ b/target/s390x/sigp.c
@@ -280,7 +280,8 @@ static void sigp_set_prefix(CPUState *cs, run_on_cpu_data arg)
     cpu_synchronize_state(cs);
 
     if (!address_space_access_valid(&address_space_memory, addr,
-                                    sizeof(struct LowCore), false)) {
+                                    sizeof(struct LowCore), false,
+                                    MEMTXATTRS_UNSPECIFIED)) {
         set_sigp_status(si, SIGP_STAT_INVALID_PARAMETER);
         return;
     }
diff --git a/target/sparc/mmu_helper.c b/target/sparc/mmu_helper.c
index f8886ae039..135a9c9d9b 100644
--- a/target/sparc/mmu_helper.c
+++ b/target/sparc/mmu_helper.c
@@ -21,7 +21,6 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "trace.h"
-#include "exec/address-spaces.h"
 
 /* Sparc MMU emulation */
 
diff --git a/target/tilegx/cpu.c b/target/tilegx/cpu.c
index b7451bdcf2..bfe9be59b5 100644
--- a/target/tilegx/cpu.c
+++ b/target/tilegx/cpu.c
@@ -24,7 +24,6 @@
 #include "qemu-common.h"
 #include "hw/qdev-properties.h"
 #include "linux-user/syscall_defs.h"
-#include "exec/exec-all.h"
 
 static void tilegx_cpu_dump_state(CPUState *cs, FILE *f,
                                   fprintf_function cpu_fprintf, int flags)
diff --git a/target/xtensa/core-dc232b.c b/target/xtensa/core-dc232b.c
index 7331eeea2f..7131337840 100644
--- a/target/xtensa/core-dc232b.c
+++ b/target/xtensa/core-dc232b.c
@@ -27,9 +27,9 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/gdbstub.h"
 #include "qemu/host-utils.h"
+#include "qemu/timer.h"
 
 #include "core-dc232b/core-isa.h"
 #include "overlay_tool.h"
diff --git a/target/xtensa/core-dc233c.c b/target/xtensa/core-dc233c.c
index 8296e6fa10..d701e3f5de 100644
--- a/target/xtensa/core-dc233c.c
+++ b/target/xtensa/core-dc233c.c
@@ -27,7 +27,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/gdbstub.h"
 #include "qemu-common.h"
 #include "qemu/host-utils.h"
diff --git a/target/xtensa/core-de212.c b/target/xtensa/core-de212.c
index 53775a97fa..7322179b56 100644
--- a/target/xtensa/core-de212.c
+++ b/target/xtensa/core-de212.c
@@ -27,7 +27,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/gdbstub.h"
 #include "qemu-common.h"
 #include "qemu/host-utils.h"
diff --git a/target/xtensa/core-fsf.c b/target/xtensa/core-fsf.c
index 01932bdc8b..e100e212b9 100644
--- a/target/xtensa/core-fsf.c
+++ b/target/xtensa/core-fsf.c
@@ -27,7 +27,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/gdbstub.h"
 #include "qemu-common.h"
 #include "qemu/host-utils.h"
diff --git a/target/xtensa/core-sample_controller.c b/target/xtensa/core-sample_controller.c
index c622335ca5..f433ea8d66 100644
--- a/target/xtensa/core-sample_controller.c
+++ b/target/xtensa/core-sample_controller.c
@@ -27,7 +27,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/gdbstub.h"
 #include "qemu-common.h"
 #include "qemu/host-utils.h"
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index 2b5b537222..b50c840e09 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -33,7 +33,6 @@
 #include "cpu.h"
 #include "qemu-common.h"
 #include "migration/vmstate.h"
-#include "exec/exec-all.h"
 
 
 static void xtensa_cpu_set_pc(CPUState *cs, vaddr value)
diff --git a/target/xtensa/import_core.sh b/target/xtensa/import_core.sh
index af6c610479..039406bf28 100755
--- a/target/xtensa/import_core.sh
+++ b/target/xtensa/import_core.sh
@@ -39,7 +39,6 @@ tar -xf "$OVERLAY" -O binutils/xtensa-modules.c | \
 cat <<EOF > "${TARGET}.c"
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/gdbstub.h"
 #include "qemu-common.h"
 #include "qemu/host-utils.h"
diff --git a/target/xtensa/op_helper.c b/target/xtensa/op_helper.c
index e3bcbe10d6..8a8c763c63 100644
--- a/target/xtensa/op_helper.c
+++ b/target/xtensa/op_helper.c
@@ -105,7 +105,8 @@ static void tb_invalidate_virtual_addr(CPUXtensaState *env, uint32_t vaddr)
     int ret = xtensa_get_physical_addr(env, false, vaddr, 2, 0,
             &paddr, &page_size, &access);
     if (ret == 0) {
-        tb_invalidate_phys_addr(&address_space_memory, paddr);
+        tb_invalidate_phys_addr(&address_space_memory, paddr,
+                                MEMTXATTRS_UNSPECIFIED);
     }
 }