summary refs log tree commit diff stats
path: root/target/arm
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm')
-rw-r--r--target/arm/arm-semi.c2
-rw-r--r--target/arm/cpu.h16
-rw-r--r--target/arm/helper.c375
-rw-r--r--target/arm/iwmmxt_helper.c234
-rw-r--r--target/arm/op_helper.c22
-rw-r--r--target/arm/translate.c198
6 files changed, 521 insertions, 326 deletions
diff --git a/target/arm/arm-semi.c b/target/arm/arm-semi.c
index 7cac8734c7..b2b22d231e 100644
--- a/target/arm/arm-semi.c
+++ b/target/arm/arm-semi.c
@@ -136,7 +136,7 @@ static void arm_semi_cb(CPUState *cs, target_ulong ret, target_ulong err)
 #ifdef CONFIG_USER_ONLY
         ts->swi_errno = err;
 #else
-	syscall_err = err;
+        syscall_err = err;
 #endif
         reg0 = ret;
     } else {
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 62c36b4150..65c0fa0a65 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1320,14 +1320,14 @@ enum arm_cpu_mode {
 #define ARM_VFP_FPINST2 10
 
 /* iwMMXt coprocessor control registers.  */
-#define ARM_IWMMXT_wCID		0
-#define ARM_IWMMXT_wCon		1
-#define ARM_IWMMXT_wCSSF	2
-#define ARM_IWMMXT_wCASF	3
-#define ARM_IWMMXT_wCGR0	8
-#define ARM_IWMMXT_wCGR1	9
-#define ARM_IWMMXT_wCGR2	10
-#define ARM_IWMMXT_wCGR3	11
+#define ARM_IWMMXT_wCID  0
+#define ARM_IWMMXT_wCon  1
+#define ARM_IWMMXT_wCSSF 2
+#define ARM_IWMMXT_wCASF 3
+#define ARM_IWMMXT_wCGR0 8
+#define ARM_IWMMXT_wCGR1 9
+#define ARM_IWMMXT_wCGR2 10
+#define ARM_IWMMXT_wCGR3 11
 
 /* V7M CCR bits */
 FIELD(V7M_CCR, NONBASETHRDENA, 0, 1)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index a3124082a6..088f452716 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3750,15 +3750,19 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
 
 /* Used to describe the behaviour of EL2 regs when EL2 does not exist.  */
 static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
-    { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "VBAR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0,
       .access = PL2_RW,
       .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore },
-    { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH,
       .type = ARM_CP_NO_RAW,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
       .access = PL2_RW,
-      .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore },
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
+      .access = PL2_RW,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
     { .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
@@ -3767,14 +3771,14 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
       .access = PL2_RW, .type = ARM_CP_CONST,
       .resetvalue = 0 },
     { .name = "HMAIR1", .state = ARM_CP_STATE_AA32,
-      .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1,
+      .cp = 15, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
     { .name = "AMAIR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 0,
       .access = PL2_RW, .type = ARM_CP_CONST,
       .resetvalue = 0 },
-    { .name = "HMAIR1", .state = ARM_CP_STATE_AA32,
-      .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1,
+    { .name = "HAMAIR1", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1,
       .access = PL2_RW, .type = ARM_CP_CONST,
       .resetvalue = 0 },
     { .name = "AFSR0_EL2", .state = ARM_CP_STATE_BOTH,
@@ -3843,6 +3847,22 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
     { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "FAR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0,
+      .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "HIFAR", .state = ARM_CP_STATE_AA32,
+      .type = ARM_CP_CONST,
+      .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 2,
+      .access = PL2_RW, .resetvalue = 0 },
+    REGINFO_SENTINEL
+};
+
+/* Ditto, but for registers which exist in ARMv8 but not v7 */
+static const ARMCPRegInfo el3_no_el2_v8_cp_reginfo[] = {
+    { .name = "HCR2", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4,
+      .access = PL2_RW,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
     REGINFO_SENTINEL
 };
 
@@ -3872,10 +3892,26 @@ static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
      * HCR_PTW forbids certain page-table setups
      * HCR_DC Disables stage1 and enables stage2 translation
      */
-    if ((raw_read(env, ri) ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) {
+    if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) {
         tlb_flush(CPU(cpu));
     }
-    raw_write(env, ri, value);
+    env->cp15.hcr_el2 = value;
+}
+
+static void hcr_writehigh(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
+{
+    /* Handle HCR2 write, i.e. write to high half of HCR_EL2 */
+    value = deposit64(env->cp15.hcr_el2, 32, 32, value);
+    hcr_write(env, NULL, value);
+}
+
+static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
+{
+    /* Handle HCR write, i.e. write to low half of HCR_EL2 */
+    value = deposit64(env->cp15.hcr_el2, 0, 32, value);
+    hcr_write(env, NULL, value);
 }
 
 static const ARMCPRegInfo el2_cp_reginfo[] = {
@@ -3883,23 +3919,33 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
       .writefn = hcr_write },
+    { .name = "HCR", .state = ARM_CP_STATE_AA32,
+      .type = ARM_CP_ALIAS,
+      .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
+      .writefn = hcr_writelow },
     { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64,
       .type = ARM_CP_ALIAS,
       .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1,
       .access = PL2_RW,
       .fieldoffset = offsetof(CPUARMState, elr_el[2]) },
-    { .name = "ESR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[2]) },
-    { .name = "FAR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "FAR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[2]) },
+    { .name = "HIFAR", .state = ARM_CP_STATE_AA32,
+      .type = ARM_CP_ALIAS,
+      .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 2,
+      .access = PL2_RW,
+      .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el[2]) },
     { .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64,
       .type = ARM_CP_ALIAS,
       .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0,
       .access = PL2_RW,
       .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_HYP]) },
-    { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "VBAR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .writefn = vbar_write,
       .fieldoffset = offsetof(CPUARMState, cp15.vbar_el[2]),
@@ -3917,7 +3963,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[2]),
       .resetvalue = 0 },
     { .name = "HMAIR1", .state = ARM_CP_STATE_AA32,
-      .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1,
+      .cp = 15, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1,
       .access = PL2_RW, .type = ARM_CP_ALIAS,
       .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el[2]) },
     { .name = "AMAIR_EL2", .state = ARM_CP_STATE_BOTH,
@@ -3925,8 +3971,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
       .access = PL2_RW, .type = ARM_CP_CONST,
       .resetvalue = 0 },
     /* HAMAIR1 is mapped to AMAIR_EL2[63:32] */
-    { .name = "HMAIR1", .state = ARM_CP_STATE_AA32,
-      .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1,
+    { .name = "HAMAIR1", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1,
       .access = PL2_RW, .type = ARM_CP_CONST,
       .resetvalue = 0 },
     { .name = "AFSR0_EL2", .state = ARM_CP_STATE_BOTH,
@@ -4112,6 +4158,16 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
+static const ARMCPRegInfo el2_v8_cp_reginfo[] = {
+    { .name = "HCR2", .state = ARM_CP_STATE_AA32,
+      .type = ARM_CP_ALIAS,
+      .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4,
+      .access = PL2_RW,
+      .fieldoffset = offsetofhigh32(CPUARMState, cp15.hcr_el2),
+      .writefn = hcr_writehigh },
+    REGINFO_SENTINEL
+};
+
 static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
                                    bool isread)
 {
@@ -5163,6 +5219,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         };
         define_arm_cp_regs(cpu, vpidr_regs);
         define_arm_cp_regs(cpu, el2_cp_reginfo);
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            define_arm_cp_regs(cpu, el2_v8_cp_reginfo);
+        }
         /* RVBAR_EL2 is only implemented if EL2 is the highest EL */
         if (!arm_feature(env, ARM_FEATURE_EL3)) {
             ARMCPRegInfo rvbar = {
@@ -5195,6 +5254,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
             };
             define_arm_cp_regs(cpu, vpidr_regs);
             define_arm_cp_regs(cpu, el3_no_el2_cp_reginfo);
+            if (arm_feature(env, ARM_FEATURE_V8)) {
+                define_arm_cp_regs(cpu, el3_no_el2_v8_cp_reginfo);
+            }
         }
     }
     if (arm_feature(env, ARM_FEATURE_EL3)) {
@@ -5443,6 +5505,16 @@ void register_cp_regs_for_features(ARMCPU *cpu)
             REGINFO_SENTINEL
         };
         define_arm_cp_regs(cpu, auxcr_reginfo);
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            /* HACTLR2 maps to ACTLR_EL2[63:32] and is not in ARMv7 */
+            ARMCPRegInfo hactlr2_reginfo = {
+                .name = "HACTLR2", .state = ARM_CP_STATE_AA32,
+                .cp = 15, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 3,
+                .access = PL2_RW, .type = ARM_CP_CONST,
+                .resetvalue = 0
+            };
+            define_one_arm_cp_reg(cpu, &hactlr2_reginfo);
+        }
     }
 
     if (arm_feature(env, ARM_FEATURE_CBAR)) {
@@ -7961,6 +8033,125 @@ void aarch64_sync_64_to_32(CPUARMState *env)
     env->regs[15] = env->pc;
 }
 
+static void take_aarch32_exception(CPUARMState *env, int new_mode,
+                                   uint32_t mask, uint32_t offset,
+                                   uint32_t newpc)
+{
+    /* Change the CPU state so as to actually take the exception. */
+    switch_mode(env, new_mode);
+    /*
+     * For exceptions taken to AArch32 we must clear the SS bit in both
+     * PSTATE and in the old-state value we save to SPSR_<mode>, so zero it now.
+     */
+    env->uncached_cpsr &= ~PSTATE_SS;
+    env->spsr = cpsr_read(env);
+    /* Clear IT bits.  */
+    env->condexec_bits = 0;
+    /* Switch to the new mode, and to the correct instruction set.  */
+    env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode;
+    /* Set new mode endianness */
+    env->uncached_cpsr &= ~CPSR_E;
+    if (env->cp15.sctlr_el[arm_current_el(env)] & SCTLR_EE) {
+        env->uncached_cpsr |= CPSR_E;
+    }
+    /* J and IL must always be cleared for exception entry */
+    env->uncached_cpsr &= ~(CPSR_IL | CPSR_J);
+    env->daif |= mask;
+
+    if (new_mode == ARM_CPU_MODE_HYP) {
+        env->thumb = (env->cp15.sctlr_el[2] & SCTLR_TE) != 0;
+        env->elr_el[2] = env->regs[15];
+    } else {
+        /*
+         * this is a lie, as there was no c1_sys on V4T/V5, but who cares
+         * and we should just guard the thumb mode on V4
+         */
+        if (arm_feature(env, ARM_FEATURE_V4T)) {
+            env->thumb =
+                (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_TE) != 0;
+        }
+        env->regs[14] = env->regs[15] + offset;
+    }
+    env->regs[15] = newpc;
+}
+
+static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
+{
+    /*
+     * Handle exception entry to Hyp mode; this is sufficiently
+     * different to entry to other AArch32 modes that we handle it
+     * separately here.
+     *
+     * The vector table entry used is always the 0x14 Hyp mode entry point,
+     * unless this is an UNDEF/HVC/abort taken from Hyp to Hyp.
+     * The offset applied to the preferred return address is always zero
+     * (see DDI0487C.a section G1.12.3).
+     * PSTATE A/I/F masks are set based only on the SCR.EA/IRQ/FIQ values.
+     */
+    uint32_t addr, mask;
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    switch (cs->exception_index) {
+    case EXCP_UDEF:
+        addr = 0x04;
+        break;
+    case EXCP_SWI:
+        addr = 0x14;
+        break;
+    case EXCP_BKPT:
+        /* Fall through to prefetch abort.  */
+    case EXCP_PREFETCH_ABORT:
+        env->cp15.ifar_s = env->exception.vaddress;
+        qemu_log_mask(CPU_LOG_INT, "...with HIFAR 0x%x\n",
+                      (uint32_t)env->exception.vaddress);
+        addr = 0x0c;
+        break;
+    case EXCP_DATA_ABORT:
+        env->cp15.dfar_s = env->exception.vaddress;
+        qemu_log_mask(CPU_LOG_INT, "...with HDFAR 0x%x\n",
+                      (uint32_t)env->exception.vaddress);
+        addr = 0x10;
+        break;
+    case EXCP_IRQ:
+        addr = 0x18;
+        break;
+    case EXCP_FIQ:
+        addr = 0x1c;
+        break;
+    case EXCP_HVC:
+        addr = 0x08;
+        break;
+    case EXCP_HYP_TRAP:
+        addr = 0x14;
+    default:
+        cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
+    }
+
+    if (cs->exception_index != EXCP_IRQ && cs->exception_index != EXCP_FIQ) {
+        env->cp15.esr_el[2] = env->exception.syndrome;
+    }
+
+    if (arm_current_el(env) != 2 && addr < 0x14) {
+        addr = 0x14;
+    }
+
+    mask = 0;
+    if (!(env->cp15.scr_el3 & SCR_EA)) {
+        mask |= CPSR_A;
+    }
+    if (!(env->cp15.scr_el3 & SCR_IRQ)) {
+        mask |= CPSR_I;
+    }
+    if (!(env->cp15.scr_el3 & SCR_FIQ)) {
+        mask |= CPSR_F;
+    }
+
+    addr += env->cp15.hvbar;
+
+    take_aarch32_exception(env, ARM_CPU_MODE_HYP, mask, 0, addr);
+}
+
 static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
 {
     ARMCPU *cpu = ARM_CPU(cs);
@@ -7996,6 +8187,11 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
         env->cp15.mdscr_el1 = deposit64(env->cp15.mdscr_el1, 2, 4, moe);
     }
 
+    if (env->exception.target_el == 2) {
+        arm_cpu_do_interrupt_aarch32_hyp(cs);
+        return;
+    }
+
     /* TODO: Vectored interrupt controller.  */
     switch (cs->exception_index) {
     case EXCP_UDEF:
@@ -8103,29 +8299,7 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
         env->cp15.scr_el3 &= ~SCR_NS;
     }
 
-    switch_mode (env, new_mode);
-    /* For exceptions taken to AArch32 we must clear the SS bit in both
-     * PSTATE and in the old-state value we save to SPSR_<mode>, so zero it now.
-     */
-    env->uncached_cpsr &= ~PSTATE_SS;
-    env->spsr = cpsr_read(env);
-    /* Clear IT bits.  */
-    env->condexec_bits = 0;
-    /* Switch to the new mode, and to the correct instruction set.  */
-    env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode;
-    /* Set new mode endianness */
-    env->uncached_cpsr &= ~CPSR_E;
-    if (env->cp15.sctlr_el[arm_current_el(env)] & SCTLR_EE) {
-        env->uncached_cpsr |= CPSR_E;
-    }
-    env->daif |= mask;
-    /* this is a lie, as the was no c1_sys on V4T/V5, but who cares
-     * and we should just guard the thumb mode on V4 */
-    if (arm_feature(env, ARM_FEATURE_V4T)) {
-        env->thumb = (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_TE) != 0;
-    }
-    env->regs[14] = env->regs[15] + offset;
-    env->regs[15] = addr;
+    take_aarch32_exception(env, new_mode, mask, offset, addr);
 }
 
 /* Handle exception entry to a target EL which is using AArch64 */
@@ -11548,45 +11722,30 @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
 #define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
 float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t  x, uint32_t shift, \
                                      void *fpstp) \
-{ \
-    float_status *fpst = fpstp; \
-    float##fsz tmp; \
-    tmp = itype##_to_##float##fsz(x, fpst); \
-    return float##fsz##_scalbn(tmp, -(int)shift, fpst); \
-}
+{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
 
-/* Notice that we want only input-denormal exception flags from the
- * scalbn operation: the other possible flags (overflow+inexact if
- * we overflow to infinity, output-denormal) aren't correct for the
- * complete scale-and-convert operation.
- */
-#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, round) \
-uint##isz##_t HELPER(vfp_to##name##p##round)(float##fsz x, \
-                                             uint32_t shift, \
-                                             void *fpstp) \
-{ \
-    float_status *fpst = fpstp; \
-    int old_exc_flags = get_float_exception_flags(fpst); \
-    float##fsz tmp; \
-    if (float##fsz##_is_any_nan(x)) { \
-        float_raise(float_flag_invalid, fpst); \
-        return 0; \
-    } \
-    tmp = float##fsz##_scalbn(x, shift, fpst); \
-    old_exc_flags |= get_float_exception_flags(fpst) \
-        & float_flag_input_denormal; \
-    set_float_exception_flags(old_exc_flags, fpst); \
-    return float##fsz##_to_##itype##round(tmp, fpst); \
+#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff)   \
+uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
+                                            void *fpst)                   \
+{                                                                         \
+    if (unlikely(float##fsz##_is_any_nan(x))) {                           \
+        float_raise(float_flag_invalid, fpst);                            \
+        return 0;                                                         \
+    }                                                                     \
+    return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst);       \
 }
 
 #define VFP_CONV_FIX(name, p, fsz, isz, itype)                   \
 VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, _round_to_zero) \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, )
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
+                         float_round_to_zero, _round_to_zero)    \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
+                         get_float_rounding_mode(fpst), )
 
 #define VFP_CONV_FIX_A64(name, p, fsz, isz, itype)               \
 VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, )
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
+                         get_float_rounding_mode(fpst), )
 
 VFP_CONV_FIX(sh, d, 64, 64, int16)
 VFP_CONV_FIX(sl, d, 64, 64, int32)
@@ -11606,87 +11765,84 @@ VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
 #undef VFP_CONV_FLOAT_FIX_ROUND
 #undef VFP_CONV_FIX_A64
 
-/* Conversion to/from f16 can overflow to infinity before/after scaling.
- * Therefore we convert to f64, scale, and then convert f64 to f16; or
- * vice versa for conversion to integer.
- *
- * For 16- and 32-bit integers, the conversion to f64 never rounds.
- * For 64-bit integers, any integer that would cause rounding will also
- * overflow to f16 infinity, so there is no double rounding problem.
- */
-
-static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst)
-{
-    return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst);
-}
-
 uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
 {
-    return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst);
+    return int32_to_float16_scalbn(x, -shift, fpst);
 }
 
 uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
 {
-    return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
+    return uint32_to_float16_scalbn(x, -shift, fpst);
 }
 
 uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
 {
-    return do_postscale_fp16(int64_to_float64(x, fpst), shift, fpst);
+    return int64_to_float16_scalbn(x, -shift, fpst);
 }
 
 uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
 {
-    return do_postscale_fp16(uint64_to_float64(x, fpst), shift, fpst);
+    return uint64_to_float16_scalbn(x, -shift, fpst);
 }
 
-static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst)
+uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
 {
-    if (unlikely(float16_is_any_nan(f))) {
+    if (unlikely(float16_is_any_nan(x))) {
         float_raise(float_flag_invalid, fpst);
         return 0;
-    } else {
-        int old_exc_flags = get_float_exception_flags(fpst);
-        float64 ret;
-
-        ret = float16_to_float64(f, true, fpst);
-        ret = float64_scalbn(ret, shift, fpst);
-        old_exc_flags |= get_float_exception_flags(fpst)
-            & float_flag_input_denormal;
-        set_float_exception_flags(old_exc_flags, fpst);
-
-        return ret;
     }
-}
-
-uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst);
+    return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst),
+                                   shift, fpst);
 }
 
 uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
 {
-    return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst),
+                                    shift, fpst);
 }
 
 uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
 {
-    return float64_to_int32(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst),
+                                   shift, fpst);
 }
 
 uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
 {
-    return float64_to_uint32(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst),
+                                    shift, fpst);
 }
 
 uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
 {
-    return float64_to_int64(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst),
+                                   shift, fpst);
 }
 
 uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
 {
-    return float64_to_uint64(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst),
+                                    shift, fpst);
 }
 
 /* Set the current fp rounding mode and return the old one.
@@ -12315,6 +12471,7 @@ int arm_rmode_to_sf(int rmode)
         /* FIXME: add support for TIEAWAY and ODD */
         qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
                       rmode);
+        /* fall through for now */
     case FPROUNDING_TIEEVEN:
     default:
         rmode = float_round_nearest_even;
diff --git a/target/arm/iwmmxt_helper.c b/target/arm/iwmmxt_helper.c
index f6a4fc5b7f..24244d012c 100644
--- a/target/arm/iwmmxt_helper.c
+++ b/target/arm/iwmmxt_helper.c
@@ -27,30 +27,30 @@
 /* iwMMXt macros extracted from GNU gdb.  */
 
 /* Set the SIMD wCASF flags for 8, 16, 32 or 64-bit operations.  */
-#define SIMD8_SET( v, n, b)	((v != 0) << ((((b) + 1) * 4) + (n)))
-#define SIMD16_SET(v, n, h)	((v != 0) << ((((h) + 1) * 8) + (n)))
-#define SIMD32_SET(v, n, w)	((v != 0) << ((((w) + 1) * 16) + (n)))
-#define SIMD64_SET(v, n)	((v != 0) << (32 + (n)))
+#define SIMD8_SET(v, n, b)      ((v != 0) << ((((b) + 1) * 4) + (n)))
+#define SIMD16_SET(v, n, h)     ((v != 0) << ((((h) + 1) * 8) + (n)))
+#define SIMD32_SET(v, n, w)     ((v != 0) << ((((w) + 1) * 16) + (n)))
+#define SIMD64_SET(v, n)        ((v != 0) << (32 + (n)))
 /* Flags to pass as "n" above.  */
-#define SIMD_NBIT	-1
-#define SIMD_ZBIT	-2
-#define SIMD_CBIT	-3
-#define SIMD_VBIT	-4
+#define SIMD_NBIT       -1
+#define SIMD_ZBIT       -2
+#define SIMD_CBIT       -3
+#define SIMD_VBIT       -4
 /* Various status bit macros.  */
-#define NBIT8(x)	((x) & 0x80)
-#define NBIT16(x)	((x) & 0x8000)
-#define NBIT32(x)	((x) & 0x80000000)
-#define NBIT64(x)	((x) & 0x8000000000000000ULL)
-#define ZBIT8(x)	(((x) & 0xff) == 0)
-#define ZBIT16(x)	(((x) & 0xffff) == 0)
-#define ZBIT32(x)	(((x) & 0xffffffff) == 0)
-#define ZBIT64(x)	(x == 0)
+#define NBIT8(x)        ((x) & 0x80)
+#define NBIT16(x)       ((x) & 0x8000)
+#define NBIT32(x)       ((x) & 0x80000000)
+#define NBIT64(x)       ((x) & 0x8000000000000000ULL)
+#define ZBIT8(x)        (((x) & 0xff) == 0)
+#define ZBIT16(x)       (((x) & 0xffff) == 0)
+#define ZBIT32(x)       (((x) & 0xffffffff) == 0)
+#define ZBIT64(x)       (x == 0)
 /* Sign extension macros.  */
-#define EXTEND8H(a)	((uint16_t) (int8_t) (a))
-#define EXTEND8(a)	((uint32_t) (int8_t) (a))
-#define EXTEND16(a)	((uint32_t) (int16_t) (a))
-#define EXTEND16S(a)	((int32_t) (int16_t) (a))
-#define EXTEND32(a)	((uint64_t) (int32_t) (a))
+#define EXTEND8H(a)     ((uint16_t) (int8_t) (a))
+#define EXTEND8(a)      ((uint32_t) (int8_t) (a))
+#define EXTEND16(a)     ((uint32_t) (int16_t) (a))
+#define EXTEND16S(a)    ((int32_t) (int16_t) (a))
+#define EXTEND32(a)     ((uint64_t) (int32_t) (a))
 
 uint64_t HELPER(iwmmxt_maddsq)(uint64_t a, uint64_t b)
 {
@@ -159,141 +159,141 @@ uint64_t HELPER(iwmmxt_macuw)(uint64_t a, uint64_t b)
 #define NZBIT64(x) \
     SIMD64_SET(NBIT64(x), SIMD_NBIT) | \
     SIMD64_SET(ZBIT64(x), SIMD_ZBIT)
-#define IWMMXT_OP_UNPACK(S, SH0, SH1, SH2, SH3)			\
+#define IWMMXT_OP_UNPACK(S, SH0, SH1, SH2, SH3)                         \
 uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUARMState *env, \
                                                  uint64_t a, uint64_t b) \
-{								\
-    a =							        \
-        (((a >> SH0) & 0xff) << 0) | (((b >> SH0) & 0xff) << 8) |	\
-        (((a >> SH1) & 0xff) << 16) | (((b >> SH1) & 0xff) << 24) |	\
-        (((a >> SH2) & 0xff) << 32) | (((b >> SH2) & 0xff) << 40) |	\
-        (((a >> SH3) & 0xff) << 48) | (((b >> SH3) & 0xff) << 56);	\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) |		        \
-        NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) |		\
-        NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) |		\
-        NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);		\
+{                                                               \
+    a =                                                                 \
+        (((a >> SH0) & 0xff) << 0) | (((b >> SH0) & 0xff) << 8) |       \
+        (((a >> SH1) & 0xff) << 16) | (((b >> SH1) & 0xff) << 24) |     \
+        (((a >> SH2) & 0xff) << 32) | (((b >> SH2) & 0xff) << 40) |     \
+        (((a >> SH3) & 0xff) << 48) | (((b >> SH3) & 0xff) << 56);      \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) |                         \
+        NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) |               \
+        NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) |               \
+        NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);                \
     return a;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUARMState *env, \
                                         uint64_t a, uint64_t b) \
-{								\
-    a =							        \
-        (((a >> SH0) & 0xffff) << 0) |				\
-        (((b >> SH0) & 0xffff) << 16) |			        \
-        (((a >> SH2) & 0xffff) << 32) |			        \
-        (((b >> SH2) & 0xffff) << 48);				\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT8(a >> 0, 0) | NZBIT8(a >> 16, 1) |		\
-        NZBIT8(a >> 32, 2) | NZBIT8(a >> 48, 3);		\
+{                                                               \
+    a =                                                                 \
+        (((a >> SH0) & 0xffff) << 0) |                          \
+        (((b >> SH0) & 0xffff) << 16) |                                 \
+        (((a >> SH2) & 0xffff) << 32) |                                 \
+        (((b >> SH2) & 0xffff) << 48);                          \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT8(a >> 0, 0) | NZBIT8(a >> 16, 1) |                \
+        NZBIT8(a >> 32, 2) | NZBIT8(a >> 48, 3);                \
     return a;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUARMState *env, \
                                         uint64_t a, uint64_t b) \
-{								\
-    a =							        \
-        (((a >> SH0) & 0xffffffff) << 0) |			\
-        (((b >> SH0) & 0xffffffff) << 32);			\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1);		\
+{                                                               \
+    a =                                                                 \
+        (((a >> SH0) & 0xffffffff) << 0) |                      \
+        (((b >> SH0) & 0xffffffff) << 32);                      \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1);               \
     return a;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUARMState *env, \
                                                   uint64_t x)   \
-{								\
-    x =							        \
-        (((x >> SH0) & 0xff) << 0) |				\
-        (((x >> SH1) & 0xff) << 16) |				\
-        (((x >> SH2) & 0xff) << 32) |				\
-        (((x >> SH3) & 0xff) << 48);				\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |		\
-        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);		\
+{                                                               \
+    x =                                                                 \
+        (((x >> SH0) & 0xff) << 0) |                            \
+        (((x >> SH1) & 0xff) << 16) |                           \
+        (((x >> SH2) & 0xff) << 32) |                           \
+        (((x >> SH3) & 0xff) << 48);                            \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |              \
+        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);              \
     return x;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUARMState *env, \
                                                   uint64_t x)   \
-{								\
-    x =							        \
-        (((x >> SH0) & 0xffff) << 0) |				\
-        (((x >> SH2) & 0xffff) << 32);				\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);		\
+{                                                               \
+    x =                                                                 \
+        (((x >> SH0) & 0xffff) << 0) |                          \
+        (((x >> SH2) & 0xffff) << 32);                          \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);               \
     return x;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(CPUARMState *env, \
                                                   uint64_t x)   \
-{								\
-    x = (((x >> SH0) & 0xffffffff) << 0);			\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0);	\
+{                                                               \
+    x = (((x >> SH0) & 0xffffffff) << 0);                       \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0);      \
     return x;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUARMState *env, \
                                                   uint64_t x)   \
-{								\
-    x =							        \
-        ((uint64_t) EXTEND8H((x >> SH0) & 0xff) << 0) |	        \
-        ((uint64_t) EXTEND8H((x >> SH1) & 0xff) << 16) |	\
-        ((uint64_t) EXTEND8H((x >> SH2) & 0xff) << 32) |	\
-        ((uint64_t) EXTEND8H((x >> SH3) & 0xff) << 48);	        \
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |		\
-        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);		\
+{                                                               \
+    x =                                                                 \
+        ((uint64_t) EXTEND8H((x >> SH0) & 0xff) << 0) |                 \
+        ((uint64_t) EXTEND8H((x >> SH1) & 0xff) << 16) |        \
+        ((uint64_t) EXTEND8H((x >> SH2) & 0xff) << 32) |        \
+        ((uint64_t) EXTEND8H((x >> SH3) & 0xff) << 48);                 \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) |              \
+        NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3);              \
     return x;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUARMState *env, \
                                                   uint64_t x)   \
-{								\
-    x =							        \
-        ((uint64_t) EXTEND16((x >> SH0) & 0xffff) << 0) |	\
-        ((uint64_t) EXTEND16((x >> SH2) & 0xffff) << 32);	\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);		\
+{                                                               \
+    x =                                                                 \
+        ((uint64_t) EXTEND16((x >> SH0) & 0xffff) << 0) |       \
+        ((uint64_t) EXTEND16((x >> SH2) & 0xffff) << 32);       \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1);               \
     return x;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(CPUARMState *env, \
                                                   uint64_t x)   \
-{								\
-    x = EXTEND32((x >> SH0) & 0xffffffff);			\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0);	\
+{                                                               \
+    x = EXTEND32((x >> SH0) & 0xffffffff);                      \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0);      \
     return x;                                                   \
 }
 IWMMXT_OP_UNPACK(l, 0, 8, 16, 24)
 IWMMXT_OP_UNPACK(h, 32, 40, 48, 56)
 
-#define IWMMXT_OP_CMP(SUFF, Tb, Tw, Tl, O)			\
+#define IWMMXT_OP_CMP(SUFF, Tb, Tw, Tl, O)                      \
 uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUARMState *env,    \
                                         uint64_t a, uint64_t b) \
-{								\
-    a =							        \
-        CMP(0, Tb, O, 0xff) | CMP(8, Tb, O, 0xff) |		\
-        CMP(16, Tb, O, 0xff) | CMP(24, Tb, O, 0xff) |		\
-        CMP(32, Tb, O, 0xff) | CMP(40, Tb, O, 0xff) |		\
-        CMP(48, Tb, O, 0xff) | CMP(56, Tb, O, 0xff);		\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) |		        \
-        NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) |		\
-        NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) |		\
-        NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);		\
+{                                                               \
+    a =                                                                 \
+        CMP(0, Tb, O, 0xff) | CMP(8, Tb, O, 0xff) |             \
+        CMP(16, Tb, O, 0xff) | CMP(24, Tb, O, 0xff) |           \
+        CMP(32, Tb, O, 0xff) | CMP(40, Tb, O, 0xff) |           \
+        CMP(48, Tb, O, 0xff) | CMP(56, Tb, O, 0xff);            \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) |                         \
+        NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) |               \
+        NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) |               \
+        NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7);                \
     return a;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUARMState *env,    \
                                         uint64_t a, uint64_t b) \
-{								\
-    a = CMP(0, Tw, O, 0xffff) | CMP(16, Tw, O, 0xffff) |	\
-        CMP(32, Tw, O, 0xffff) | CMP(48, Tw, O, 0xffff);	\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) |		\
-        NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3);		\
+{                                                               \
+    a = CMP(0, Tw, O, 0xffff) | CMP(16, Tw, O, 0xffff) |        \
+        CMP(32, Tw, O, 0xffff) | CMP(48, Tw, O, 0xffff);        \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) |              \
+        NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3);              \
     return a;                                                   \
-}								\
+}                                                               \
 uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(CPUARMState *env,    \
                                         uint64_t a, uint64_t b) \
-{								\
-    a = CMP(0, Tl, O, 0xffffffff) |				\
-        CMP(32, Tl, O, 0xffffffff);				\
-    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =			\
-        NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1);		\
+{                                                               \
+    a = CMP(0, Tl, O, 0xffffffff) |                             \
+        CMP(32, Tl, O, 0xffffffff);                             \
+    env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =                       \
+        NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1);               \
     return a;                                                   \
 }
 #define CMP(SHR, TYPE, OPER, MASK) ((((TYPE) ((a >> SHR) & MASK) OPER \
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index d550978b5b..952b8d122b 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -611,6 +611,14 @@ static void msr_mrs_banked_exc_checks(CPUARMState *env, uint32_t tgtmode,
      */
     int curmode = env->uncached_cpsr & CPSR_M;
 
+    if (regno == 17) {
+        /* ELR_Hyp: a special case because access from tgtmode is OK */
+        if (curmode != ARM_CPU_MODE_HYP && curmode != ARM_CPU_MODE_MON) {
+            goto undef;
+        }
+        return;
+    }
+
     if (curmode == tgtmode) {
         goto undef;
     }
@@ -638,17 +646,9 @@ static void msr_mrs_banked_exc_checks(CPUARMState *env, uint32_t tgtmode,
     }
 
     if (tgtmode == ARM_CPU_MODE_HYP) {
-        switch (regno) {
-        case 17: /* ELR_Hyp */
-            if (curmode != ARM_CPU_MODE_HYP && curmode != ARM_CPU_MODE_MON) {
-                goto undef;
-            }
-            break;
-        default:
-            if (curmode != ARM_CPU_MODE_MON) {
-                goto undef;
-            }
-            break;
+        /* SPSR_Hyp, r13_hyp: accessible from Monitor mode only */
+        if (curmode != ARM_CPU_MODE_MON) {
+            goto undef;
         }
     }
 
diff --git a/target/arm/translate.c b/target/arm/translate.c
index f845da7c63..c6a5d2ac44 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1627,7 +1627,7 @@ static inline void gen_mov_vreg_F0(int dp, int reg)
         tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
 }
 
-#define ARM_CP_RW_BIT	(1 << 20)
+#define ARM_CP_RW_BIT   (1 << 20)
 
 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
 {
@@ -1861,12 +1861,12 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
             wrd = insn & 0xf;
             rdlo = (insn >> 12) & 0xf;
             rdhi = (insn >> 16) & 0xf;
-            if (insn & ARM_CP_RW_BIT) {			/* TMRRC */
+            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
                 iwmmxt_load_reg(cpu_V0, wrd);
                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
                 tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
-            } else {					/* TMCRR */
+            } else {                                    /* TMCRR */
                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
                 iwmmxt_store_reg(cpu_V0, wrd);
                 gen_op_iwmmxt_set_mup();
@@ -1881,25 +1881,25 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
             return 1;
         }
         if (insn & ARM_CP_RW_BIT) {
-            if ((insn >> 28) == 0xf) {			/* WLDRW wCx */
+            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
                 tmp = tcg_temp_new_i32();
                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
                 iwmmxt_store_creg(wrd, tmp);
             } else {
                 i = 1;
                 if (insn & (1 << 8)) {
-                    if (insn & (1 << 22)) {		/* WLDRD */
+                    if (insn & (1 << 22)) {             /* WLDRD */
                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
                         i = 0;
-                    } else {				/* WLDRW wRd */
+                    } else {                            /* WLDRW wRd */
                         tmp = tcg_temp_new_i32();
                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
                     }
                 } else {
                     tmp = tcg_temp_new_i32();
-                    if (insn & (1 << 22)) {		/* WLDRH */
+                    if (insn & (1 << 22)) {             /* WLDRH */
                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
-                    } else {				/* WLDRB */
+                    } else {                            /* WLDRB */
                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
                     }
                 }
@@ -1910,24 +1910,24 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
                 gen_op_iwmmxt_movq_wRn_M0(wrd);
             }
         } else {
-            if ((insn >> 28) == 0xf) {			/* WSTRW wCx */
+            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
                 tmp = iwmmxt_load_creg(wrd);
                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
             } else {
                 gen_op_iwmmxt_movq_M0_wRn(wrd);
                 tmp = tcg_temp_new_i32();
                 if (insn & (1 << 8)) {
-                    if (insn & (1 << 22)) {		/* WSTRD */
+                    if (insn & (1 << 22)) {             /* WSTRD */
                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
-                    } else {				/* WSTRW wRd */
+                    } else {                            /* WSTRW wRd */
                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
                     }
                 } else {
-                    if (insn & (1 << 22)) {		/* WSTRH */
+                    if (insn & (1 << 22)) {             /* WSTRH */
                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
-                    } else {				/* WSTRB */
+                    } else {                            /* WSTRB */
                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
                     }
@@ -1943,7 +1943,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         return 1;
 
     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
-    case 0x000:						/* WOR */
+    case 0x000:                                                 /* WOR */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 0) & 0xf;
         rd1 = (insn >> 16) & 0xf;
@@ -1954,7 +1954,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x011:						/* TMCR */
+    case 0x011:                                                 /* TMCR */
         if (insn & 0xf)
             return 1;
         rd = (insn >> 12) & 0xf;
@@ -1985,7 +1985,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
             return 1;
         }
         break;
-    case 0x100:						/* WXOR */
+    case 0x100:                                                 /* WXOR */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 0) & 0xf;
         rd1 = (insn >> 16) & 0xf;
@@ -1996,7 +1996,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x111:						/* TMRC */
+    case 0x111:                                                 /* TMRC */
         if (insn & 0xf)
             return 1;
         rd = (insn >> 12) & 0xf;
@@ -2004,7 +2004,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         tmp = iwmmxt_load_creg(wrd);
         store_reg(s, rd, tmp);
         break;
-    case 0x300:						/* WANDN */
+    case 0x300:                                                 /* WANDN */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 0) & 0xf;
         rd1 = (insn >> 16) & 0xf;
@@ -2016,7 +2016,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x200:						/* WAND */
+    case 0x200:                                                 /* WAND */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 0) & 0xf;
         rd1 = (insn >> 16) & 0xf;
@@ -2027,7 +2027,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x810: case 0xa10:				/* WMADD */
+    case 0x810: case 0xa10:                             /* WMADD */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 0) & 0xf;
         rd1 = (insn >> 16) & 0xf;
@@ -2039,7 +2039,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:	/* WUNPCKIL */
+    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         rd1 = (insn >> 0) & 0xf;
@@ -2061,7 +2061,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:	/* WUNPCKIH */
+    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         rd1 = (insn >> 0) & 0xf;
@@ -2083,7 +2083,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x012: case 0x112: case 0x412: case 0x512:	/* WSAD */
+    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         rd1 = (insn >> 0) & 0xf;
@@ -2097,7 +2097,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x010: case 0x110: case 0x210: case 0x310:	/* WMUL */
+    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         rd1 = (insn >> 0) & 0xf;
@@ -2116,7 +2116,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x410: case 0x510: case 0x610: case 0x710:	/* WMAC */
+    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         rd1 = (insn >> 0) & 0xf;
@@ -2132,7 +2132,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x006: case 0x406: case 0x806: case 0xc06:	/* WCMPEQ */
+    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         rd1 = (insn >> 0) & 0xf;
@@ -2154,7 +2154,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x800: case 0x900: case 0xc00: case 0xd00:	/* WAVG2 */
+    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         rd1 = (insn >> 0) & 0xf;
@@ -2174,7 +2174,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x802: case 0x902: case 0xa02: case 0xb02:	/* WALIGNR */
+    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         rd1 = (insn >> 0) & 0xf;
@@ -2187,7 +2187,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x601: case 0x605: case 0x609: case 0x60d:	/* TINSR */
+    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
         if (((insn >> 6) & 3) == 3)
             return 1;
         rd = (insn >> 12) & 0xf;
@@ -2218,7 +2218,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x107: case 0x507: case 0x907: case 0xd07:	/* TEXTRM */
+    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
         rd = (insn >> 12) & 0xf;
         wrd = (insn >> 16) & 0xf;
         if (rd == 15 || ((insn >> 22) & 3) == 3)
@@ -2251,7 +2251,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         }
         store_reg(s, rd, tmp);
         break;
-    case 0x117: case 0x517: case 0x917: case 0xd17:	/* TEXTRC */
+    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
             return 1;
         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
@@ -2270,7 +2270,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_set_nzcv(tmp);
         tcg_temp_free_i32(tmp);
         break;
-    case 0x401: case 0x405: case 0x409: case 0x40d:	/* TBCST */
+    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
         if (((insn >> 6) & 3) == 3)
             return 1;
         rd = (insn >> 12) & 0xf;
@@ -2291,7 +2291,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x113: case 0x513: case 0x913: case 0xd13:	/* TANDC */
+    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
             return 1;
         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
@@ -2319,7 +2319,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         tcg_temp_free_i32(tmp2);
         tcg_temp_free_i32(tmp);
         break;
-    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:	/* WACC */
+    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         gen_op_iwmmxt_movq_M0_wRn(rd0);
@@ -2339,7 +2339,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x115: case 0x515: case 0x915: case 0xd15:	/* TORC */
+    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
             return 1;
         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
@@ -2367,7 +2367,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         tcg_temp_free_i32(tmp2);
         tcg_temp_free_i32(tmp);
         break;
-    case 0x103: case 0x503: case 0x903: case 0xd03:	/* TMOVMSK */
+    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
         rd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
@@ -2387,7 +2387,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         }
         store_reg(s, rd, tmp);
         break;
-    case 0x106: case 0x306: case 0x506: case 0x706:	/* WCMPGT */
+    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
     case 0x906: case 0xb06: case 0xd06: case 0xf06:
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
@@ -2419,7 +2419,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x00e: case 0x20e: case 0x40e: case 0x60e:	/* WUNPCKEL */
+    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
@@ -2450,7 +2450,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x00c: case 0x20c: case 0x40c: case 0x60c:	/* WUNPCKEH */
+    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
@@ -2481,7 +2481,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x204: case 0x604: case 0xa04: case 0xe04:	/* WSRL */
+    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
     case 0x214: case 0x614: case 0xa14: case 0xe14:
         if (((insn >> 22) & 3) == 0)
             return 1;
@@ -2509,7 +2509,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x004: case 0x404: case 0x804: case 0xc04:	/* WSRA */
+    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
     case 0x014: case 0x414: case 0x814: case 0xc14:
         if (((insn >> 22) & 3) == 0)
             return 1;
@@ -2537,7 +2537,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x104: case 0x504: case 0x904: case 0xd04:	/* WSLL */
+    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
     case 0x114: case 0x514: case 0x914: case 0xd14:
         if (((insn >> 22) & 3) == 0)
             return 1;
@@ -2565,7 +2565,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x304: case 0x704: case 0xb04: case 0xf04:	/* WROR */
+    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
     case 0x314: case 0x714: case 0xb14: case 0xf14:
         if (((insn >> 22) & 3) == 0)
             return 1;
@@ -2601,7 +2601,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x116: case 0x316: case 0x516: case 0x716:	/* WMIN */
+    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
     case 0x916: case 0xb16: case 0xd16: case 0xf16:
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
@@ -2632,7 +2632,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x016: case 0x216: case 0x416: case 0x616:	/* WMAX */
+    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
     case 0x816: case 0xa16: case 0xc16: case 0xe16:
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
@@ -2663,7 +2663,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x002: case 0x102: case 0x202: case 0x302:	/* WALIGNI */
+    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
     case 0x402: case 0x502: case 0x602: case 0x702:
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
@@ -2676,7 +2676,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
-    case 0x01a: case 0x11a: case 0x21a: case 0x31a:	/* WSUB */
+    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
@@ -2719,7 +2719,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x01e: case 0x11e: case 0x21e: case 0x31e:	/* WSHUFH */
+    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
@@ -2733,7 +2733,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x018: case 0x118: case 0x218: case 0x318:	/* WADD */
+    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
     case 0x418: case 0x518: case 0x618: case 0x718:
     case 0x818: case 0x918: case 0xa18: case 0xb18:
     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
@@ -2776,7 +2776,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
         break;
-    case 0x008: case 0x108: case 0x208: case 0x308:	/* WPACK */
+    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
     case 0x408: case 0x508: case 0x608: case 0x708:
     case 0x808: case 0x908: case 0xa08: case 0xb08:
     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
@@ -2823,13 +2823,13 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
         tmp = load_reg(s, rd0);
         tmp2 = load_reg(s, rd1);
         switch ((insn >> 16) & 0xf) {
-        case 0x0:					/* TMIA */
+        case 0x0:                                       /* TMIA */
             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
             break;
-        case 0x8:					/* TMIAPH */
+        case 0x8:                                       /* TMIAPH */
             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
             break;
-        case 0xc: case 0xd: case 0xe: case 0xf:		/* TMIAxy */
+        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
             if (insn & (1 << 16))
                 tcg_gen_shri_i32(tmp, tmp, 16);
             if (insn & (1 << 17))
@@ -2872,16 +2872,16 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
         tmp = load_reg(s, rd0);
         tmp2 = load_reg(s, rd1);
         switch ((insn >> 16) & 0xf) {
-        case 0x0:					/* MIA */
+        case 0x0:                                       /* MIA */
             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
             break;
-        case 0x8:					/* MIAPH */
+        case 0x8:                                       /* MIAPH */
             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
             break;
-        case 0xc:					/* MIABB */
-        case 0xd:					/* MIABT */
-        case 0xe:					/* MIATB */
-        case 0xf:					/* MIATT */
+        case 0xc:                                       /* MIABB */
+        case 0xd:                                       /* MIABT */
+        case 0xe:                                       /* MIATB */
+        case 0xf:                                       /* MIATT */
             if (insn & (1 << 16))
                 tcg_gen_shri_i32(tmp, tmp, 16);
             if (insn & (1 << 17))
@@ -2907,13 +2907,13 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
         if (acc != 0)
             return 1;
 
-        if (insn & ARM_CP_RW_BIT) {			/* MRA */
+        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
             iwmmxt_load_reg(cpu_V0, acc);
             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
             tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
             tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
-        } else {					/* MAR */
+        } else {                                        /* MAR */
             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
             iwmmxt_store_reg(cpu_V0, acc);
         }
@@ -4506,10 +4506,14 @@ static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
         }
         break;
     case ARM_CPU_MODE_HYP:
-        /* Note that we can forbid accesses from EL2 here because they
-         * must be from Hyp mode itself
+        /*
+         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
+         * (and so we can forbid accesses from EL2 or below). elr_hyp
+         * can be accessed also from Hyp mode, so forbid accesses from
+         * EL0 or EL1.
          */
-        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 3) {
+        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
+            (s->current_el < 3 && *regno != 17)) {
             goto undef;
         }
         break;
@@ -8480,6 +8484,22 @@ static void gen_srs(DisasContext *s,
     s->base.is_jmp = DISAS_UPDATE;
 }
 
+/* Generate a label used for skipping this instruction */
+static void arm_gen_condlabel(DisasContext *s)
+{
+    if (!s->condjmp) {
+        s->condlabel = gen_new_label();
+        s->condjmp = 1;
+    }
+}
+
+/* Skip this instruction if the ARM condition is false */
+static void arm_skip_unless(DisasContext *s, uint32_t cond)
+{
+    arm_gen_condlabel(s);
+    arm_gen_test_cc(cond ^ 1, s->condlabel);
+}
+
 static void disas_arm_insn(DisasContext *s, unsigned int insn)
 {
     unsigned int cond, val, op1, i, shift, rm, rs, rn, rd, sh;
@@ -8709,9 +8729,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
     if (cond != 0xe) {
         /* if not always execute, we generate a conditional jump to
            next instruction */
-        s->condlabel = gen_new_label();
-        arm_gen_test_cc(cond ^ 1, s->condlabel);
-        s->condjmp = 1;
+        arm_skip_unless(s, cond);
     }
     if ((insn & 0x0f900000) == 0x03000000) {
         if ((insn & (1 << 21)) == 0) {
@@ -8883,6 +8901,25 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
             tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
             break;
+        case 0x6: /* ERET */
+            if (op1 != 3) {
+                goto illegal_op;
+            }
+            if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
+                goto illegal_op;
+            }
+            if ((insn & 0x000fff0f) != 0x0000000e) {
+                /* UNPREDICTABLE; we choose to UNDEF */
+                goto illegal_op;
+            }
+
+            if (s->current_el == 2) {
+                tmp = load_cpu_field(elr_el[2]);
+            } else {
+                tmp = load_reg(s, 14);
+            }
+            gen_exception_return(s, tmp);
+            break;
         case 7:
         {
             int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
@@ -11140,8 +11177,16 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
                         if (rn != 14 || rd != 15) {
                             goto illegal_op;
                         }
-                        tmp = load_reg(s, rn);
-                        tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
+                        if (s->current_el == 2) {
+                            /* ERET from Hyp uses ELR_Hyp, not LR */
+                            if (insn & 0xff) {
+                                goto illegal_op;
+                            }
+                            tmp = load_cpu_field(elr_el[2]);
+                        } else {
+                            tmp = load_reg(s, rn);
+                            tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
+                        }
                         gen_exception_return(s, tmp);
                         break;
                     case 6: /* MRS */
@@ -11205,9 +11250,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
                 /* Conditional branch.  */
                 op = (insn >> 22) & 0xf;
                 /* Generate a conditional jump to next instruction.  */
-                s->condlabel = gen_new_label();
-                arm_gen_test_cc(op ^ 1, s->condlabel);
-                s->condjmp = 1;
+                arm_skip_unless(s, op);
 
                 /* offset[11:1] = insn[10:0] */
                 offset = (insn & 0x7ff) << 1;
@@ -12131,8 +12174,7 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
         case 1: case 3: case 9: case 11: /* czb */
             rm = insn & 7;
             tmp = load_reg(s, rm);
-            s->condlabel = gen_new_label();
-            s->condjmp = 1;
+            arm_gen_condlabel(s);
             if (insn & (1 << 11))
                 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
             else
@@ -12295,9 +12337,7 @@ static void disas_thumb_insn(DisasContext *s, uint32_t insn)
             break;
         }
         /* generate a conditional jump to next instruction */
-        s->condlabel = gen_new_label();
-        arm_gen_test_cc(cond ^ 1, s->condlabel);
-        s->condjmp = 1;
+        arm_skip_unless(s, cond);
 
         /* jump to the offset */
         val = (uint32_t)s->pc + 2;
@@ -12676,9 +12716,7 @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
         uint32_t cond = dc->condexec_cond;
 
         if (cond != 0x0e) {     /* Skip conditional when condition is AL. */
-            dc->condlabel = gen_new_label();
-            arm_gen_test_cc(cond ^ 1, dc->condlabel);
-            dc->condjmp = 1;
+            arm_skip_unless(dc, cond);
         }
     }