summary refs log tree commit diff stats
path: root/target-arm
diff options
context:
space:
mode:
Diffstat (limited to 'target-arm')
-rw-r--r--target-arm/arm-semi.c171
-rw-r--r--target-arm/cpu-qom.h13
-rw-r--r--target-arm/cpu.c11
-rw-r--r--target-arm/cpu.h25
-rw-r--r--target-arm/helper-a64.c9
-rw-r--r--target-arm/helper.c256
-rw-r--r--target-arm/internals.h2
-rw-r--r--target-arm/kvm32.c3
-rw-r--r--target-arm/kvm64.c3
-rw-r--r--target-arm/translate-a64.c370
-rw-r--r--target-arm/translate.c142
-rw-r--r--target-arm/translate.h22
12 files changed, 711 insertions, 316 deletions
diff --git a/target-arm/arm-semi.c b/target-arm/arm-semi.c
index a2a7369567..d7cff3db23 100644
--- a/target-arm/arm-semi.c
+++ b/target-arm/arm-semi.c
@@ -58,6 +58,7 @@
 #define TARGET_SYS_GET_CMDLINE 0x15
 #define TARGET_SYS_HEAPINFO    0x16
 #define TARGET_SYS_EXIT        0x18
+#define TARGET_SYS_SYNCCACHE   0x19
 
 /* ADP_Stopped_ApplicationExit is used for exit(0),
  * anything else is implemented as exit(1) */
@@ -134,6 +135,7 @@ static void arm_semi_cb(CPUState *cs, target_ulong ret, target_ulong err)
 #ifdef CONFIG_USER_ONLY
     TaskState *ts = cs->opaque;
 #endif
+    target_ulong reg0 = is_a64(env) ? env->xregs[0] : env->regs[0];
 
     if (ret == (target_ulong)-1) {
 #ifdef CONFIG_USER_ONLY
@@ -141,22 +143,46 @@ static void arm_semi_cb(CPUState *cs, target_ulong ret, target_ulong err)
 #else
 	syscall_err = err;
 #endif
-        env->regs[0] = ret;
+        reg0 = ret;
     } else {
         /* Fixup syscalls that use nonstardard return conventions.  */
-        switch (env->regs[0]) {
+        switch (reg0) {
         case TARGET_SYS_WRITE:
         case TARGET_SYS_READ:
-            env->regs[0] = arm_semi_syscall_len - ret;
+            reg0 = arm_semi_syscall_len - ret;
             break;
         case TARGET_SYS_SEEK:
-            env->regs[0] = 0;
+            reg0 = 0;
             break;
         default:
-            env->regs[0] = ret;
+            reg0 = ret;
             break;
         }
     }
+    if (is_a64(env)) {
+        env->xregs[0] = reg0;
+    } else {
+        env->regs[0] = reg0;
+    }
+}
+
+static target_ulong arm_flen_buf(ARMCPU *cpu)
+{
+    /* Return an address in target memory of 64 bytes where the remote
+     * gdb should write its stat struct. (The format of this structure
+     * is defined by GDB's remote protocol and is not target-specific.)
+     * We put this on the guest's stack just below SP.
+     */
+    CPUARMState *env = &cpu->env;
+    target_ulong sp;
+
+    if (is_a64(env)) {
+        sp = env->xregs[31];
+    } else {
+        sp = env->regs[13];
+    }
+
+    return sp - 64;
 }
 
 static void arm_semi_flen_cb(CPUState *cs, target_ulong ret, target_ulong err)
@@ -166,8 +192,13 @@ static void arm_semi_flen_cb(CPUState *cs, target_ulong ret, target_ulong err)
     /* The size is always stored in big-endian order, extract
        the value. We assume the size always fit in 32 bits.  */
     uint32_t size;
-    cpu_memory_rw_debug(cs, env->regs[13]-64+32, (uint8_t *)&size, 4, 0);
-    env->regs[0] = be32_to_cpu(size);
+    cpu_memory_rw_debug(cs, arm_flen_buf(cpu) + 32, (uint8_t *)&size, 4, 0);
+    size = be32_to_cpu(size);
+    if (is_a64(env)) {
+        env->xregs[0] = size;
+    } else {
+        env->regs[0] = size;
+    }
 #ifdef CONFIG_USER_ONLY
     ((TaskState *)cs->opaque)->swi_errno = err;
 #else
@@ -175,17 +206,46 @@ static void arm_semi_flen_cb(CPUState *cs, target_ulong ret, target_ulong err)
 #endif
 }
 
+static target_ulong arm_gdb_syscall(ARMCPU *cpu, gdb_syscall_complete_cb cb,
+                                    const char *fmt, ...)
+{
+    va_list va;
+    CPUARMState *env = &cpu->env;
+
+    va_start(va, fmt);
+    gdb_do_syscallv(cb, fmt, va);
+    va_end(va);
+
+    /* FIXME: we are implicitly relying on the syscall completing
+     * before this point, which is not guaranteed. We should
+     * put in an explicit synchronization between this and
+     * the callback function.
+     */
+
+    return is_a64(env) ? env->xregs[0] : env->regs[0];
+}
+
 /* Read the input value from the argument block; fail the semihosting
  * call if the memory read fails.
  */
 #define GET_ARG(n) do {                                 \
-    if (get_user_ual(arg ## n, args + (n) * 4)) {       \
-        return (uint32_t)-1;                            \
+    if (is_a64(env)) {                                  \
+        if (get_user_u64(arg ## n, args + (n) * 8)) {   \
+            return -1;                                  \
+        }                                               \
+    } else {                                            \
+        if (get_user_u32(arg ## n, args + (n) * 4)) {   \
+            return -1;                                  \
+        }                                               \
     }                                                   \
 } while (0)
 
-#define SET_ARG(n, val) put_user_ual(val, args + (n) * 4)
-uint32_t do_arm_semihosting(CPUARMState *env)
+#define SET_ARG(n, val)                                 \
+    (is_a64(env) ?                                      \
+     put_user_u64(val, args + (n) * 8) :                \
+     put_user_u32(val, args + (n) * 4))
+
+target_ulong do_arm_semihosting(CPUARMState *env)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
@@ -201,8 +261,15 @@ uint32_t do_arm_semihosting(CPUARMState *env)
     CPUARMState *ts = env;
 #endif
 
-    nr = env->regs[0];
-    args = env->regs[1];
+    if (is_a64(env)) {
+        /* Note that the syscall number is in W0, not X0 */
+        nr = env->xregs[0] & 0xffffffffU;
+        args = env->xregs[1];
+    } else {
+        nr = env->regs[0];
+        args = env->regs[1];
+    }
+
     switch (nr) {
     case TARGET_SYS_OPEN:
         GET_ARG(0);
@@ -223,9 +290,8 @@ uint32_t do_arm_semihosting(CPUARMState *env)
             return result_fileno;
         }
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "open,%s,%x,1a4", arg0,
-                           (int)arg2+1, gdb_open_modeflags[arg1]);
-            ret = env->regs[0];
+            ret = arm_gdb_syscall(cpu, arm_semi_cb, "open,%s,%x,1a4", arg0,
+                                  (int)arg2+1, gdb_open_modeflags[arg1]);
         } else {
             ret = set_swi_errno(ts, open(s, open_modeflags[arg1], 0644));
         }
@@ -234,8 +300,7 @@ uint32_t do_arm_semihosting(CPUARMState *env)
     case TARGET_SYS_CLOSE:
         GET_ARG(0);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "close,%x", arg0);
-            return env->regs[0];
+            return arm_gdb_syscall(cpu, arm_semi_cb, "close,%x", arg0);
         } else {
             return set_swi_errno(ts, close(arg0));
         }
@@ -248,8 +313,7 @@ uint32_t do_arm_semihosting(CPUARMState *env)
               return (uint32_t)-1;
           /* Write to debug console.  stderr is near enough.  */
           if (use_gdb_syscalls()) {
-                gdb_do_syscall(arm_semi_cb, "write,2,%x,1", args);
-                return env->regs[0];
+                return arm_gdb_syscall(cpu, arm_semi_cb, "write,2,%x,1", args);
           } else {
                 return write(STDERR_FILENO, &c, 1);
           }
@@ -260,8 +324,8 @@ uint32_t do_arm_semihosting(CPUARMState *env)
             return (uint32_t)-1;
         len = strlen(s);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "write,2,%x,%x\n", args, len);
-            ret = env->regs[0];
+            return arm_gdb_syscall(cpu, arm_semi_cb, "write,2,%x,%x",
+                                   args, len);
         } else {
             ret = write(STDERR_FILENO, s, len);
         }
@@ -274,8 +338,8 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         len = arg2;
         if (use_gdb_syscalls()) {
             arm_semi_syscall_len = len;
-            gdb_do_syscall(arm_semi_cb, "write,%x,%x,%x", arg0, arg1, len);
-            return env->regs[0];
+            return arm_gdb_syscall(cpu, arm_semi_cb, "write,%x,%x,%x",
+                                   arg0, arg1, len);
         } else {
             s = lock_user(VERIFY_READ, arg1, len, 1);
             if (!s) {
@@ -295,8 +359,8 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         len = arg2;
         if (use_gdb_syscalls()) {
             arm_semi_syscall_len = len;
-            gdb_do_syscall(arm_semi_cb, "read,%x,%x,%x", arg0, arg1, len);
-            return env->regs[0];
+            return arm_gdb_syscall(cpu, arm_semi_cb, "read,%x,%x,%x",
+                                   arg0, arg1, len);
         } else {
             s = lock_user(VERIFY_WRITE, arg1, len, 0);
             if (!s) {
@@ -317,8 +381,7 @@ uint32_t do_arm_semihosting(CPUARMState *env)
     case TARGET_SYS_ISTTY:
         GET_ARG(0);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "isatty,%x", arg0);
-            return env->regs[0];
+            return arm_gdb_syscall(cpu, arm_semi_cb, "isatty,%x", arg0);
         } else {
             return isatty(arg0);
         }
@@ -326,8 +389,8 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         GET_ARG(0);
         GET_ARG(1);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "lseek,%x,%x,0", arg0, arg1);
-            return env->regs[0];
+            return arm_gdb_syscall(cpu, arm_semi_cb, "lseek,%x,%x,0",
+                                   arg0, arg1);
         } else {
             ret = set_swi_errno(ts, lseek(arg0, arg1, SEEK_SET));
             if (ret == (uint32_t)-1)
@@ -337,9 +400,8 @@ uint32_t do_arm_semihosting(CPUARMState *env)
     case TARGET_SYS_FLEN:
         GET_ARG(0);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_flen_cb, "fstat,%x,%x",
-                           arg0, env->regs[13]-64);
-            return env->regs[0];
+            return arm_gdb_syscall(cpu, arm_semi_flen_cb, "fstat,%x,%x",
+                                   arg0, arm_flen_buf(cpu));
         } else {
             struct stat buf;
             ret = set_swi_errno(ts, fstat(arg0, &buf));
@@ -354,8 +416,8 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         GET_ARG(0);
         GET_ARG(1);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "unlink,%s", arg0, (int)arg1+1);
-            ret = env->regs[0];
+            ret = arm_gdb_syscall(cpu, arm_semi_cb, "unlink,%s",
+                                  arg0, (int)arg1+1);
         } else {
             s = lock_user_string(arg0);
             if (!s) {
@@ -372,9 +434,8 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         GET_ARG(2);
         GET_ARG(3);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "rename,%s,%s",
-                           arg0, (int)arg1+1, arg2, (int)arg3+1);
-            return env->regs[0];
+            return arm_gdb_syscall(cpu, arm_semi_cb, "rename,%s,%s",
+                                   arg0, (int)arg1+1, arg2, (int)arg3+1);
         } else {
             char *s2;
             s = lock_user_string(arg0);
@@ -398,8 +459,8 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         GET_ARG(0);
         GET_ARG(1);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "system,%s", arg0, (int)arg1+1);
-            return env->regs[0];
+            return arm_gdb_syscall(cpu, arm_semi_cb, "system,%s",
+                                   arg0, (int)arg1+1);
         } else {
             s = lock_user_string(arg0);
             if (!s) {
@@ -558,11 +619,35 @@ uint32_t do_arm_semihosting(CPUARMState *env)
             return 0;
         }
     case TARGET_SYS_EXIT:
-        /* ARM specifies only Stopped_ApplicationExit as normal
-         * exit, everything else is considered an error */
-        ret = (args == ADP_Stopped_ApplicationExit) ? 0 : 1;
+        if (is_a64(env)) {
+            /* The A64 version of this call takes a parameter block,
+             * so the application-exit type can return a subcode which
+             * is the exit status code from the application.
+             */
+            GET_ARG(0);
+            GET_ARG(1);
+
+            if (arg0 == ADP_Stopped_ApplicationExit) {
+                ret = arg1;
+            } else {
+                ret = 1;
+            }
+        } else {
+            /* ARM specifies only Stopped_ApplicationExit as normal
+             * exit, everything else is considered an error */
+            ret = (args == ADP_Stopped_ApplicationExit) ? 0 : 1;
+        }
         gdb_exit(env, ret);
         exit(ret);
+    case TARGET_SYS_SYNCCACHE:
+        /* Clean the D-cache and invalidate the I-cache for the specified
+         * virtual address range. This is a nop for us since we don't
+         * implement caches. This is only present on A64.
+         */
+        if (is_a64(env)) {
+            return 0;
+        }
+        /* fall through -- invalid for A32/T32 */
     default:
         fprintf(stderr, "qemu: Unsupported SemiHosting SWI 0x%02x\n", nr);
         cpu_dump_state(cs, stderr, fprintf, 0);
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 00c0716f7d..25fb1ce0f3 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -227,6 +227,19 @@ void arm_gt_vtimer_cb(void *opaque);
 void arm_gt_htimer_cb(void *opaque);
 void arm_gt_stimer_cb(void *opaque);
 
+#define ARM_AFF0_SHIFT 0
+#define ARM_AFF0_MASK  (0xFFULL << ARM_AFF0_SHIFT)
+#define ARM_AFF1_SHIFT 8
+#define ARM_AFF1_MASK  (0xFFULL << ARM_AFF1_SHIFT)
+#define ARM_AFF2_SHIFT 16
+#define ARM_AFF2_MASK  (0xFFULL << ARM_AFF2_SHIFT)
+#define ARM_AFF3_SHIFT 32
+#define ARM_AFF3_MASK  (0xFFULL << ARM_AFF3_SHIFT)
+
+#define ARM32_AFFINITY_MASK (ARM_AFF0_MASK|ARM_AFF1_MASK|ARM_AFF2_MASK)
+#define ARM64_AFFINITY_MASK \
+    (ARM_AFF0_MASK|ARM_AFF1_MASK|ARM_AFF2_MASK|ARM_AFF3_MASK)
+
 #ifdef TARGET_AARCH64
 int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index cc6c6f3d4c..d7b4445413 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -331,10 +331,7 @@ static void arm_cpu_set_irq(void *opaque, int irq, int level)
     switch (irq) {
     case ARM_CPU_VIRQ:
     case ARM_CPU_VFIQ:
-        if (!arm_feature(env, ARM_FEATURE_EL2)) {
-            hw_error("%s: Virtual interrupt line %d with no EL2 support\n",
-                     __func__, irq);
-        }
+        assert(arm_feature(env, ARM_FEATURE_EL2));
         /* fall through */
     case ARM_CPU_IRQ:
     case ARM_CPU_FIQ:
@@ -345,7 +342,7 @@ static void arm_cpu_set_irq(void *opaque, int irq, int level)
         }
         break;
     default:
-        hw_error("arm_cpu_set_irq: Bad interrupt line %d\n", irq);
+        g_assert_not_reached();
     }
 }
 
@@ -364,7 +361,7 @@ static void arm_cpu_kvm_set_irq(void *opaque, int irq, int level)
         kvm_irq |= KVM_ARM_IRQ_CPU_FIQ;
         break;
     default:
-        hw_error("arm_cpu_kvm_set_irq: Bad interrupt line %d\n", irq);
+        g_assert_not_reached();
     }
     kvm_irq |= cs->cpu_index << KVM_ARM_IRQ_VCPU_SHIFT;
     kvm_set_irq(kvm_state, kvm_irq, level ? 1 : 0);
@@ -459,7 +456,7 @@ static void arm_cpu_initfn(Object *obj)
      */
     Aff1 = cs->cpu_index / ARM_CPUS_PER_CLUSTER;
     Aff0 = cs->cpu_index % ARM_CPUS_PER_CLUSTER;
-    cpu->mp_affinity = (Aff1 << 8) | Aff0;
+    cpu->mp_affinity = (Aff1 << ARM_AFF1_SHIFT) | Aff0;
 
 #ifndef CONFIG_USER_ONLY
     /* Our inbound IRQ and FIQ lines */
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 31825d34a1..1b8051613f 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -56,6 +56,7 @@
 #define EXCP_SMC            13   /* Secure Monitor Call */
 #define EXCP_VIRQ           14
 #define EXCP_VFIQ           15
+#define EXCP_SEMIHOST       16   /* semihosting call (A64 only) */
 
 #define ARMV7M_EXCP_RESET   1
 #define ARMV7M_EXCP_NMI     2
@@ -172,7 +173,7 @@ typedef struct CPUARMState {
     uint32_t GE; /* cpsr[19:16] */
     uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
     uint32_t condexec_bits; /* IT bits.  cpsr[15:10,26:25].  */
-    uint64_t daif; /* exception masks, in the bits they are in in PSTATE */
+    uint64_t daif; /* exception masks, in the bits they are in PSTATE */
 
     uint64_t elr_el[4]; /* AArch64 exception link regs  */
     uint64_t sp_el[4]; /* AArch64 banked stack pointers */
@@ -221,10 +222,12 @@ typedef struct CPUARMState {
             };
             uint64_t ttbr1_el[4];
         };
+        uint64_t vttbr_el2; /* Virtualization Translation Table Base.  */
         /* MMU translation table base control. */
         TCR tcr_el[4];
-        uint32_t c2_data; /* MPU data cachable bits.  */
-        uint32_t c2_insn; /* MPU instruction cachable bits.  */
+        TCR vtcr_el2; /* Virtualization Translation Control.  */
+        uint32_t c2_data; /* MPU data cacheable bits.  */
+        uint32_t c2_insn; /* MPU instruction cacheable bits.  */
         union { /* MMU domain access control register
                  * MPU write buffer control.
                  */
@@ -382,6 +385,8 @@ typedef struct CPUARMState {
          */
         uint64_t c15_ccnt;
         uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
+        uint64_t vpidr_el2; /* Virtualization Processor ID Register */
+        uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
     } cp15;
 
     struct {
@@ -504,7 +509,7 @@ typedef struct CPUARMState {
 
 ARMCPU *cpu_arm_init(const char *cpu_model);
 int cpu_arm_exec(CPUState *cpu);
-uint32_t do_arm_semihosting(CPUARMState *env);
+target_ulong do_arm_semihosting(CPUARMState *env);
 void aarch64_sync_32_to_64(CPUARMState *env);
 void aarch64_sync_64_to_32(CPUARMState *env);
 
@@ -1487,7 +1492,7 @@ bool write_list_to_cpustate(ARMCPU *cpu);
  */
 bool write_cpustate_to_list(ARMCPU *cpu);
 
-/* Does the core conform to the the "MicroController" profile. e.g. Cortex-M3.
+/* Does the core conform to the "MicroController" profile. e.g. Cortex-M3.
    Note the M in older cores (eg. ARM7TDMI) stands for Multiply. These are
    conventional cores (ie. Application or Realtime profile).  */
 
@@ -1519,8 +1524,8 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
     CPUARMState *env = cs->env_ptr;
     unsigned int cur_el = arm_current_el(env);
     bool secure = arm_is_secure(env);
-    uint32_t scr;
-    uint32_t hcr;
+    bool scr;
+    bool hcr;
     bool pstate_unmasked;
     int8_t unmasked = 0;
 
@@ -1547,7 +1552,7 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
          * set then FIQs can be masked by CPSR.F when non-secure but only
          * when FIQs are only routed to EL3.
          */
-        scr &= !((env->cp15.scr_el3 & SCR_FW) && !hcr);
+        scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr);
         pstate_unmasked = !(env->daif & PSTATE_F);
         break;
 
@@ -1677,7 +1682,7 @@ static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
 }
 
 /* Determine the current mmu_idx to use for normal loads/stores */
-static inline int cpu_mmu_index(CPUARMState *env)
+static inline int cpu_mmu_index(CPUARMState *env, bool ifetch)
 {
     int el = arm_current_el(env);
 
@@ -1910,7 +1915,7 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                    << ARM_TBFLAG_XSCALE_CPAR_SHIFT);
     }
 
-    *flags |= (cpu_mmu_index(env) << ARM_TBFLAG_MMUIDX_SHIFT);
+    *flags |= (cpu_mmu_index(env, false) << ARM_TBFLAG_MMUIDX_SHIFT);
     /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
      * states defined in the ARM ARM for software singlestep:
      *  SS_ACTIVE   PSTATE.SS   State
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index 08c95a3f52..2c7a645f66 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -478,7 +478,8 @@ void aarch64_cpu_do_interrupt(CPUState *cs)
     }
 
     arm_log_exception(cs->exception_index);
-    qemu_log_mask(CPU_LOG_INT, "...from EL%d\n", arm_current_el(env));
+    qemu_log_mask(CPU_LOG_INT, "...from EL%d to EL%d\n", arm_current_el(env),
+                  new_el);
     if (qemu_loglevel_mask(CPU_LOG_INT)
         && !excp_is_internal(cs->exception_index)) {
         qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%" PRIx32 "\n",
@@ -514,6 +515,12 @@ void aarch64_cpu_do_interrupt(CPUState *cs)
     case EXCP_VFIQ:
         addr += 0x100;
         break;
+    case EXCP_SEMIHOST:
+        qemu_log_mask(CPU_LOG_INT,
+                      "...handling as semihosting call 0x%" PRIx64 "\n",
+                      env->xregs[0]);
+        env->xregs[0] = do_arm_semihosting(env);
+        return;
     default:
         cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
     }
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 7df1f0684d..65b9ff5ad0 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -325,6 +325,34 @@ void init_cpreg_list(ARMCPU *cpu)
     g_list_free(keys);
 }
 
+/*
+ * Some registers are not accessible if EL3.NS=0 and EL3 is using AArch32 but
+ * they are accessible when EL3 is using AArch64 regardless of EL3.NS.
+ *
+ * access_el3_aa32ns: Used to check AArch32 register views.
+ * access_el3_aa32ns_aa64any: Used to check both AArch32/64 register views.
+ */
+static CPAccessResult access_el3_aa32ns(CPUARMState *env,
+                                        const ARMCPRegInfo *ri)
+{
+    bool secure = arm_is_secure_below_el3(env);
+
+    assert(!arm_el_is_aa64(env, 3));
+    if (secure) {
+        return CP_ACCESS_TRAP_UNCATEGORIZED;
+    }
+    return CP_ACCESS_OK;
+}
+
+static CPAccessResult access_el3_aa32ns_aa64any(CPUARMState *env,
+                                                const ARMCPRegInfo *ri)
+{
+    if (!arm_el_is_aa64(env, 3)) {
+        return access_el3_aa32ns(env, ri);
+    }
+    return CP_ACCESS_OK;
+}
+
 static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
@@ -2123,7 +2151,7 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
         }
     }
 
-    /* Update the masks corresponding to the the TCR bank being written
+    /* Update the masks corresponding to the TCR bank being written
      * Note that we always calculate mask and base_mask, but
      * they are only used for short-descriptor tables (ie if EAE is 0);
      * for long-descriptor tables the TCR fields are used differently
@@ -2185,6 +2213,20 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     raw_write(env, ri, value);
 }
 
+static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
+
+    /* Accesses to VTTBR may change the VMID so we must flush the TLB.  */
+    if (raw_read(env, ri) != value) {
+        tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0,
+                            ARMMMUIdx_S2NS, -1);
+        raw_write(env, ri, value);
+    }
+}
+
 static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_ALIAS,
@@ -2403,7 +2445,19 @@ static const ARMCPRegInfo strongarm_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+static uint64_t midr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    unsigned int cur_el = arm_current_el(env);
+    bool secure = arm_is_secure(env);
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_EL2) && !secure && cur_el == 1) {
+        return env->cp15.vpidr_el2;
+    }
+    return raw_read(env, ri);
+}
+
+static uint64_t mpidr_read_val(CPUARMState *env)
 {
     ARMCPU *cpu = ARM_CPU(arm_env_get_cpu(env));
     uint64_t mpidr = cpu->mp_affinity;
@@ -2421,6 +2475,17 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
     return mpidr;
 }
 
+static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    unsigned int cur_el = arm_current_el(env);
+    bool secure = arm_is_secure(env);
+
+    if (arm_feature(env, ARM_FEATURE_EL2) && !secure && cur_el == 1) {
+        return env->cp15.vmpidr_el2;
+    }
+    return mpidr_read_val(env);
+}
+
 static const ARMCPRegInfo mpidr_cp_reginfo[] = {
     { .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
@@ -2975,16 +3040,16 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
       .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3,
       .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
     { .name = "AT_S12E1R", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 4,
+      .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 4,
       .access = PL2_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
     { .name = "AT_S12E1W", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 5,
+      .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 5,
       .access = PL2_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
     { .name = "AT_S12E0R", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 6,
+      .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 6,
       .access = PL2_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
     { .name = "AT_S12E0W", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 7,
+      .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 7,
       .access = PL2_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
     /* AT S1E2* are elsewhere as they UNDEF from EL3 if EL2 is not present */
     { .name = "AT_S1E3R", .state = ARM_CP_STATE_AA64,
@@ -2993,6 +3058,12 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
     { .name = "AT_S1E3W", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 1,
       .access = PL3_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+    { .name = "PAR_EL1", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 0, .crn = 7, .crm = 4, .opc2 = 0,
+      .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.par_el[1]),
+      .writefn = par_write },
 #endif
     /* TLB invalidate last level of translation table walk */
     { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
@@ -3106,6 +3177,17 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
     { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "VTCR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "VTTBR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 6, .crm = 2,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
+      .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
     { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
@@ -3240,6 +3322,24 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
       .access = PL2_RW, .writefn = vmsa_tcr_el1_write,
       .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write,
       .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) },
+    { .name = "VTCR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
+    { .name = "VTCR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
+      .access = PL2_RW, .type = ARM_CP_ALIAS,
+      .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
+    { .name = "VTTBR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 6, .crm = 2,
+      .type = ARM_CP_64BIT | ARM_CP_ALIAS,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2),
+      .writefn = vttbr_write },
+    { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
+      .access = PL2_RW, .writefn = vttbr_write,
+      .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2) },
     { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .raw_writefn = raw_write, .writefn = sctlr_write,
@@ -4044,6 +4144,30 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         define_arm_cp_regs(cpu, v8_cp_reginfo);
     }
     if (arm_feature(env, ARM_FEATURE_EL2)) {
+        uint64_t vmpidr_def = mpidr_read_val(env);
+        ARMCPRegInfo vpidr_regs[] = {
+            { .name = "VPIDR", .state = ARM_CP_STATE_AA32,
+              .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
+              .access = PL2_RW, .accessfn = access_el3_aa32ns,
+              .resetvalue = cpu->midr,
+              .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
+            { .name = "VPIDR_EL2", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
+              .access = PL2_RW, .resetvalue = cpu->midr,
+              .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
+            { .name = "VMPIDR", .state = ARM_CP_STATE_AA32,
+              .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
+              .access = PL2_RW, .accessfn = access_el3_aa32ns,
+              .resetvalue = vmpidr_def,
+              .fieldoffset = offsetof(CPUARMState, cp15.vmpidr_el2) },
+            { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
+              .access = PL2_RW,
+              .resetvalue = vmpidr_def,
+              .fieldoffset = offsetof(CPUARMState, cp15.vmpidr_el2) },
+            REGINFO_SENTINEL
+        };
+        define_arm_cp_regs(cpu, vpidr_regs);
         define_arm_cp_regs(cpu, el2_cp_reginfo);
         /* RVBAR_EL2 is only implemented if EL2 is the highest EL */
         if (!arm_feature(env, ARM_FEATURE_EL3)) {
@@ -4059,6 +4183,23 @@ void register_cp_regs_for_features(ARMCPU *cpu)
          * register the no_el2 reginfos.
          */
         if (arm_feature(env, ARM_FEATURE_EL3)) {
+            /* When EL3 exists but not EL2, VPIDR and VMPIDR take the value
+             * of MIDR_EL1 and MPIDR_EL1.
+             */
+            ARMCPRegInfo vpidr_regs[] = {
+                { .name = "VPIDR_EL2", .state = ARM_CP_STATE_BOTH,
+                  .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
+                  .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+                  .type = ARM_CP_CONST, .resetvalue = cpu->midr,
+                  .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
+                { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_BOTH,
+                  .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
+                  .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+                  .type = ARM_CP_NO_RAW,
+                  .writefn = arm_cp_write_ignore, .readfn = mpidr_read },
+                REGINFO_SENTINEL
+            };
+            define_arm_cp_regs(cpu, vpidr_regs);
             define_arm_cp_regs(cpu, el3_no_el2_cp_reginfo);
         }
     }
@@ -4136,6 +4277,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = CP_ANY,
               .access = PL1_R, .resetvalue = cpu->midr,
               .writefn = arm_cp_write_ignore, .raw_writefn = raw_write,
+              .readfn = midr_read,
               .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
               .type = ARM_CP_OVERRIDE },
             /* crn = 0 op1 = 0 crm = 3..7 : currently unassigned; we RAZ. */
@@ -4159,7 +4301,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         ARMCPRegInfo id_v8_midr_cp_reginfo[] = {
             { .name = "MIDR_EL1", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 0,
-              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->midr },
+              .access = PL1_R, .type = ARM_CP_NO_RAW, .resetvalue = cpu->midr,
+              .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
+              .readfn = midr_read },
             /* crn = 0 op1 = 0 crm = 0 op2 = 4,7 : AArch32 aliases of MIDR */
             { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST,
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4,
@@ -4990,7 +5134,7 @@ int bank_number(int mode)
     case ARM_CPU_MODE_MON:
         return 7;
     }
-    hw_error("bank number requested for bad CPSR mode value 0x%x\n", mode);
+    g_assert_not_reached();
 }
 
 void switch_mode(CPUARMState *env, int mode)
@@ -5228,8 +5372,10 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
             nr = arm_lduw_code(env, env->regs[15], env->bswap_code) & 0xff;
             if (nr == 0xab) {
                 env->regs[15] += 2;
+                qemu_log_mask(CPU_LOG_INT,
+                              "...handling as semihosting call 0x%x\n",
+                              env->regs[0]);
                 env->regs[0] = do_arm_semihosting(env);
-                qemu_log_mask(CPU_LOG_INT, "...handled as semihosting call\n");
                 return;
             }
         }
@@ -5322,35 +5468,35 @@ void aarch64_sync_32_to_64(CPUARMState *env)
     }
 
     if (mode == ARM_CPU_MODE_IRQ) {
-        env->xregs[16] = env->regs[13];
-        env->xregs[17] = env->regs[14];
+        env->xregs[16] = env->regs[14];
+        env->xregs[17] = env->regs[13];
     } else {
-        env->xregs[16] = env->banked_r13[bank_number(ARM_CPU_MODE_IRQ)];
-        env->xregs[17] = env->banked_r14[bank_number(ARM_CPU_MODE_IRQ)];
+        env->xregs[16] = env->banked_r14[bank_number(ARM_CPU_MODE_IRQ)];
+        env->xregs[17] = env->banked_r13[bank_number(ARM_CPU_MODE_IRQ)];
     }
 
     if (mode == ARM_CPU_MODE_SVC) {
-        env->xregs[18] = env->regs[13];
-        env->xregs[19] = env->regs[14];
+        env->xregs[18] = env->regs[14];
+        env->xregs[19] = env->regs[13];
     } else {
-        env->xregs[18] = env->banked_r13[bank_number(ARM_CPU_MODE_SVC)];
-        env->xregs[19] = env->banked_r14[bank_number(ARM_CPU_MODE_SVC)];
+        env->xregs[18] = env->banked_r14[bank_number(ARM_CPU_MODE_SVC)];
+        env->xregs[19] = env->banked_r13[bank_number(ARM_CPU_MODE_SVC)];
     }
 
     if (mode == ARM_CPU_MODE_ABT) {
-        env->xregs[20] = env->regs[13];
-        env->xregs[21] = env->regs[14];
+        env->xregs[20] = env->regs[14];
+        env->xregs[21] = env->regs[13];
     } else {
-        env->xregs[20] = env->banked_r13[bank_number(ARM_CPU_MODE_ABT)];
-        env->xregs[21] = env->banked_r14[bank_number(ARM_CPU_MODE_ABT)];
+        env->xregs[20] = env->banked_r14[bank_number(ARM_CPU_MODE_ABT)];
+        env->xregs[21] = env->banked_r13[bank_number(ARM_CPU_MODE_ABT)];
     }
 
     if (mode == ARM_CPU_MODE_UND) {
-        env->xregs[22] = env->regs[13];
-        env->xregs[23] = env->regs[14];
+        env->xregs[22] = env->regs[14];
+        env->xregs[23] = env->regs[13];
     } else {
-        env->xregs[22] = env->banked_r13[bank_number(ARM_CPU_MODE_UND)];
-        env->xregs[23] = env->banked_r14[bank_number(ARM_CPU_MODE_UND)];
+        env->xregs[22] = env->banked_r14[bank_number(ARM_CPU_MODE_UND)];
+        env->xregs[23] = env->banked_r13[bank_number(ARM_CPU_MODE_UND)];
     }
 
     /* Registers x24-x30 are mapped to r8-r14 in FIQ mode.  If we are in FIQ
@@ -5427,35 +5573,35 @@ void aarch64_sync_64_to_32(CPUARMState *env)
     }
 
     if (mode == ARM_CPU_MODE_IRQ) {
-        env->regs[13] = env->xregs[16];
-        env->regs[14] = env->xregs[17];
+        env->regs[14] = env->xregs[16];
+        env->regs[13] = env->xregs[17];
     } else {
-        env->banked_r13[bank_number(ARM_CPU_MODE_IRQ)] = env->xregs[16];
-        env->banked_r14[bank_number(ARM_CPU_MODE_IRQ)] = env->xregs[17];
+        env->banked_r14[bank_number(ARM_CPU_MODE_IRQ)] = env->xregs[16];
+        env->banked_r13[bank_number(ARM_CPU_MODE_IRQ)] = env->xregs[17];
     }
 
     if (mode == ARM_CPU_MODE_SVC) {
-        env->regs[13] = env->xregs[18];
-        env->regs[14] = env->xregs[19];
+        env->regs[14] = env->xregs[18];
+        env->regs[13] = env->xregs[19];
     } else {
-        env->banked_r13[bank_number(ARM_CPU_MODE_SVC)] = env->xregs[18];
-        env->banked_r14[bank_number(ARM_CPU_MODE_SVC)] = env->xregs[19];
+        env->banked_r14[bank_number(ARM_CPU_MODE_SVC)] = env->xregs[18];
+        env->banked_r13[bank_number(ARM_CPU_MODE_SVC)] = env->xregs[19];
     }
 
     if (mode == ARM_CPU_MODE_ABT) {
-        env->regs[13] = env->xregs[20];
-        env->regs[14] = env->xregs[21];
+        env->regs[14] = env->xregs[20];
+        env->regs[13] = env->xregs[21];
     } else {
-        env->banked_r13[bank_number(ARM_CPU_MODE_ABT)] = env->xregs[20];
-        env->banked_r14[bank_number(ARM_CPU_MODE_ABT)] = env->xregs[21];
+        env->banked_r14[bank_number(ARM_CPU_MODE_ABT)] = env->xregs[20];
+        env->banked_r13[bank_number(ARM_CPU_MODE_ABT)] = env->xregs[21];
     }
 
     if (mode == ARM_CPU_MODE_UND) {
-        env->regs[13] = env->xregs[22];
-        env->regs[14] = env->xregs[23];
+        env->regs[14] = env->xregs[22];
+        env->regs[13] = env->xregs[23];
     } else {
-        env->banked_r13[bank_number(ARM_CPU_MODE_UND)] = env->xregs[22];
-        env->banked_r14[bank_number(ARM_CPU_MODE_UND)] = env->xregs[23];
+        env->banked_r14[bank_number(ARM_CPU_MODE_UND)] = env->xregs[22];
+        env->banked_r13[bank_number(ARM_CPU_MODE_UND)] = env->xregs[23];
     }
 
     /* Registers x24-x30 are mapped to r8-r14 in FIQ mode.  If we are in FIQ
@@ -5549,8 +5695,10 @@ void arm_cpu_do_interrupt(CPUState *cs)
             if (((mask == 0x123456 && !env->thumb)
                     || (mask == 0xab && env->thumb))
                   && (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR) {
+                qemu_log_mask(CPU_LOG_INT,
+                              "...handling as semihosting call 0x%x\n",
+                              env->regs[0]);
                 env->regs[0] = do_arm_semihosting(env);
-                qemu_log_mask(CPU_LOG_INT, "...handled as semihosting call\n");
                 return;
             }
         }
@@ -5567,8 +5715,10 @@ void arm_cpu_do_interrupt(CPUState *cs)
             if (mask == 0xab
                   && (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR) {
                 env->regs[15] += 2;
+                qemu_log_mask(CPU_LOG_INT,
+                              "...handling as semihosting call 0x%x\n",
+                              env->regs[0]);
                 env->regs[0] = do_arm_semihosting(env);
-                qemu_log_mask(CPU_LOG_INT, "...handled as semihosting call\n");
                 return;
             }
         }
@@ -5729,8 +5879,7 @@ static inline bool regime_translation_disabled(CPUARMState *env,
 static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
 {
     if (mmu_idx == ARMMMUIdx_S2NS) {
-        /* TODO: return VTCR_EL2 */
-        g_assert_not_reached();
+        return &env->cp15.vtcr_el2;
     }
     return &env->cp15.tcr_el[regime_el(env, mmu_idx)];
 }
@@ -5740,8 +5889,7 @@ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx,
                                    int ttbrn)
 {
     if (mmu_idx == ARMMMUIdx_S2NS) {
-        /* TODO: return VTTBR_EL2 */
-        g_assert_not_reached();
+        return env->cp15.vttbr_el2;
     }
     if (ttbrn == 0) {
         return env->cp15.ttbr0_el[regime_el(env, mmu_idx)];
@@ -6263,7 +6411,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     /* Read an LPAE long-descriptor translation table. */
     MMUFaultType fault_type = translation_fault;
     uint32_t level = 1;
-    uint32_t epd;
+    uint32_t epd = 0;
     int32_t tsz;
     uint32_t tg;
     uint64_t ttbr;
@@ -6289,7 +6437,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     if (arm_el_is_aa64(env, el)) {
         va_size = 64;
         if (el > 1) {
-            tbi = extract64(tcr->raw_tcr, 20, 1);
+            if (mmu_idx != ARMMMUIdx_S2NS) {
+                tbi = extract64(tcr->raw_tcr, 20, 1);
+            }
         } else {
             if (extract64(address, 55, 1)) {
                 tbi = extract64(tcr->raw_tcr, 38, 1);
@@ -6355,7 +6505,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
      */
     if (ttbr_select == 0) {
         ttbr = regime_ttbr(env, mmu_idx, 0);
-        epd = extract32(tcr->raw_tcr, 7, 1);
+        if (el < 2) {
+            epd = extract32(tcr->raw_tcr, 7, 1);
+        }
         tsz = t0sz;
 
         tg = extract32(tcr->raw_tcr, 14, 2);
@@ -6880,7 +7032,7 @@ hwaddr arm_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     uint32_t fsr;
     MemTxAttrs attrs = {};
 
-    ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env), &phys_addr,
+    ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env, false), &phys_addr,
                         &attrs, &prot, &page_size, &fsr);
 
     if (ret) {
@@ -7045,7 +7197,7 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
         int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
         void *hostaddr[maxidx];
         int try, i;
-        unsigned mmu_idx = cpu_mmu_index(env);
+        unsigned mmu_idx = cpu_mmu_index(env, false);
         TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
 
         for (try = 0; try < 2; try++) {
diff --git a/target-arm/internals.h b/target-arm/internals.h
index 924aff9d04..36a56aadb0 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -36,6 +36,7 @@ static inline bool excp_is_internal(int excp)
         || excp == EXCP_HALTED
         || excp == EXCP_EXCEPTION_EXIT
         || excp == EXCP_KERNEL_TRAP
+        || excp == EXCP_SEMIHOST
         || excp == EXCP_STREX;
 }
 
@@ -58,6 +59,7 @@ static const char * const excnames[] = {
     [EXCP_SMC] = "Secure Monitor Call",
     [EXCP_VIRQ] = "Virtual IRQ",
     [EXCP_VFIQ] = "Virtual FIQ",
+    [EXCP_SEMIHOST] = "Semihosting call",
 };
 
 static inline void arm_log_exception(int idx)
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index 421ce0ea0d..3ae57a6d68 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -181,7 +181,6 @@ int kvm_arm_cpreg_level(uint64_t regidx)
     return KVM_PUT_RUNTIME_STATE;
 }
 
-#define ARM_MPIDR_HWID_BITMASK 0xFFFFFF
 #define ARM_CPU_ID_MPIDR       0, 0, 0, 5
 
 int kvm_arch_init_vcpu(CPUState *cs)
@@ -234,7 +233,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     if (ret) {
         return ret;
     }
-    cpu->mp_affinity = mpidr & ARM_MPIDR_HWID_BITMASK;
+    cpu->mp_affinity = mpidr & ARM32_AFFINITY_MASK;
 
     return kvm_arm_init_cpreg_list(cpu);
 }
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index bd60889d12..ceebfeb774 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -77,7 +77,6 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc)
     return true;
 }
 
-#define ARM_MPIDR_HWID_BITMASK 0xFF00FFFFFFULL
 #define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5
 
 int kvm_arch_init_vcpu(CPUState *cs)
@@ -120,7 +119,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     if (ret) {
         return ret;
     }
-    cpu->mp_affinity = mpidr & ARM_MPIDR_HWID_BITMASK;
+    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;
 
     return kvm_arm_init_cpreg_list(cpu);
 }
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 5c13e153d4..ec0936cf97 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -30,6 +30,7 @@
 #include "internals.h"
 #include "qemu/host-utils.h"
 
+#include "exec/semihost.h"
 #include "exec/gen-icount.h"
 
 #include "exec/helper-proto.h"
@@ -39,16 +40,9 @@
 
 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
-static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
 
 /* Load/store exclusive handling */
-static TCGv_i64 cpu_exclusive_addr;
-static TCGv_i64 cpu_exclusive_val;
 static TCGv_i64 cpu_exclusive_high;
-#ifdef CONFIG_USER_ONLY
-static TCGv_i64 cpu_exclusive_test;
-static TCGv_i32 cpu_exclusive_info;
-#endif
 
 static const char *regnames[] = {
     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
@@ -104,23 +98,8 @@ void a64_translate_init(void)
                                           regnames[i]);
     }
 
-    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
-    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
-    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
-    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
-
-    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
-    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_val), "exclusive_val");
     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_high), "exclusive_high");
-#ifdef CONFIG_USER_ONLY
-    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_test), "exclusive_test");
-    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_info), "exclusive_info");
-#endif
 }
 
 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
@@ -188,6 +167,31 @@ void gen_a64_set_pc_im(uint64_t val)
     tcg_gen_movi_i64(cpu_pc, val);
 }
 
+typedef struct DisasCompare64 {
+    TCGCond cond;
+    TCGv_i64 value;
+} DisasCompare64;
+
+static void a64_test_cc(DisasCompare64 *c64, int cc)
+{
+    DisasCompare c32;
+
+    arm_test_cc(&c32, cc);
+
+    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
+       * properly.  The NE/EQ comparisons are also fine with this choice.  */
+    c64->cond = c32.cond;
+    c64->value = tcg_temp_new_i64();
+    tcg_gen_ext_i32_i64(c64->value, c32.value);
+
+    arm_free_cc(&c32);
+}
+
+static void a64_free_cc(DisasCompare64 *c64)
+{
+    tcg_temp_free_i64(c64->value);
+}
+
 static void gen_exception_internal(int excp)
 {
     TCGv_i32 tcg_excp = tcg_const_i32(excp);
@@ -525,13 +529,8 @@ static TCGv_ptr get_fpstatus_ptr(void)
  */
 static inline void gen_set_NZ64(TCGv_i64 result)
 {
-    TCGv_i64 flag = tcg_temp_new_i64();
-
-    tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
-    tcg_gen_extrl_i64_i32(cpu_ZF, flag);
-    tcg_gen_shri_i64(flag, result, 32);
-    tcg_gen_extrl_i64_i32(cpu_NF, flag);
-    tcg_temp_free_i64(flag);
+    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
+    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 }
 
 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
@@ -541,7 +540,7 @@ static inline void gen_logic_CC(int sf, TCGv_i64 result)
         gen_set_NZ64(result);
     } else {
         tcg_gen_extrl_i64_i32(cpu_ZF, result);
-        tcg_gen_extrl_i64_i32(cpu_NF, result);
+        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
     }
     tcg_gen_movi_i32(cpu_CF, 0);
     tcg_gen_movi_i32(cpu_VF, 0);
@@ -567,8 +566,7 @@ static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
         tcg_gen_xor_i64(tmp, t0, t1);
         tcg_gen_andc_i64(flag, flag, tmp);
         tcg_temp_free_i64(tmp);
-        tcg_gen_shri_i64(flag, flag, 32);
-        tcg_gen_extrl_i64_i32(cpu_VF, flag);
+        tcg_gen_extrh_i64_i32(cpu_VF, flag);
 
         tcg_gen_mov_i64(dest, result);
         tcg_temp_free_i64(result);
@@ -616,8 +614,7 @@ static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
         tcg_gen_xor_i64(tmp, t0, t1);
         tcg_gen_and_i64(flag, flag, tmp);
         tcg_temp_free_i64(tmp);
-        tcg_gen_shri_i64(flag, flag, 32);
-        tcg_gen_extrl_i64_i32(cpu_VF, flag);
+        tcg_gen_extrh_i64_i32(cpu_VF, flag);
         tcg_gen_mov_i64(dest, result);
         tcg_temp_free_i64(flag);
         tcg_temp_free_i64(result);
@@ -676,8 +673,7 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
         tcg_gen_xor_i64(vf_64, result, t0);
         tcg_gen_xor_i64(tmp, t0, t1);
         tcg_gen_andc_i64(vf_64, vf_64, tmp);
-        tcg_gen_shri_i64(vf_64, vf_64, 32);
-        tcg_gen_extrl_i64_i32(cpu_VF, vf_64);
+        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 
         tcg_gen_mov_i64(dest, result);
 
@@ -1553,8 +1549,27 @@ static void disas_exc(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             break;
         }
-        /* HLT */
-        unsupported_encoding(s, insn);
+        /* HLT. This has two purposes.
+         * Architecturally, it is an external halting debug instruction.
+         * Since QEMU doesn't implement external debug, we treat this as
+         * it is required for halting debug disabled: it will UNDEF.
+         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
+         */
+        if (semihosting_enabled() && imm16 == 0xf000) {
+#ifndef CONFIG_USER_ONLY
+            /* In system mode, don't allow userspace access to semihosting,
+             * to provide some semblance of security (and for consistency
+             * with our 32-bit semihosting).
+             */
+            if (s->current_el == 0) {
+                unsupported_encoding(s, insn);
+                break;
+            }
+#endif
+            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
+        } else {
+            unsupported_encoding(s, insn);
+        }
         break;
     case 5:
         if (op2_ll < 1 || op2_ll > 3) {
@@ -2992,9 +3007,51 @@ static void disas_bitfield(DisasContext *s, uint32_t insn)
     }
 
     tcg_rd = cpu_reg(s, rd);
-    tcg_tmp = read_cpu_reg(s, rn, sf);
 
-    /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */
+    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
+       to be smaller than bitsize, we'll never reference data outside the
+       low 32-bits anyway.  */
+    tcg_tmp = read_cpu_reg(s, rn, 1);
+
+    /* Recognize the common aliases.  */
+    if (opc == 0) { /* SBFM */
+        if (ri == 0) {
+            if (si == 7) { /* SXTB */
+                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
+                goto done;
+            } else if (si == 15) { /* SXTH */
+                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
+                goto done;
+            } else if (si == 31) { /* SXTW */
+                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
+                goto done;
+            }
+        }
+        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
+            if (si == 31) {
+                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
+            }
+            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
+            goto done;
+        }
+    } else if (opc == 2) { /* UBFM */
+        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
+            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
+            return;
+        }
+        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
+            if (si == 31) {
+                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
+            }
+            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
+            return;
+        }
+        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
+            int shift = bitsize - 1 - si;
+            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
+            goto done;
+        }
+    }
 
     if (opc != 1) { /* SBFM or UBFM */
         tcg_gen_movi_i64(tcg_rd, 0);
@@ -3019,6 +3076,7 @@ static void disas_bitfield(DisasContext *s, uint32_t insn)
         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
     }
 
+ done:
     if (!sf) { /* zero extend final result */
         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
     }
@@ -3051,17 +3109,7 @@ static void disas_extract(DisasContext *s, uint32_t insn)
 
         tcg_rd = cpu_reg(s, rd);
 
-        if (imm) {
-            /* OPTME: we can special case rm==rn as a rotate */
-            tcg_rm = read_cpu_reg(s, rm, sf);
-            tcg_rn = read_cpu_reg(s, rn, sf);
-            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
-            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
-            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
-            if (!sf) {
-                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
-            }
-        } else {
+        if (unlikely(imm == 0)) {
             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
              * so an extract from bit 0 is a special case.
              */
@@ -3070,8 +3118,27 @@ static void disas_extract(DisasContext *s, uint32_t insn)
             } else {
                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
             }
+        } else if (rm == rn) { /* ROR */
+            tcg_rm = cpu_reg(s, rm);
+            if (sf) {
+                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
+            } else {
+                TCGv_i32 tmp = tcg_temp_new_i32();
+                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
+                tcg_gen_rotri_i32(tmp, tmp, imm);
+                tcg_gen_extu_i32_i64(tcg_rd, tmp);
+                tcg_temp_free_i32(tmp);
+            }
+        } else {
+            tcg_rm = read_cpu_reg(s, rm, sf);
+            tcg_rn = read_cpu_reg(s, rn, sf);
+            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
+            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
+            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
+            if (!sf) {
+                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+            }
         }
-
     }
 }
 
@@ -3547,8 +3614,9 @@ static void disas_adc_sbc(DisasContext *s, uint32_t insn)
 static void disas_cc(DisasContext *s, uint32_t insn)
 {
     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
-    TCGLabel *label_continue = NULL;
+    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
+    DisasCompare c;
 
     if (!extract32(insn, 29, 1)) {
         unallocated_encoding(s);
@@ -3566,19 +3634,13 @@ static void disas_cc(DisasContext *s, uint32_t insn)
     rn = extract32(insn, 5, 5);
     nzcv = extract32(insn, 0, 4);
 
-    if (cond < 0x0e) { /* not always */
-        TCGLabel *label_match = gen_new_label();
-        label_continue = gen_new_label();
-        arm_gen_test_cc(cond, label_match);
-        /* nomatch: */
-        tcg_tmp = tcg_temp_new_i64();
-        tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
-        gen_set_nzcv(tcg_tmp);
-        tcg_temp_free_i64(tcg_tmp);
-        tcg_gen_br(label_continue);
-        gen_set_label(label_match);
-    }
-    /* match, or condition is always */
+    /* Set T0 = !COND.  */
+    tcg_t0 = tcg_temp_new_i32();
+    arm_test_cc(&c, cond);
+    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
+    arm_free_cc(&c);
+
+    /* Load the arguments for the new comparison.  */
     if (is_imm) {
         tcg_y = new_tmp_a64(s);
         tcg_gen_movi_i64(tcg_y, y);
@@ -3587,6 +3649,7 @@ static void disas_cc(DisasContext *s, uint32_t insn)
     }
     tcg_rn = cpu_reg(s, rn);
 
+    /* Set the flags for the new comparison.  */
     tcg_tmp = tcg_temp_new_i64();
     if (op) {
         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
@@ -3595,9 +3658,55 @@ static void disas_cc(DisasContext *s, uint32_t insn)
     }
     tcg_temp_free_i64(tcg_tmp);
 
-    if (cond < 0x0e) { /* continue */
-        gen_set_label(label_continue);
+    /* If COND was false, force the flags to #nzcv.  Compute two masks
+     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
+     * For tcg hosts that support ANDC, we can make do with just T1.
+     * In either case, allow the tcg optimizer to delete any unused mask.
+     */
+    tcg_t1 = tcg_temp_new_i32();
+    tcg_t2 = tcg_temp_new_i32();
+    tcg_gen_neg_i32(tcg_t1, tcg_t0);
+    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
+
+    if (nzcv & 8) { /* N */
+        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
+    } else {
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
+        }
+    }
+    if (nzcv & 4) { /* Z */
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
+        }
+    } else {
+        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
+    }
+    if (nzcv & 2) { /* C */
+        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
+    } else {
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
+        }
     }
+    if (nzcv & 1) { /* V */
+        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
+    } else {
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
+        }
+    }
+    tcg_temp_free_i32(tcg_t0);
+    tcg_temp_free_i32(tcg_t1);
+    tcg_temp_free_i32(tcg_t2);
 }
 
 /* C3.5.6 Conditional select
@@ -3609,7 +3718,8 @@ static void disas_cc(DisasContext *s, uint32_t insn)
 static void disas_cond_select(DisasContext *s, uint32_t insn)
 {
     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
-    TCGv_i64 tcg_rd, tcg_src;
+    TCGv_i64 tcg_rd, zero;
+    DisasCompare64 c;
 
     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
         /* S == 1 or op2<1> == 1 */
@@ -3624,48 +3734,35 @@ static void disas_cond_select(DisasContext *s, uint32_t insn)
     rn = extract32(insn, 5, 5);
     rd = extract32(insn, 0, 5);
 
-    if (rd == 31) {
-        /* silly no-op write; until we use movcond we must special-case
-         * this to avoid a dead temporary across basic blocks.
-         */
-        return;
-    }
-
     tcg_rd = cpu_reg(s, rd);
 
-    if (cond >= 0x0e) { /* condition "always" */
-        tcg_src = read_cpu_reg(s, rn, sf);
-        tcg_gen_mov_i64(tcg_rd, tcg_src);
-    } else {
-        /* OPTME: we could use movcond here, at the cost of duplicating
-         * a lot of the arm_gen_test_cc() logic.
-         */
-        TCGLabel *label_match = gen_new_label();
-        TCGLabel *label_continue = gen_new_label();
-
-        arm_gen_test_cc(cond, label_match);
-        /* nomatch: */
-        tcg_src = cpu_reg(s, rm);
+    a64_test_cc(&c, cond);
+    zero = tcg_const_i64(0);
 
+    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
+        /* CSET & CSETM.  */
+        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
+        if (else_inv) {
+            tcg_gen_neg_i64(tcg_rd, tcg_rd);
+        }
+    } else {
+        TCGv_i64 t_true = cpu_reg(s, rn);
+        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
         if (else_inv && else_inc) {
-            tcg_gen_neg_i64(tcg_rd, tcg_src);
+            tcg_gen_neg_i64(t_false, t_false);
         } else if (else_inv) {
-            tcg_gen_not_i64(tcg_rd, tcg_src);
+            tcg_gen_not_i64(t_false, t_false);
         } else if (else_inc) {
-            tcg_gen_addi_i64(tcg_rd, tcg_src, 1);
-        } else {
-            tcg_gen_mov_i64(tcg_rd, tcg_src);
-        }
-        if (!sf) {
-            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+            tcg_gen_addi_i64(t_false, t_false, 1);
         }
-        tcg_gen_br(label_continue);
-        /* match: */
-        gen_set_label(label_match);
-        tcg_src = read_cpu_reg(s, rn, sf);
-        tcg_gen_mov_i64(tcg_rd, tcg_src);
-        /* continue: */
-        gen_set_label(label_continue);
+        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
+    }
+
+    tcg_temp_free_i64(zero);
+    a64_free_cc(&c);
+
+    if (!sf) {
+        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
     }
 }
 
@@ -4152,20 +4249,6 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
     }
 }
 
-/* copy src FP register to dst FP register; type specifies single or double */
-static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
-{
-    if (type) {
-        TCGv_i64 v = read_fp_dreg(s, src);
-        write_fp_dreg(s, dst, v);
-        tcg_temp_free_i64(v);
-    } else {
-        TCGv_i32 v = read_fp_sreg(s, src);
-        write_fp_sreg(s, dst, v);
-        tcg_temp_free_i32(v);
-    }
-}
-
 /* C3.6.24 Floating point conditional select
  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
  * +---+---+---+-----------+------+---+------+------+-----+------+------+
@@ -4175,7 +4258,8 @@ static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
 static void disas_fp_csel(DisasContext *s, uint32_t insn)
 {
     unsigned int mos, type, rm, cond, rn, rd;
-    TCGLabel *label_continue = NULL;
+    TCGv_i64 t_true, t_false, t_zero;
+    DisasCompare64 c;
 
     mos = extract32(insn, 29, 3);
     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
@@ -4193,21 +4277,23 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
         return;
     }
 
-    if (cond < 0x0e) { /* not always */
-        TCGLabel *label_match = gen_new_label();
-        label_continue = gen_new_label();
-        arm_gen_test_cc(cond, label_match);
-        /* nomatch: */
-        gen_mov_fp2fp(s, type, rd, rm);
-        tcg_gen_br(label_continue);
-        gen_set_label(label_match);
-    }
+    /* Zero extend sreg inputs to 64 bits now.  */
+    t_true = tcg_temp_new_i64();
+    t_false = tcg_temp_new_i64();
+    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
+    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
 
-    gen_mov_fp2fp(s, type, rd, rn);
+    a64_test_cc(&c, cond);
+    t_zero = tcg_const_i64(0);
+    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
+    tcg_temp_free_i64(t_zero);
+    tcg_temp_free_i64(t_false);
+    a64_free_cc(&c);
 
-    if (cond < 0x0e) { /* continue */
-        gen_set_label(label_continue);
-    }
+    /* Note that sregs write back zeros to the high bits,
+       and we've already done the zero-extension.  */
+    write_fp_dreg(s, rd, t_true);
+    tcg_temp_free_i64(t_true);
 }
 
 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
@@ -7681,10 +7767,8 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
             } else {
                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
-                tcg_gen_extrl_i64_i32(tcg_lo, tcg_op);
+                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
-                tcg_gen_shri_i64(tcg_op, tcg_op, 32);
-                tcg_gen_extrl_i64_i32(tcg_hi, tcg_op);
                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
                 tcg_temp_free_i32(tcg_lo);
@@ -8590,16 +8674,10 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
     }
 }
 
-static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
-{
-    tcg_gen_shri_i64(in, in, 32);
-    tcg_gen_extrl_i64_i32(res, in);
-}
-
 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
 {
     tcg_gen_addi_i64(in, in, 1U << 31);
-    do_narrow_high_u32(res, in);
+    tcg_gen_extrh_i64_i32(res, in);
 }
 
 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
@@ -8618,7 +8696,7 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
               gen_helper_neon_narrow_round_high_u8 },
             { gen_helper_neon_narrow_high_u16,
               gen_helper_neon_narrow_round_high_u16 },
-            { do_narrow_high_u32, do_narrow_round_high_u32 },
+            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
         };
         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
 
@@ -10946,7 +11024,11 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
     dc->condjmp = 0;
 
     dc->aarch64 = 1;
-    dc->el3_is_aa64 = arm_el_is_aa64(env, 3);
+    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
+     * there is no secure EL1, so we route exceptions to EL3.
+     */
+    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
+                               !arm_el_is_aa64(env, 3);
     dc->thumb = 0;
     dc->bswap_code = 0;
     dc->condexec_mask = 0;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index e27634f3c8..84a21ace54 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -64,12 +64,12 @@ TCGv_ptr cpu_env;
 /* We reuse the same 64-bit temporaries for efficiency.  */
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
-static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
-static TCGv_i64 cpu_exclusive_addr;
-static TCGv_i64 cpu_exclusive_val;
+TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
+TCGv_i64 cpu_exclusive_addr;
+TCGv_i64 cpu_exclusive_val;
 #ifdef CONFIG_USER_ONLY
-static TCGv_i64 cpu_exclusive_test;
-static TCGv_i32 cpu_exclusive_info;
+TCGv_i64 cpu_exclusive_test;
+TCGv_i32 cpu_exclusive_info;
 #endif
 
 /* FIXME:  These should be removed.  */
@@ -738,81 +738,113 @@ static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
 #undef PAS_OP
 
 /*
- * generate a conditional branch based on ARM condition code cc.
+ * Generate a conditional based on ARM condition code cc.
  * This is common between ARM and Aarch64 targets.
  */
-void arm_gen_test_cc(int cc, TCGLabel *label)
+void arm_test_cc(DisasCompare *cmp, int cc)
 {
-    TCGv_i32 tmp;
-    TCGLabel *inv;
+    TCGv_i32 value;
+    TCGCond cond;
+    bool global = true;
 
     switch (cc) {
     case 0: /* eq: Z */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
-        break;
     case 1: /* ne: !Z */
-        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
+        cond = TCG_COND_EQ;
+        value = cpu_ZF;
         break;
+
     case 2: /* cs: C */
-        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label);
-        break;
     case 3: /* cc: !C */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
+        cond = TCG_COND_NE;
+        value = cpu_CF;
         break;
+
     case 4: /* mi: N */
-        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label);
-        break;
     case 5: /* pl: !N */
-        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label);
+        cond = TCG_COND_LT;
+        value = cpu_NF;
         break;
+
     case 6: /* vs: V */
-        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label);
-        break;
     case 7: /* vc: !V */
-        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label);
+        cond = TCG_COND_LT;
+        value = cpu_VF;
         break;
+
     case 8: /* hi: C && !Z */
-        inv = gen_new_label();
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv);
-        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
-        gen_set_label(inv);
-        break;
-    case 9: /* ls: !C || Z */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
+    case 9: /* ls: !C || Z -> !(C && !Z) */
+        cond = TCG_COND_NE;
+        value = tcg_temp_new_i32();
+        global = false;
+        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
+           ZF is non-zero for !Z; so AND the two subexpressions.  */
+        tcg_gen_neg_i32(value, cpu_CF);
+        tcg_gen_and_i32(value, value, cpu_ZF);
         break;
+
     case 10: /* ge: N == V -> N ^ V == 0 */
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        break;
     case 11: /* lt: N != V -> N ^ V != 0 */
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
+        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
+        cond = TCG_COND_GE;
+        value = tcg_temp_new_i32();
+        global = false;
+        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
         break;
+
     case 12: /* gt: !Z && N == V */
-        inv = gen_new_label();
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv);
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        gen_set_label(inv);
-        break;
     case 13: /* le: Z || N != V */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
+        cond = TCG_COND_NE;
+        value = tcg_temp_new_i32();
+        global = false;
+        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
+         * the sign bit then AND with ZF to yield the result.  */
+        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
+        tcg_gen_sari_i32(value, value, 31);
+        tcg_gen_andc_i32(value, cpu_ZF, value);
         break;
+
+    case 14: /* always */
+    case 15: /* always */
+        /* Use the ALWAYS condition, which will fold early.
+         * It doesn't matter what we use for the value.  */
+        cond = TCG_COND_ALWAYS;
+        value = cpu_ZF;
+        goto no_invert;
+
     default:
         fprintf(stderr, "Bad condition code 0x%x\n", cc);
         abort();
     }
+
+    if (cc & 1) {
+        cond = tcg_invert_cond(cond);
+    }
+
+ no_invert:
+    cmp->cond = cond;
+    cmp->value = value;
+    cmp->value_global = global;
+}
+
+void arm_free_cc(DisasCompare *cmp)
+{
+    if (!cmp->value_global) {
+        tcg_temp_free_i32(cmp->value);
+    }
+}
+
+void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
+{
+    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
+}
+
+void arm_gen_test_cc(int cc, TCGLabel *label)
+{
+    DisasCompare cmp;
+    arm_test_cc(&cmp, cc);
+    arm_jump_cc(&cmp, label);
+    arm_free_cc(&cmp);
 }
 
 static const uint8_t table_logic_cc[16] = {
@@ -8500,7 +8532,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
                 }
                 /* Perform base writeback before the loaded value to
                    ensure correct behavior with overlapping index registers.
-                   ldrd with base writeback is is undefined if the
+                   ldrd with base writeback is undefined if the
                    destination and index registers overlap.  */
                 if (!(insn & (1 << 24))) {
                     gen_add_datah_offset(s, insn, address_offset, addr);
@@ -11172,7 +11204,11 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     dc->condjmp = 0;
 
     dc->aarch64 = 0;
-    dc->el3_is_aa64 = arm_el_is_aa64(env, 3);
+    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
+     * there is no secure EL1, so we route exceptions to EL3.
+     */
+    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
+                               !arm_el_is_aa64(env, 3);
     dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
     dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags);
     dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 9ab978fb75..b8fe37a0a7 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -23,7 +23,8 @@ typedef struct DisasContext {
     ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */
     bool ns;        /* Use non-secure CPREG bank on access */
     int fp_excp_el; /* FP exception EL or 0 if enabled */
-    bool el3_is_aa64;  /* Flag indicating whether EL3 is AArch64 or not */
+    /* Flag indicating that exceptions from secure mode are routed to EL3. */
+    bool secure_routed_to_el3;
     bool vfp_enabled; /* FP enabled via FPSCR.EN */
     int vec_len;
     int vec_stride;
@@ -62,7 +63,21 @@ typedef struct DisasContext {
     TCGv_i64 tmp_a64[TMP_A64_MAX];
 } DisasContext;
 
+typedef struct DisasCompare {
+    TCGCond cond;
+    TCGv_i32 value;
+    bool value_global;
+} DisasCompare;
+
+/* Share the TCG temporaries common between 32 and 64 bit modes.  */
 extern TCGv_ptr cpu_env;
+extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
+extern TCGv_i64 cpu_exclusive_addr;
+extern TCGv_i64 cpu_exclusive_val;
+#ifdef CONFIG_USER_ONLY
+extern TCGv_i64 cpu_exclusive_test;
+extern TCGv_i32 cpu_exclusive_info;
+#endif
 
 static inline int arm_dc_feature(DisasContext *dc, int feature)
 {
@@ -84,7 +99,7 @@ static inline int default_exception_el(DisasContext *s)
      * exceptions can only be routed to ELs above 1, so we target the higher of
      * 1 or the current EL.
      */
-    return (s->mmu_idx == ARMMMUIdx_S1SE0 && !s->el3_is_aa64)
+    return (s->mmu_idx == ARMMMUIdx_S1SE0 && s->secure_routed_to_el3)
             ? 3 : MAX(1, s->current_el);
 }
 
@@ -135,6 +150,9 @@ static inline void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 }
 #endif
 
+void arm_test_cc(DisasCompare *cmp, int cc);
+void arm_free_cc(DisasCompare *cmp);
+void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
 void arm_gen_test_cc(int cc, TCGLabel *label);
 
 #endif /* TARGET_ARM_TRANSLATE_H */