12 files changed, 1337 insertions, 1112 deletions
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 5b7a36b5d7..e44e18062c 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1108,7 +1108,7 @@ static void arm_set_pmu(Object *obj, bool value, Error **errp)
     ARMCPU *cpu = ARM_CPU(obj);
 
     if (value) {
-        if (kvm_enabled() && !kvm_arm_pmu_supported(CPU(cpu))) {
+        if (kvm_enabled() && !kvm_arm_pmu_supported()) {
             error_setg(errp, "'pmu' feature not supported by KVM on this host");
             return;
         }
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 677584e5da..cf66b8c7fb 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2334,7 +2334,7 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
  * migration or KVM state synchronization. (Typically this is for "registers"
  * which are actually used as instructions for cache maintenance and so on.)
  * IO indicates that this register does I/O and therefore its accesses
- * need to be surrounded by gen_io_start()/gen_io_end(). In particular,
+ * need to be marked with gen_io_start() and also end the TB. In particular,
  * registers which implement clocks or timers require this.
  * RAISES_EXC is for when the read or write hook might raise an exception;
  * the generated code will synchronize the CPU state before calling the hook
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 778cecc2e6..a0c1d8894b 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -266,7 +266,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
 
     /* Collect the set of vector lengths supported by KVM. */
     bitmap_zero(kvm_supported, ARM_MAX_VQ);
-    if (kvm_enabled() && kvm_arm_sve_supported(CPU(cpu))) {
+    if (kvm_enabled() && kvm_arm_sve_supported()) {
         kvm_arm_sve_get_vls(CPU(cpu), kvm_supported);
     } else if (kvm_enabled()) {
         assert(!cpu_isar_feature(aa64_sve, cpu));
@@ -473,7 +473,7 @@ static void cpu_max_set_sve_max_vq(Object *obj, Visitor *v, const char *name,
         return;
     }
 
-    if (kvm_enabled() && !kvm_arm_sve_supported(CPU(cpu))) {
+    if (kvm_enabled() && !kvm_arm_sve_supported()) {
         error_setg(errp, "cannot set sve-max-vq");
         error_append_hint(errp, "SVE not supported by KVM on this host\n");
         return;
@@ -519,7 +519,7 @@ static void cpu_arm_set_sve_vq(Object *obj, Visitor *v, const char *name,
         return;
     }
 
-    if (value && kvm_enabled() && !kvm_arm_sve_supported(CPU(cpu))) {
+    if (value && kvm_enabled() && !kvm_arm_sve_supported()) {
         error_setg(errp, "cannot enable %s", name);
         error_append_hint(errp, "SVE not supported by KVM on this host\n");
         return;
@@ -556,7 +556,7 @@ static void cpu_arm_set_sve(Object *obj, Visitor *v, const char *name,
         return;
     }
 
-    if (value && kvm_enabled() && !kvm_arm_sve_supported(CPU(cpu))) {
+    if (value && kvm_enabled() && !kvm_arm_sve_supported()) {
         error_setg(errp, "'sve' feature not supported by KVM on this host");
         return;
     }
@@ -751,7 +751,7 @@ static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp)
      * uniform execution state like do_interrupt.
      */
     if (value == false) {
-        if (!kvm_enabled() || !kvm_arm_aarch32_supported(CPU(cpu))) {
+        if (!kvm_enabled() || !kvm_arm_aarch32_supported()) {
             error_setg(errp, "'aarch64' feature cannot be disabled "
                              "unless KVM is enabled and 32-bit EL1 "
                              "is supported");
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index eef3bbd1cc..7c672c78b8 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -208,9 +208,9 @@ void kvm_arm_add_vcpu_properties(Object *obj)
     }
 }
 
-bool kvm_arm_pmu_supported(CPUState *cpu)
+bool kvm_arm_pmu_supported(void)
 {
-    return kvm_check_extension(cpu->kvm_state, KVM_CAP_ARM_PMU_V3);
+    return kvm_check_extension(kvm_state, KVM_CAP_ARM_PMU_V3);
 }
 
 int kvm_arm_get_max_vm_ipa_size(MachineState *ms)
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index f09ed9f4df..3dc494aaa7 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -652,18 +652,14 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
     return true;
 }
 
-bool kvm_arm_aarch32_supported(CPUState *cpu)
+bool kvm_arm_aarch32_supported(void)
 {
-    KVMState *s = KVM_STATE(current_accel());
-
-    return kvm_check_extension(s, KVM_CAP_ARM_EL1_32BIT);
+    return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT);
 }
 
-bool kvm_arm_sve_supported(CPUState *cpu)
+bool kvm_arm_sve_supported(void)
 {
-    KVMState *s = KVM_STATE(current_accel());
-
-    return kvm_check_extension(s, KVM_CAP_ARM_SVE);
+    return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
 }
 
 QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
@@ -798,7 +794,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
         env->features &= ~(1ULL << ARM_FEATURE_PMU);
     }
     if (cpu_isar_feature(aa64_sve, cpu)) {
-        assert(kvm_arm_sve_supported(cs));
+        assert(kvm_arm_sve_supported());
         cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
     }
 
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 48bf5e16d5..a4ce4fd93d 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -269,29 +269,26 @@ void kvm_arm_add_vcpu_properties(Object *obj);
 
 /**
  * kvm_arm_aarch32_supported:
- * @cs: CPUState
  *
- * Returns: true if the KVM VCPU can enable AArch32 mode
+ * Returns: true if KVM can enable AArch32 mode
  * and false otherwise.
  */
-bool kvm_arm_aarch32_supported(CPUState *cs);
+bool kvm_arm_aarch32_supported(void);
 
 /**
  * kvm_arm_pmu_supported:
- * @cs: CPUState
  *
- * Returns: true if the KVM VCPU can enable its PMU
+ * Returns: true if KVM can enable the PMU
  * and false otherwise.
  */
-bool kvm_arm_pmu_supported(CPUState *cs);
+bool kvm_arm_pmu_supported(void);
 
 /**
  * kvm_arm_sve_supported:
- * @cs: CPUState
  *
- * Returns true if the KVM VCPU can enable SVE and false otherwise.
+ * Returns true if KVM can enable SVE and false otherwise.
  */
-bool kvm_arm_sve_supported(CPUState *cs);
+bool kvm_arm_sve_supported(void);
 
 /**
  * kvm_arm_get_max_vm_ipa_size:
@@ -359,17 +356,17 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
 
 static inline void kvm_arm_add_vcpu_properties(Object *obj) {}
 
-static inline bool kvm_arm_aarch32_supported(CPUState *cs)
+static inline bool kvm_arm_aarch32_supported(void)
 {
     return false;
 }
 
-static inline bool kvm_arm_pmu_supported(CPUState *cs)
+static inline bool kvm_arm_pmu_supported(void)
 {
     return false;
 }
 
-static inline bool kvm_arm_sve_supported(CPUState *cs)
+static inline bool kvm_arm_sve_supported(void)
 {
     return false;
 }
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index 6d890b2161..686f9fbf46 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -429,6 +429,112 @@ Vimm_1r          1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
                  vm=%vm_dp vd=%vd_dp size=1
     VDUP_scalar  1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \
                  vm=%vm_dp vd=%vd_dp size=2
+
+    ##################################################################
+    # 2-reg-misc grouping:
+    # 1111 001 11 D 11 size:2 opc1:2 Vd:4 0 opc2:4 q:1 M 0 Vm:4
+    ##################################################################
+
+    &2misc vd vm q size
+
+    @2misc       .... ... .. . .. size:2 .. .... . .... q:1 . . .... \
+                 &2misc vm=%vm_dp vd=%vd_dp
+    @2misc_q0    .... ... .. . .. size:2 .. .... . .... . . . .... \
+                 &2misc vm=%vm_dp vd=%vd_dp q=0
+    @2misc_q1    .... ... .. . .. size:2 .. .... . .... . . . .... \
+                 &2misc vm=%vm_dp vd=%vd_dp q=1
+
+    VREV64       1111 001 11 . 11 .. 00 .... 0 0000 . . 0 .... @2misc
+    VREV32       1111 001 11 . 11 .. 00 .... 0 0001 . . 0 .... @2misc
+    VREV16       1111 001 11 . 11 .. 00 .... 0 0010 . . 0 .... @2misc
+
+    VPADDL_S     1111 001 11 . 11 .. 00 .... 0 0100 . . 0 .... @2misc
+    VPADDL_U     1111 001 11 . 11 .. 00 .... 0 0101 . . 0 .... @2misc
+
+    AESE         1111 001 11 . 11 .. 00 .... 0 0110 0 . 0 .... @2misc_q1
+    AESD         1111 001 11 . 11 .. 00 .... 0 0110 1 . 0 .... @2misc_q1
+    AESMC        1111 001 11 . 11 .. 00 .... 0 0111 0 . 0 .... @2misc_q1
+    AESIMC       1111 001 11 . 11 .. 00 .... 0 0111 1 . 0 .... @2misc_q1
+
+    VCLS         1111 001 11 . 11 .. 00 .... 0 1000 . . 0 .... @2misc
+    VCLZ         1111 001 11 . 11 .. 00 .... 0 1001 . . 0 .... @2misc
+    VCNT         1111 001 11 . 11 .. 00 .... 0 1010 . . 0 .... @2misc
+
+    VMVN         1111 001 11 . 11 .. 00 .... 0 1011 . . 0 .... @2misc
+
+    VPADAL_S     1111 001 11 . 11 .. 00 .... 0 1100 . . 0 .... @2misc
+    VPADAL_U     1111 001 11 . 11 .. 00 .... 0 1101 . . 0 .... @2misc
+
+    VQABS        1111 001 11 . 11 .. 00 .... 0 1110 . . 0 .... @2misc
+    VQNEG        1111 001 11 . 11 .. 00 .... 0 1111 . . 0 .... @2misc
+
+    VCGT0        1111 001 11 . 11 .. 01 .... 0 0000 . . 0 .... @2misc
+    VCGE0        1111 001 11 . 11 .. 01 .... 0 0001 . . 0 .... @2misc
+    VCEQ0        1111 001 11 . 11 .. 01 .... 0 0010 . . 0 .... @2misc
+    VCLE0        1111 001 11 . 11 .. 01 .... 0 0011 . . 0 .... @2misc
+    VCLT0        1111 001 11 . 11 .. 01 .... 0 0100 . . 0 .... @2misc
+
+    SHA1H        1111 001 11 . 11 .. 01 .... 0 0101 1 . 0 .... @2misc_q1
+
+    VABS         1111 001 11 . 11 .. 01 .... 0 0110 . . 0 .... @2misc
+    VNEG         1111 001 11 . 11 .. 01 .... 0 0111 . . 0 .... @2misc
+
+    VCGT0_F      1111 001 11 . 11 .. 01 .... 0 1000 . . 0 .... @2misc
+    VCGE0_F      1111 001 11 . 11 .. 01 .... 0 1001 . . 0 .... @2misc
+    VCEQ0_F      1111 001 11 . 11 .. 01 .... 0 1010 . . 0 .... @2misc
+    VCLE0_F      1111 001 11 . 11 .. 01 .... 0 1011 . . 0 .... @2misc
+    VCLT0_F      1111 001 11 . 11 .. 01 .... 0 1100 . . 0 .... @2misc
+
+    VABS_F       1111 001 11 . 11 .. 01 .... 0 1110 . . 0 .... @2misc
+    VNEG_F       1111 001 11 . 11 .. 01 .... 0 1111 . . 0 .... @2misc
+
+    VSWP         1111 001 11 . 11 .. 10 .... 0 0000 . . 0 .... @2misc
+    VTRN         1111 001 11 . 11 .. 10 .... 0 0001 . . 0 .... @2misc
+    VUZP         1111 001 11 . 11 .. 10 .... 0 0010 . . 0 .... @2misc
+    VZIP         1111 001 11 . 11 .. 10 .... 0 0011 . . 0 .... @2misc
+
+    VMOVN        1111 001 11 . 11 .. 10 .... 0 0100 0 . 0 .... @2misc_q0
+    # VQMOVUN: unsigned result (source is always signed)
+    VQMOVUN      1111 001 11 . 11 .. 10 .... 0 0100 1 . 0 .... @2misc_q0
+    # VQMOVN: signed result, source may be signed (_S) or unsigned (_U)
+    VQMOVN_S     1111 001 11 . 11 .. 10 .... 0 0101 0 . 0 .... @2misc_q0
+    VQMOVN_U     1111 001 11 . 11 .. 10 .... 0 0101 1 . 0 .... @2misc_q0
+
+    VSHLL        1111 001 11 . 11 .. 10 .... 0 0110 0 . 0 .... @2misc_q0
+
+    SHA1SU1      1111 001 11 . 11 .. 10 .... 0 0111 0 . 0 .... @2misc_q1
+    SHA256SU0    1111 001 11 . 11 .. 10 .... 0 0111 1 . 0 .... @2misc_q1
+
+    VRINTN       1111 001 11 . 11 .. 10 .... 0 1000 . . 0 .... @2misc
+    VRINTX       1111 001 11 . 11 .. 10 .... 0 1001 . . 0 .... @2misc
+    VRINTA       1111 001 11 . 11 .. 10 .... 0 1010 . . 0 .... @2misc
+    VRINTZ       1111 001 11 . 11 .. 10 .... 0 1011 . . 0 .... @2misc
+
+    VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
+
+    VRINTM       1111 001 11 . 11 .. 10 .... 0 1101 . . 0 .... @2misc
+
+    VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0
+
+    VRINTP       1111 001 11 . 11 .. 10 .... 0 1111 . . 0 .... @2misc
+
+    VCVTAS       1111 001 11 . 11 .. 11 .... 0 0000 . . 0 .... @2misc
+    VCVTAU       1111 001 11 . 11 .. 11 .... 0 0001 . . 0 .... @2misc
+    VCVTNS       1111 001 11 . 11 .. 11 .... 0 0010 . . 0 .... @2misc
+    VCVTNU       1111 001 11 . 11 .. 11 .... 0 0011 . . 0 .... @2misc
+    VCVTPS       1111 001 11 . 11 .. 11 .... 0 0100 . . 0 .... @2misc
+    VCVTPU       1111 001 11 . 11 .. 11 .... 0 0101 . . 0 .... @2misc
+    VCVTMS       1111 001 11 . 11 .. 11 .... 0 0110 . . 0 .... @2misc
+    VCVTMU       1111 001 11 . 11 .. 11 .... 0 0111 . . 0 .... @2misc
+
+    VRECPE       1111 001 11 . 11 .. 11 .... 0 1000 . . 0 .... @2misc
+    VRSQRTE      1111 001 11 . 11 .. 11 .... 0 1001 . . 0 .... @2misc
+    VRECPE_F     1111 001 11 . 11 .. 11 .... 0 1010 . . 0 .... @2misc
+    VRSQRTE_F    1111 001 11 . 11 .. 11 .... 0 1011 . . 0 .... @2misc
+    VCVT_FS      1111 001 11 . 11 .. 11 .... 0 1100 . . 0 .... @2misc
+    VCVT_FU      1111 001 11 . 11 .. 11 .... 0 1101 . . 0 .... @2misc
+    VCVT_SF      1111 001 11 . 11 .. 11 .... 0 1110 . . 0 .... @2misc
+    VCVT_UF      1111 001 11 . 11 .. 11 .... 0 1111 . . 0 .... @2misc
   ]
 
   # Subgroup for size != 0b11
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index a0e72ad694..4cef862c41 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -9534,7 +9534,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
         TCGv_i64 tcg_op = tcg_temp_new_i64();
         TCGv_i64 tcg_zero = tcg_const_i64(0);
         TCGv_i64 tcg_res = tcg_temp_new_i64();
-        NeonGenTwoDoubleOPFn *genfn;
+        NeonGenTwoDoubleOpFn *genfn;
         bool swap = false;
         int pass;
 
@@ -9576,7 +9576,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
         TCGv_i32 tcg_op = tcg_temp_new_i32();
         TCGv_i32 tcg_zero = tcg_const_i32(0);
         TCGv_i32 tcg_res = tcg_temp_new_i32();
-        NeonGenTwoSingleOPFn *genfn;
+        NeonGenTwoSingleOpFn *genfn;
         bool swap = false;
         int pass, maxpasses;
 
@@ -11370,18 +11370,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
                 genfn(tcg_res, tcg_op1, tcg_op2);
             }
 
-            if (opcode == 0xf) {
-                /* SABA, UABA: accumulating ops */
-                static NeonGenTwoOpFn * const fns[3] = {
-                    gen_helper_neon_add_u8,
-                    gen_helper_neon_add_u16,
-                    tcg_gen_add_i32,
-                };
-
-                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
-                fns[size](tcg_res, tcg_op1, tcg_res);
-            }
-
             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
 
             tcg_temp_free_i32(tcg_res);
@@ -11917,8 +11905,8 @@ static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
     } else {
         for (pass = 0; pass < maxpass; pass++) {
             TCGv_i64 tcg_op = tcg_temp_new_i64();
-            NeonGenOneOpFn *genfn;
-            static NeonGenOneOpFn * const fns[2][2] = {
+            NeonGenOne64OpFn *genfn;
+            static NeonGenOne64OpFn * const fns[2][2] = {
                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
             };
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index a5aa56bbde..f6cb921573 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -54,6 +54,107 @@ static inline int rsub_8(DisasContext *s, int x)
 #include "decode-neon-ls.inc.c"
 #include "decode-neon-shared.inc.c"
 
+/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
+ * where 0 is the least significant end of the register.
+ */
+static inline long
+neon_element_offset(int reg, int element, MemOp size)
+{
+    int element_size = 1 << size;
+    int ofs = element * element_size;
+#ifdef HOST_WORDS_BIGENDIAN
+    /* Calculate the offset assuming fully little-endian,
+     * then XOR to account for the order of the 8-byte units.
+     */
+    if (element_size < 8) {
+        ofs ^= 8 - element_size;
+    }
+#endif
+    return neon_reg_offset(reg, 0) + ofs;
+}
+
+static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
+{
+    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
+
+    switch (mop) {
+    case MO_UB:
+        tcg_gen_ld8u_i32(var, cpu_env, offset);
+        break;
+    case MO_UW:
+        tcg_gen_ld16u_i32(var, cpu_env, offset);
+        break;
+    case MO_UL:
+        tcg_gen_ld_i32(var, cpu_env, offset);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
+{
+    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
+
+    switch (mop) {
+    case MO_UB:
+        tcg_gen_ld8u_i64(var, cpu_env, offset);
+        break;
+    case MO_UW:
+        tcg_gen_ld16u_i64(var, cpu_env, offset);
+        break;
+    case MO_UL:
+        tcg_gen_ld32u_i64(var, cpu_env, offset);
+        break;
+    case MO_Q:
+        tcg_gen_ld_i64(var, cpu_env, offset);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
+{
+    long offset = neon_element_offset(reg, ele, size);
+
+    switch (size) {
+    case MO_8:
+        tcg_gen_st8_i32(var, cpu_env, offset);
+        break;
+    case MO_16:
+        tcg_gen_st16_i32(var, cpu_env, offset);
+        break;
+    case MO_32:
+        tcg_gen_st_i32(var, cpu_env, offset);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
+{
+    long offset = neon_element_offset(reg, ele, size);
+
+    switch (size) {
+    case MO_8:
+        tcg_gen_st8_i64(var, cpu_env, offset);
+        break;
+    case MO_16:
+        tcg_gen_st16_i64(var, cpu_env, offset);
+        break;
+    case MO_32:
+        tcg_gen_st32_i64(var, cpu_env, offset);
+        break;
+    case MO_64:
+        tcg_gen_st_i64(var, cpu_env, offset);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
 {
     int opr_sz;
@@ -1664,7 +1765,7 @@ static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
 }
 
 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
-                      NeonGenTwoSingleOPFn *fn)
+                      NeonGenTwoSingleOpFn *fn)
 {
     /* FP operations in 2-reg-and-shift group */
     TCGv_i32 tmp, shiftv;
@@ -2970,3 +3071,1091 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
                          a->q ? 16 : 8, a->q ? 16 : 8);
     return true;
 }
+
+static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
+{
+    int pass, half;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (a->size == 3) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
+        TCGv_i32 tmp[2];
+
+        for (half = 0; half < 2; half++) {
+            tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
+            switch (a->size) {
+            case 0:
+                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
+                break;
+            case 1:
+                gen_swap_half(tmp[half], tmp[half]);
+                break;
+            case 2:
+                break;
+            default:
+                g_assert_not_reached();
+            }
+        }
+        neon_store_reg(a->vd, pass * 2, tmp[1]);
+        neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);
+    }
+    return true;
+}
+
+static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
+                              NeonGenWidenFn *widenfn,
+                              NeonGenTwo64OpFn *opfn,
+                              NeonGenTwo64OpFn *accfn)
+{
+    /*
+     * Pairwise long operations: widen both halves of the pair,
+     * combine the pairs with the opfn, and then possibly accumulate
+     * into the destination with the accfn.
+     */
+    int pass;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (!widenfn) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    for (pass = 0; pass < a->q + 1; pass++) {
+        TCGv_i32 tmp;
+        TCGv_i64 rm0_64, rm1_64, rd_64;
+
+        rm0_64 = tcg_temp_new_i64();
+        rm1_64 = tcg_temp_new_i64();
+        rd_64 = tcg_temp_new_i64();
+        tmp = neon_load_reg(a->vm, pass * 2);
+        widenfn(rm0_64, tmp);
+        tcg_temp_free_i32(tmp);
+        tmp = neon_load_reg(a->vm, pass * 2 + 1);
+        widenfn(rm1_64, tmp);
+        tcg_temp_free_i32(tmp);
+        opfn(rd_64, rm0_64, rm1_64);
+        tcg_temp_free_i64(rm0_64);
+        tcg_temp_free_i64(rm1_64);
+
+        if (accfn) {
+            TCGv_i64 tmp64 = tcg_temp_new_i64();
+            neon_load_reg64(tmp64, a->vd + pass);
+            accfn(rd_64, tmp64, rd_64);
+            tcg_temp_free_i64(tmp64);
+        }
+        neon_store_reg64(rd_64, a->vd + pass);
+        tcg_temp_free_i64(rd_64);
+    }
+    return true;
+}
+
+static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
+{
+    static NeonGenWidenFn * const widenfn[] = {
+        gen_helper_neon_widen_s8,
+        gen_helper_neon_widen_s16,
+        tcg_gen_ext_i32_i64,
+        NULL,
+    };
+    static NeonGenTwo64OpFn * const opfn[] = {
+        gen_helper_neon_paddl_u16,
+        gen_helper_neon_paddl_u32,
+        tcg_gen_add_i64,
+        NULL,
+    };
+
+    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
+}
+
+static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
+{
+    static NeonGenWidenFn * const widenfn[] = {
+        gen_helper_neon_widen_u8,
+        gen_helper_neon_widen_u16,
+        tcg_gen_extu_i32_i64,
+        NULL,
+    };
+    static NeonGenTwo64OpFn * const opfn[] = {
+        gen_helper_neon_paddl_u16,
+        gen_helper_neon_paddl_u32,
+        tcg_gen_add_i64,
+        NULL,
+    };
+
+    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
+}
+
+static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
+{
+    static NeonGenWidenFn * const widenfn[] = {
+        gen_helper_neon_widen_s8,
+        gen_helper_neon_widen_s16,
+        tcg_gen_ext_i32_i64,
+        NULL,
+    };
+    static NeonGenTwo64OpFn * const opfn[] = {
+        gen_helper_neon_paddl_u16,
+        gen_helper_neon_paddl_u32,
+        tcg_gen_add_i64,
+        NULL,
+    };
+    static NeonGenTwo64OpFn * const accfn[] = {
+        gen_helper_neon_addl_u16,
+        gen_helper_neon_addl_u32,
+        tcg_gen_add_i64,
+        NULL,
+    };
+
+    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
+                             accfn[a->size]);
+}
+
+static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
+{
+    static NeonGenWidenFn * const widenfn[] = {
+        gen_helper_neon_widen_u8,
+        gen_helper_neon_widen_u16,
+        tcg_gen_extu_i32_i64,
+        NULL,
+    };
+    static NeonGenTwo64OpFn * const opfn[] = {
+        gen_helper_neon_paddl_u16,
+        gen_helper_neon_paddl_u32,
+        tcg_gen_add_i64,
+        NULL,
+    };
+    static NeonGenTwo64OpFn * const accfn[] = {
+        gen_helper_neon_addl_u16,
+        gen_helper_neon_addl_u32,
+        tcg_gen_add_i64,
+        NULL,
+    };
+
+    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
+                             accfn[a->size]);
+}
+
+typedef void ZipFn(TCGv_ptr, TCGv_ptr);
+
+static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
+                       ZipFn *fn)
+{
+    TCGv_ptr pd, pm;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (!fn) {
+        /* Bad size or size/q combination */
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    pd = vfp_reg_ptr(true, a->vd);
+    pm = vfp_reg_ptr(true, a->vm);
+    fn(pd, pm);
+    tcg_temp_free_ptr(pd);
+    tcg_temp_free_ptr(pm);
+    return true;
+}
+
+static bool trans_VUZP(DisasContext *s, arg_2misc *a)
+{
+    static ZipFn * const fn[2][4] = {
+        {
+            gen_helper_neon_unzip8,
+            gen_helper_neon_unzip16,
+            NULL,
+            NULL,
+        }, {
+            gen_helper_neon_qunzip8,
+            gen_helper_neon_qunzip16,
+            gen_helper_neon_qunzip32,
+            NULL,
+        }
+    };
+    return do_zip_uzp(s, a, fn[a->q][a->size]);
+}
+
+static bool trans_VZIP(DisasContext *s, arg_2misc *a)
+{
+    static ZipFn * const fn[2][4] = {
+        {
+            gen_helper_neon_zip8,
+            gen_helper_neon_zip16,
+            NULL,
+            NULL,
+        }, {
+            gen_helper_neon_qzip8,
+            gen_helper_neon_qzip16,
+            gen_helper_neon_qzip32,
+            NULL,
+        }
+    };
+    return do_zip_uzp(s, a, fn[a->q][a->size]);
+}
+
+static bool do_vmovn(DisasContext *s, arg_2misc *a,
+                     NeonGenNarrowEnvFn *narrowfn)
+{
+    TCGv_i64 rm;
+    TCGv_i32 rd0, rd1;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vm & 1) {
+        return false;
+    }
+
+    if (!narrowfn) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    rm = tcg_temp_new_i64();
+    rd0 = tcg_temp_new_i32();
+    rd1 = tcg_temp_new_i32();
+
+    neon_load_reg64(rm, a->vm);
+    narrowfn(rd0, cpu_env, rm);
+    neon_load_reg64(rm, a->vm + 1);
+    narrowfn(rd1, cpu_env, rm);
+    neon_store_reg(a->vd, 0, rd0);
+    neon_store_reg(a->vd, 1, rd1);
+    tcg_temp_free_i64(rm);
+    return true;
+}
+
+#define DO_VMOVN(INSN, FUNC)                                    \
+    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
+    {                                                           \
+        static NeonGenNarrowEnvFn * const narrowfn[] = {        \
+            FUNC##8,                                            \
+            FUNC##16,                                           \
+            FUNC##32,                                           \
+            NULL,                                               \
+        };                                                      \
+        return do_vmovn(s, a, narrowfn[a->size]);               \
+    }
+
+DO_VMOVN(VMOVN, gen_neon_narrow_u)
+DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
+DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
+DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
+
+static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
+{
+    TCGv_i32 rm0, rm1;
+    TCGv_i64 rd;
+    static NeonGenWidenFn * const widenfns[] = {
+        gen_helper_neon_widen_u8,
+        gen_helper_neon_widen_u16,
+        tcg_gen_extu_i32_i64,
+        NULL,
+    };
+    NeonGenWidenFn *widenfn = widenfns[a->size];
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vd & 1) {
+        return false;
+    }
+
+    if (!widenfn) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    rd = tcg_temp_new_i64();
+
+    rm0 = neon_load_reg(a->vm, 0);
+    rm1 = neon_load_reg(a->vm, 1);
+
+    widenfn(rd, rm0);
+    tcg_gen_shli_i64(rd, rd, 8 << a->size);
+    neon_store_reg64(rd, a->vd);
+    widenfn(rd, rm1);
+    tcg_gen_shli_i64(rd, rd, 8 << a->size);
+    neon_store_reg64(rd, a->vd + 1);
+
+    tcg_temp_free_i64(rd);
+    tcg_temp_free_i32(rm0);
+    tcg_temp_free_i32(rm1);
+    return true;
+}
+
+static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
+{
+    TCGv_ptr fpst;
+    TCGv_i32 ahp, tmp, tmp2, tmp3;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+        !dc_isar_feature(aa32_fp16_spconv, s)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if ((a->vm & 1) || (a->size != 1)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fpst = get_fpstatus_ptr(true);
+    ahp = get_ahp_flag();
+    tmp = neon_load_reg(a->vm, 0);
+    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+    tmp2 = neon_load_reg(a->vm, 1);
+    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
+    tcg_gen_shli_i32(tmp2, tmp2, 16);
+    tcg_gen_or_i32(tmp2, tmp2, tmp);
+    tcg_temp_free_i32(tmp);
+    tmp = neon_load_reg(a->vm, 2);
+    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+    tmp3 = neon_load_reg(a->vm, 3);
+    neon_store_reg(a->vd, 0, tmp2);
+    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
+    tcg_gen_shli_i32(tmp3, tmp3, 16);
+    tcg_gen_or_i32(tmp3, tmp3, tmp);
+    neon_store_reg(a->vd, 1, tmp3);
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(ahp);
+    tcg_temp_free_ptr(fpst);
+
+    return true;
+}
+
+static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
+{
+    TCGv_ptr fpst;
+    TCGv_i32 ahp, tmp, tmp2, tmp3;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+        !dc_isar_feature(aa32_fp16_spconv, s)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if ((a->vd & 1) || (a->size != 1)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fpst = get_fpstatus_ptr(true);
+    ahp = get_ahp_flag();
+    tmp3 = tcg_temp_new_i32();
+    tmp = neon_load_reg(a->vm, 0);
+    tmp2 = neon_load_reg(a->vm, 1);
+    tcg_gen_ext16u_i32(tmp3, tmp);
+    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+    neon_store_reg(a->vd, 0, tmp3);
+    tcg_gen_shri_i32(tmp, tmp, 16);
+    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
+    neon_store_reg(a->vd, 1, tmp);
+    tmp3 = tcg_temp_new_i32();
+    tcg_gen_ext16u_i32(tmp3, tmp2);
+    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+    neon_store_reg(a->vd, 2, tmp3);
+    tcg_gen_shri_i32(tmp2, tmp2, 16);
+    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
+    neon_store_reg(a->vd, 3, tmp2);
+    tcg_temp_free_i32(ahp);
+    tcg_temp_free_ptr(fpst);
+
+    return true;
+}
+
+static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
+{
+    int vec_size = a->q ? 16 : 8;
+    int rd_ofs = neon_reg_offset(a->vd, 0);
+    int rm_ofs = neon_reg_offset(a->vm, 0);
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->size == 3) {
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
+
+    return true;
+}
+
+#define DO_2MISC_VEC(INSN, FN)                                  \
+    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
+    {                                                           \
+        return do_2misc_vec(s, a, FN);                          \
+    }
+
+DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
+DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
+DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
+DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
+DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
+DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
+DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
+
+static bool trans_VMVN(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 0) {
+        return false;
+    }
+    return do_2misc_vec(s, a, tcg_gen_gvec_not);
+}
+
+#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
+    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
+                         uint32_t rm_ofs, uint32_t oprsz,               \
+                         uint32_t maxsz)                                \
+    {                                                                   \
+        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
+                           DATA, FUNC);                                 \
+    }
+
+#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
+    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
+                         uint32_t rm_ofs, uint32_t oprsz,               \
+                         uint32_t maxsz)                                \
+    {                                                                   \
+        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
+    }
+
+WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
+WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
+WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
+WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
+WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
+WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
+WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
+
+#define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
+    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
+    {                                                           \
+        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
+            return false;                                       \
+        }                                                       \
+        return do_2misc_vec(s, a, gen_##INSN);                  \
+    }
+
+DO_2M_CRYPTO(AESE, aa32_aes, 0)
+DO_2M_CRYPTO(AESD, aa32_aes, 0)
+DO_2M_CRYPTO(AESMC, aa32_aes, 0)
+DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
+DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
+DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
+DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
+
+static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
+{
+    int pass;
+
+    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!fn) {
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
+        fn(tmp, tmp);
+        neon_store_reg(a->vd, pass, tmp);
+    }
+
+    return true;
+}
+
+static bool trans_VREV32(DisasContext *s, arg_2misc *a)
+{
+    static NeonGenOneOpFn * const fn[] = {
+        tcg_gen_bswap32_i32,
+        gen_swap_half,
+        NULL,
+        NULL,
+    };
+    return do_2misc(s, a, fn[a->size]);
+}
+
+static bool trans_VREV16(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 0) {
+        return false;
+    }
+    return do_2misc(s, a, gen_rev16);
+}
+
+static bool trans_VCLS(DisasContext *s, arg_2misc *a)
+{
+    static NeonGenOneOpFn * const fn[] = {
+        gen_helper_neon_cls_s8,
+        gen_helper_neon_cls_s16,
+        gen_helper_neon_cls_s32,
+        NULL,
+    };
+    return do_2misc(s, a, fn[a->size]);
+}
+
+static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
+{
+    tcg_gen_clzi_i32(rd, rm, 32);
+}
+
+static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
+{
+    static NeonGenOneOpFn * const fn[] = {
+        gen_helper_neon_clz_u8,
+        gen_helper_neon_clz_u16,
+        do_VCLZ_32,
+        NULL,
+    };
+    return do_2misc(s, a, fn[a->size]);
+}
+
+static bool trans_VCNT(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 0) {
+        return false;
+    }
+    return do_2misc(s, a, gen_helper_neon_cnt_u8);
+}
+
+static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 2) {
+        return false;
+    }
+    /* TODO: FP16 : size == 1 */
+    return do_2misc(s, a, gen_helper_vfp_abss);
+}
+
+static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 2) {
+        return false;
+    }
+    /* TODO: FP16 : size == 1 */
+    return do_2misc(s, a, gen_helper_vfp_negs);
+}
+
+static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 2) {
+        return false;
+    }
+    return do_2misc(s, a, gen_helper_recpe_u32);
+}
+
+static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 2) {
+        return false;
+    }
+    return do_2misc(s, a, gen_helper_rsqrte_u32);
+}
+
+#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
+    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
+    {                                                   \
+        FUNC(d, cpu_env, m);                            \
+    }
+
+WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
+WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
+WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
+WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
+WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
+WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
+
+static bool trans_VQABS(DisasContext *s, arg_2misc *a)
+{
+    static NeonGenOneOpFn * const fn[] = {
+        gen_VQABS_s8,
+        gen_VQABS_s16,
+        gen_VQABS_s32,
+        NULL,
+    };
+    return do_2misc(s, a, fn[a->size]);
+}
+
+static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
+{
+    static NeonGenOneOpFn * const fn[] = {
+        gen_VQNEG_s8,
+        gen_VQNEG_s16,
+        gen_VQNEG_s32,
+        NULL,
+    };
+    return do_2misc(s, a, fn[a->size]);
+}
+
+static bool do_2misc_fp(DisasContext *s, arg_2misc *a,
+                        NeonGenOneSingleOpFn *fn)
+{
+    int pass;
+    TCGv_ptr fpst;
+
+    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->size != 2) {
+        /* TODO: FP16 will be the size == 1 case */
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fpst = get_fpstatus_ptr(1);
+    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
+        fn(tmp, tmp, fpst);
+        neon_store_reg(a->vd, pass, tmp);
+    }
+    tcg_temp_free_ptr(fpst);
+
+    return true;
+}
+
+#define DO_2MISC_FP(INSN, FUNC)                                 \
+    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
+    {                                                           \
+        return do_2misc_fp(s, a, FUNC);                         \
+    }
+
+DO_2MISC_FP(VRECPE_F, gen_helper_recpe_f32)
+DO_2MISC_FP(VRSQRTE_F, gen_helper_rsqrte_f32)
+DO_2MISC_FP(VCVT_FS, gen_helper_vfp_sitos)
+DO_2MISC_FP(VCVT_FU, gen_helper_vfp_uitos)
+DO_2MISC_FP(VCVT_SF, gen_helper_vfp_tosizs)
+DO_2MISC_FP(VCVT_UF, gen_helper_vfp_touizs)
+
+static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
+{
+    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
+        return false;
+    }
+    return do_2misc_fp(s, a, gen_helper_rints_exact);
+}
+
+#define WRAP_FP_CMP0_FWD(WRAPNAME, FUNC)                        \
+    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \
+    {                                                           \
+        TCGv_i32 zero = tcg_const_i32(0);                       \
+        FUNC(d, m, zero, fpst);                                 \
+        tcg_temp_free_i32(zero);                                \
+    }
+#define WRAP_FP_CMP0_REV(WRAPNAME, FUNC)                        \
+    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \
+    {                                                           \
+        TCGv_i32 zero = tcg_const_i32(0);                       \
+        FUNC(d, zero, m, fpst);                                 \
+        tcg_temp_free_i32(zero);                                \
+    }
+
+#define DO_FP_CMP0(INSN, FUNC, REV)                             \
+    WRAP_FP_CMP0_##REV(gen_##INSN, FUNC)                        \
+    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
+    {                                                           \
+        return do_2misc_fp(s, a, gen_##INSN);                   \
+    }
+
+DO_FP_CMP0(VCGT0_F, gen_helper_neon_cgt_f32, FWD)
+DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD)
+DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD)
+DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV)
+DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV)
+
+static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode)
+{
+    /*
+     * Handle a VRINT* operation by iterating 32 bits at a time,
+     * with a specified rounding mode in operation.
+     */
+    int pass;
+    TCGv_ptr fpst;
+    TCGv_i32 tcg_rmode;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+        !arm_dc_feature(s, ARM_FEATURE_V8)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->size != 2) {
+        /* TODO: FP16 will be the size == 1 case */
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fpst = get_fpstatus_ptr(1);
+    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+    gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
+    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
+        gen_helper_rints(tmp, tmp, fpst);
+        neon_store_reg(a->vd, pass, tmp);
+    }
+    gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
+    tcg_temp_free_i32(tcg_rmode);
+    tcg_temp_free_ptr(fpst);
+
+    return true;
+}
+
+#define DO_VRINT(INSN, RMODE)                                   \
+    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
+    {                                                           \
+        return do_vrint(s, a, RMODE);                           \
+    }
+
+DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
+DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
+DO_VRINT(VRINTZ, FPROUNDING_ZERO)
+DO_VRINT(VRINTM, FPROUNDING_NEGINF)
+DO_VRINT(VRINTP, FPROUNDING_POSINF)
+
+static bool do_vcvt(DisasContext *s, arg_2misc *a, int rmode, bool is_signed)
+{
+    /*
+     * Handle a VCVT* operation by iterating 32 bits at a time,
+     * with a specified rounding mode in operation.
+     */
+    int pass;
+    TCGv_ptr fpst;
+    TCGv_i32 tcg_rmode, tcg_shift;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+        !arm_dc_feature(s, ARM_FEATURE_V8)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->size != 2) {
+        /* TODO: FP16 will be the size == 1 case */
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fpst = get_fpstatus_ptr(1);
+    tcg_shift = tcg_const_i32(0);
+    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+    gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
+    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
+        if (is_signed) {
+            gen_helper_vfp_tosls(tmp, tmp, tcg_shift, fpst);
+        } else {
+            gen_helper_vfp_touls(tmp, tmp, tcg_shift, fpst);
+        }
+        neon_store_reg(a->vd, pass, tmp);
+    }
+    gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
+    tcg_temp_free_i32(tcg_rmode);
+    tcg_temp_free_i32(tcg_shift);
+    tcg_temp_free_ptr(fpst);
+
+    return true;
+}
+
+#define DO_VCVT(INSN, RMODE, SIGNED)                            \
+    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
+    {                                                           \
+        return do_vcvt(s, a, RMODE, SIGNED);                    \
+    }
+
+DO_VCVT(VCVTAU, FPROUNDING_TIEAWAY, false)
+DO_VCVT(VCVTAS, FPROUNDING_TIEAWAY, true)
+DO_VCVT(VCVTNU, FPROUNDING_TIEEVEN, false)
+DO_VCVT(VCVTNS, FPROUNDING_TIEEVEN, true)
+DO_VCVT(VCVTPU, FPROUNDING_POSINF, false)
+DO_VCVT(VCVTPS, FPROUNDING_POSINF, true)
+DO_VCVT(VCVTMU, FPROUNDING_NEGINF, false)
+DO_VCVT(VCVTMS, FPROUNDING_NEGINF, true)
+
+static bool trans_VSWP(DisasContext *s, arg_2misc *a)
+{
+    TCGv_i64 rm, rd;
+    int pass;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->size != 0) {
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    rm = tcg_temp_new_i64();
+    rd = tcg_temp_new_i64();
+    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
+        neon_load_reg64(rm, a->vm + pass);
+        neon_load_reg64(rd, a->vd + pass);
+        neon_store_reg64(rm, a->vd + pass);
+        neon_store_reg64(rd, a->vm + pass);
+    }
+    tcg_temp_free_i64(rm);
+    tcg_temp_free_i64(rd);
+
+    return true;
+}
+static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
+{
+    TCGv_i32 rd, tmp;
+
+    rd = tcg_temp_new_i32();
+    tmp = tcg_temp_new_i32();
+
+    tcg_gen_shli_i32(rd, t0, 8);
+    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
+    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
+    tcg_gen_or_i32(rd, rd, tmp);
+
+    tcg_gen_shri_i32(t1, t1, 8);
+    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
+    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
+    tcg_gen_or_i32(t1, t1, tmp);
+    tcg_gen_mov_i32(t0, rd);
+
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(rd);
+}
+
+static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
+{
+    TCGv_i32 rd, tmp;
+
+    rd = tcg_temp_new_i32();
+    tmp = tcg_temp_new_i32();
+
+    tcg_gen_shli_i32(rd, t0, 16);
+    tcg_gen_andi_i32(tmp, t1, 0xffff);
+    tcg_gen_or_i32(rd, rd, tmp);
+    tcg_gen_shri_i32(t1, t1, 16);
+    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
+    tcg_gen_or_i32(t1, t1, tmp);
+    tcg_gen_mov_i32(t0, rd);
+
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(rd);
+}
+
+static bool trans_VTRN(DisasContext *s, arg_2misc *a)
+{
+    TCGv_i32 tmp, tmp2;
+    int pass;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (a->size == 3) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    if (a->size == 2) {
+        for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
+            tmp = neon_load_reg(a->vm, pass);
+            tmp2 = neon_load_reg(a->vd, pass + 1);
+            neon_store_reg(a->vm, pass, tmp2);
+            neon_store_reg(a->vd, pass + 1, tmp);
+        }
+    } else {
+        for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+            tmp = neon_load_reg(a->vm, pass);
+            tmp2 = neon_load_reg(a->vd, pass);
+            if (a->size == 0) {
+                gen_neon_trn_u8(tmp, tmp2);
+            } else {
+                gen_neon_trn_u16(tmp, tmp2);
+            }
+            neon_store_reg(a->vm, pass, tmp2);
+            neon_store_reg(a->vd, pass, tmp);
+        }
+    }
+    return true;
+}
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index e1a9017598..bf31b18657 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -119,15 +119,14 @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
         if (s->v7m_lspact) {
             /*
              * Lazy state saving affects external memory and also the NVIC,
-             * so we must mark it as an IO operation for icount.
+             * so we must mark it as an IO operation for icount (and cause
+             * this to be the last insn in the TB).
              */
             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
+                s->base.is_jmp = DISAS_UPDATE;
                 gen_io_start();
             }
             gen_helper_v7m_preserve_fp_state(cpu_env);
-            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
-                gen_io_end();
-            }
             /*
              * If the preserve_fp_state helper doesn't throw an exception
              * then it will clear LSPACT; we don't need to repeat this for
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 6d18892ade..795964da1f 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -378,9 +378,9 @@ static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 }
 
 /* Swap low and high halfwords.  */
-static void gen_swap_half(TCGv_i32 var)
+static void gen_swap_half(TCGv_i32 dest, TCGv_i32 var)
 {
-    tcg_gen_rotri_i32(var, var, 16);
+    tcg_gen_rotri_i32(dest, var, 16);
 }
 
 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
@@ -1133,25 +1133,6 @@ neon_reg_offset (int reg, int n)
     return vfp_reg_offset(0, sreg);
 }
 
-/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
- * where 0 is the least significant end of the register.
- */
-static inline long
-neon_element_offset(int reg, int element, MemOp size)
-{
-    int element_size = 1 << size;
-    int ofs = element * element_size;
-#ifdef HOST_WORDS_BIGENDIAN
-    /* Calculate the offset assuming fully little-endian,
-     * then XOR to account for the order of the 8-byte units.
-     */
-    if (element_size < 8) {
-        ofs ^= 8 - element_size;
-    }
-#endif
-    return neon_reg_offset(reg, 0) + ofs;
-}
-
 static TCGv_i32 neon_load_reg(int reg, int pass)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
@@ -1159,94 +1140,12 @@ static TCGv_i32 neon_load_reg(int reg, int pass)
     return tmp;
 }
 
-static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
-{
-    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
-
-    switch (mop) {
-    case MO_UB:
-        tcg_gen_ld8u_i32(var, cpu_env, offset);
-        break;
-    case MO_UW:
-        tcg_gen_ld16u_i32(var, cpu_env, offset);
-        break;
-    case MO_UL:
-        tcg_gen_ld_i32(var, cpu_env, offset);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-}
-
-static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
-{
-    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
-
-    switch (mop) {
-    case MO_UB:
-        tcg_gen_ld8u_i64(var, cpu_env, offset);
-        break;
-    case MO_UW:
-        tcg_gen_ld16u_i64(var, cpu_env, offset);
-        break;
-    case MO_UL:
-        tcg_gen_ld32u_i64(var, cpu_env, offset);
-        break;
-    case MO_Q:
-        tcg_gen_ld_i64(var, cpu_env, offset);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-}
-
 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
 {
     tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
     tcg_temp_free_i32(var);
 }
 
-static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
-{
-    long offset = neon_element_offset(reg, ele, size);
-
-    switch (size) {
-    case MO_8:
-        tcg_gen_st8_i32(var, cpu_env, offset);
-        break;
-    case MO_16:
-        tcg_gen_st16_i32(var, cpu_env, offset);
-        break;
-    case MO_32:
-        tcg_gen_st_i32(var, cpu_env, offset);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-}
-
-static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
-{
-    long offset = neon_element_offset(reg, ele, size);
-
-    switch (size) {
-    case MO_8:
-        tcg_gen_st8_i64(var, cpu_env, offset);
-        break;
-    case MO_16:
-        tcg_gen_st16_i64(var, cpu_env, offset);
-        break;
-    case MO_32:
-        tcg_gen_st32_i64(var, cpu_env, offset);
-        break;
-    case MO_64:
-        tcg_gen_st_i64(var, cpu_env, offset);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-}
-
 static inline void neon_load_reg64(TCGv_i64 var, int reg)
 {
     tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
@@ -2934,377 +2833,6 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
     gen_rfe(s, pc, load_cpu_field(spsr));
 }
 
-#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
-
-static int gen_neon_unzip(int rd, int rm, int size, int q)
-{
-    TCGv_ptr pd, pm;
-    
-    if (!q && size == 2) {
-        return 1;
-    }
-    pd = vfp_reg_ptr(true, rd);
-    pm = vfp_reg_ptr(true, rm);
-    if (q) {
-        switch (size) {
-        case 0:
-            gen_helper_neon_qunzip8(pd, pm);
-            break;
-        case 1:
-            gen_helper_neon_qunzip16(pd, pm);
-            break;
-        case 2:
-            gen_helper_neon_qunzip32(pd, pm);
-            break;
-        default:
-            abort();
-        }
-    } else {
-        switch (size) {
-        case 0:
-            gen_helper_neon_unzip8(pd, pm);
-            break;
-        case 1:
-            gen_helper_neon_unzip16(pd, pm);
-            break;
-        default:
-            abort();
-        }
-    }
-    tcg_temp_free_ptr(pd);
-    tcg_temp_free_ptr(pm);
-    return 0;
-}
-
-static int gen_neon_zip(int rd, int rm, int size, int q)
-{
-    TCGv_ptr pd, pm;
-
-    if (!q && size == 2) {
-        return 1;
-    }
-    pd = vfp_reg_ptr(true, rd);
-    pm = vfp_reg_ptr(true, rm);
-    if (q) {
-        switch (size) {
-        case 0:
-            gen_helper_neon_qzip8(pd, pm);
-            break;
-        case 1:
-            gen_helper_neon_qzip16(pd, pm);
-            break;
-        case 2:
-            gen_helper_neon_qzip32(pd, pm);
-            break;
-        default:
-            abort();
-        }
-    } else {
-        switch (size) {
-        case 0:
-            gen_helper_neon_zip8(pd, pm);
-            break;
-        case 1:
-            gen_helper_neon_zip16(pd, pm);
-            break;
-        default:
-            abort();
-        }
-    }
-    tcg_temp_free_ptr(pd);
-    tcg_temp_free_ptr(pm);
-    return 0;
-}
-
-static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
-{
-    TCGv_i32 rd, tmp;
-
-    rd = tcg_temp_new_i32();
-    tmp = tcg_temp_new_i32();
-
-    tcg_gen_shli_i32(rd, t0, 8);
-    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
-    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
-    tcg_gen_or_i32(rd, rd, tmp);
-
-    tcg_gen_shri_i32(t1, t1, 8);
-    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
-    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
-    tcg_gen_or_i32(t1, t1, tmp);
-    tcg_gen_mov_i32(t0, rd);
-
-    tcg_temp_free_i32(tmp);
-    tcg_temp_free_i32(rd);
-}
-
-static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
-{
-    TCGv_i32 rd, tmp;
-
-    rd = tcg_temp_new_i32();
-    tmp = tcg_temp_new_i32();
-
-    tcg_gen_shli_i32(rd, t0, 16);
-    tcg_gen_andi_i32(tmp, t1, 0xffff);
-    tcg_gen_or_i32(rd, rd, tmp);
-    tcg_gen_shri_i32(t1, t1, 16);
-    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
-    tcg_gen_or_i32(t1, t1, tmp);
-    tcg_gen_mov_i32(t0, rd);
-
-    tcg_temp_free_i32(tmp);
-    tcg_temp_free_i32(rd);
-}
-
-static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
-{
-    switch (size) {
-    case 0: gen_helper_neon_narrow_u8(dest, src); break;
-    case 1: gen_helper_neon_narrow_u16(dest, src); break;
-    case 2: tcg_gen_extrl_i64_i32(dest, src); break;
-    default: abort();
-    }
-}
-
-static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
-{
-    switch (size) {
-    case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
-    case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
-    case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
-    default: abort();
-    }
-}
-
-static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
-{
-    switch (size) {
-    case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
-    case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
-    case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
-    default: abort();
-    }
-}
-
-static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
-{
-    switch (size) {
-    case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
-    case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
-    case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
-    default: abort();
-    }
-}
-
-static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
-{
-    if (u) {
-        switch (size) {
-        case 0: gen_helper_neon_widen_u8(dest, src); break;
-        case 1: gen_helper_neon_widen_u16(dest, src); break;
-        case 2: tcg_gen_extu_i32_i64(dest, src); break;
-        default: abort();
-        }
-    } else {
-        switch (size) {
-        case 0: gen_helper_neon_widen_s8(dest, src); break;
-        case 1: gen_helper_neon_widen_s16(dest, src); break;
-        case 2: tcg_gen_ext_i32_i64(dest, src); break;
-        default: abort();
-        }
-    }
-    tcg_temp_free_i32(src);
-}
-
-static inline void gen_neon_addl(int size)
-{
-    switch (size) {
-    case 0: gen_helper_neon_addl_u16(CPU_V001); break;
-    case 1: gen_helper_neon_addl_u32(CPU_V001); break;
-    case 2: tcg_gen_add_i64(CPU_V001); break;
-    default: abort();
-    }
-}
-
-static void gen_neon_narrow_op(int op, int u, int size,
-                               TCGv_i32 dest, TCGv_i64 src)
-{
-    if (op) {
-        if (u) {
-            gen_neon_unarrow_sats(size, dest, src);
-        } else {
-            gen_neon_narrow(size, dest, src);
-        }
-    } else {
-        if (u) {
-            gen_neon_narrow_satu(size, dest, src);
-        } else {
-            gen_neon_narrow_sats(size, dest, src);
-        }
-    }
-}
-
-/* Symbolic constants for op fields for Neon 2-register miscellaneous.
- * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
- * table A7-13.
- */
-#define NEON_2RM_VREV64 0
-#define NEON_2RM_VREV32 1
-#define NEON_2RM_VREV16 2
-#define NEON_2RM_VPADDL 4
-#define NEON_2RM_VPADDL_U 5
-#define NEON_2RM_AESE 6 /* Includes AESD */
-#define NEON_2RM_AESMC 7 /* Includes AESIMC */
-#define NEON_2RM_VCLS 8
-#define NEON_2RM_VCLZ 9
-#define NEON_2RM_VCNT 10
-#define NEON_2RM_VMVN 11
-#define NEON_2RM_VPADAL 12
-#define NEON_2RM_VPADAL_U 13
-#define NEON_2RM_VQABS 14
-#define NEON_2RM_VQNEG 15
-#define NEON_2RM_VCGT0 16
-#define NEON_2RM_VCGE0 17
-#define NEON_2RM_VCEQ0 18
-#define NEON_2RM_VCLE0 19
-#define NEON_2RM_VCLT0 20
-#define NEON_2RM_SHA1H 21
-#define NEON_2RM_VABS 22
-#define NEON_2RM_VNEG 23
-#define NEON_2RM_VCGT0_F 24
-#define NEON_2RM_VCGE0_F 25
-#define NEON_2RM_VCEQ0_F 26
-#define NEON_2RM_VCLE0_F 27
-#define NEON_2RM_VCLT0_F 28
-#define NEON_2RM_VABS_F 30
-#define NEON_2RM_VNEG_F 31
-#define NEON_2RM_VSWP 32
-#define NEON_2RM_VTRN 33
-#define NEON_2RM_VUZP 34
-#define NEON_2RM_VZIP 35
-#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
-#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
-#define NEON_2RM_VSHLL 38
-#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
-#define NEON_2RM_VRINTN 40
-#define NEON_2RM_VRINTX 41
-#define NEON_2RM_VRINTA 42
-#define NEON_2RM_VRINTZ 43
-#define NEON_2RM_VCVT_F16_F32 44
-#define NEON_2RM_VRINTM 45
-#define NEON_2RM_VCVT_F32_F16 46
-#define NEON_2RM_VRINTP 47
-#define NEON_2RM_VCVTAU 48
-#define NEON_2RM_VCVTAS 49
-#define NEON_2RM_VCVTNU 50
-#define NEON_2RM_VCVTNS 51
-#define NEON_2RM_VCVTPU 52
-#define NEON_2RM_VCVTPS 53
-#define NEON_2RM_VCVTMU 54
-#define NEON_2RM_VCVTMS 55
-#define NEON_2RM_VRECPE 56
-#define NEON_2RM_VRSQRTE 57
-#define NEON_2RM_VRECPE_F 58
-#define NEON_2RM_VRSQRTE_F 59
-#define NEON_2RM_VCVT_FS 60
-#define NEON_2RM_VCVT_FU 61
-#define NEON_2RM_VCVT_SF 62
-#define NEON_2RM_VCVT_UF 63
-
-static bool neon_2rm_is_v8_op(int op)
-{
-    /* Return true if this neon 2reg-misc op is ARMv8 and up */
-    switch (op) {
-    case NEON_2RM_VRINTN:
-    case NEON_2RM_VRINTA:
-    case NEON_2RM_VRINTM:
-    case NEON_2RM_VRINTP:
-    case NEON_2RM_VRINTZ:
-    case NEON_2RM_VRINTX:
-    case NEON_2RM_VCVTAU:
-    case NEON_2RM_VCVTAS:
-    case NEON_2RM_VCVTNU:
-    case NEON_2RM_VCVTNS:
-    case NEON_2RM_VCVTPU:
-    case NEON_2RM_VCVTPS:
-    case NEON_2RM_VCVTMU:
-    case NEON_2RM_VCVTMS:
-        return true;
-    default:
-        return false;
-    }
-}
-
-/* Each entry in this array has bit n set if the insn allows
- * size value n (otherwise it will UNDEF). Since unallocated
- * op values will have no bits set they always UNDEF.
- */
-static const uint8_t neon_2rm_sizes[] = {
-    [NEON_2RM_VREV64] = 0x7,
-    [NEON_2RM_VREV32] = 0x3,
-    [NEON_2RM_VREV16] = 0x1,
-    [NEON_2RM_VPADDL] = 0x7,
-    [NEON_2RM_VPADDL_U] = 0x7,
-    [NEON_2RM_AESE] = 0x1,
-    [NEON_2RM_AESMC] = 0x1,
-    [NEON_2RM_VCLS] = 0x7,
-    [NEON_2RM_VCLZ] = 0x7,
-    [NEON_2RM_VCNT] = 0x1,
-    [NEON_2RM_VMVN] = 0x1,
-    [NEON_2RM_VPADAL] = 0x7,
-    [NEON_2RM_VPADAL_U] = 0x7,
-    [NEON_2RM_VQABS] = 0x7,
-    [NEON_2RM_VQNEG] = 0x7,
-    [NEON_2RM_VCGT0] = 0x7,
-    [NEON_2RM_VCGE0] = 0x7,
-    [NEON_2RM_VCEQ0] = 0x7,
-    [NEON_2RM_VCLE0] = 0x7,
-    [NEON_2RM_VCLT0] = 0x7,
-    [NEON_2RM_SHA1H] = 0x4,
-    [NEON_2RM_VABS] = 0x7,
-    [NEON_2RM_VNEG] = 0x7,
-    [NEON_2RM_VCGT0_F] = 0x4,
-    [NEON_2RM_VCGE0_F] = 0x4,
-    [NEON_2RM_VCEQ0_F] = 0x4,
-    [NEON_2RM_VCLE0_F] = 0x4,
-    [NEON_2RM_VCLT0_F] = 0x4,
-    [NEON_2RM_VABS_F] = 0x4,
-    [NEON_2RM_VNEG_F] = 0x4,
-    [NEON_2RM_VSWP] = 0x1,
-    [NEON_2RM_VTRN] = 0x7,
-    [NEON_2RM_VUZP] = 0x7,
-    [NEON_2RM_VZIP] = 0x7,
-    [NEON_2RM_VMOVN] = 0x7,
-    [NEON_2RM_VQMOVN] = 0x7,
-    [NEON_2RM_VSHLL] = 0x7,
-    [NEON_2RM_SHA1SU1] = 0x4,
-    [NEON_2RM_VRINTN] = 0x4,
-    [NEON_2RM_VRINTX] = 0x4,
-    [NEON_2RM_VRINTA] = 0x4,
-    [NEON_2RM_VRINTZ] = 0x4,
-    [NEON_2RM_VCVT_F16_F32] = 0x2,
-    [NEON_2RM_VRINTM] = 0x4,
-    [NEON_2RM_VCVT_F32_F16] = 0x2,
-    [NEON_2RM_VRINTP] = 0x4,
-    [NEON_2RM_VCVTAU] = 0x4,
-    [NEON_2RM_VCVTAS] = 0x4,
-    [NEON_2RM_VCVTNU] = 0x4,
-    [NEON_2RM_VCVTNS] = 0x4,
-    [NEON_2RM_VCVTPU] = 0x4,
-    [NEON_2RM_VCVTPS] = 0x4,
-    [NEON_2RM_VCVTMU] = 0x4,
-    [NEON_2RM_VCVTMS] = 0x4,
-    [NEON_2RM_VRECPE] = 0x4,
-    [NEON_2RM_VRSQRTE] = 0x4,
-    [NEON_2RM_VRECPE_F] = 0x4,
-    [NEON_2RM_VRSQRTE_F] = 0x4,
-    [NEON_2RM_VCVT_FS] = 0x4,
-    [NEON_2RM_VCVT_FU] = 0x4,
-    [NEON_2RM_VCVT_SF] = 0x4,
-    [NEON_2RM_VCVT_UF] = 0x4,
-};
-
 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                             uint32_t opr_sz, uint32_t max_sz,
                             gen_helper_gvec_3_ptr *fn)
@@ -5016,573 +4544,6 @@ void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
 }
 
-/* Translate a NEON data processing instruction.  Return nonzero if the
-   instruction is invalid.
-   We process data in a mixture of 32-bit and 64-bit chunks.
-   Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
-
-static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
-{
-    int op;
-    int q;
-    int rd, rm, rd_ofs, rm_ofs;
-    int size;
-    int pass;
-    int u;
-    int vec_size;
-    TCGv_i32 tmp, tmp2, tmp3;
-
-    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
-        return 1;
-    }
-
-    /* FIXME: this access check should not take precedence over UNDEF
-     * for invalid encodings; we will generate incorrect syndrome information
-     * for attempts to execute invalid vfp/neon encodings with FP disabled.
-     */
-    if (s->fp_excp_el) {
-        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
-                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
-        return 0;
-    }
-
-    if (!s->vfp_enabled)
-      return 1;
-    q = (insn & (1 << 6)) != 0;
-    u = (insn >> 24) & 1;
-    VFP_DREG_D(rd, insn);
-    VFP_DREG_M(rm, insn);
-    size = (insn >> 20) & 3;
-    vec_size = q ? 16 : 8;
-    rd_ofs = neon_reg_offset(rd, 0);
-    rm_ofs = neon_reg_offset(rm, 0);
-
-    if ((insn & (1 << 23)) == 0) {
-        /* Three register same length: handled by decodetree */
-        return 1;
-    } else if (insn & (1 << 4)) {
-        /* Two registers and shift or reg and imm: handled by decodetree */
-        return 1;
-    } else { /* (insn & 0x00800010 == 0x00800000) */
-        if (size != 3) {
-            /*
-             * Three registers of different lengths, or two registers and
-             * a scalar: handled by decodetree
-             */
-            return 1;
-        } else { /* size == 3 */
-            if (!u) {
-                /* Extract: handled by decodetree */
-                return 1;
-            } else if ((insn & (1 << 11)) == 0) {
-                /* Two register misc.  */
-                op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
-                size = (insn >> 18) & 3;
-                /* UNDEF for unknown op values and bad op-size combinations */
-                if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
-                    return 1;
-                }
-                if (neon_2rm_is_v8_op(op) &&
-                    !arm_dc_feature(s, ARM_FEATURE_V8)) {
-                    return 1;
-                }
-                if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
-                    q && ((rm | rd) & 1)) {
-                    return 1;
-                }
-                switch (op) {
-                case NEON_2RM_VREV64:
-                    for (pass = 0; pass < (q ? 2 : 1); pass++) {
-                        tmp = neon_load_reg(rm, pass * 2);
-                        tmp2 = neon_load_reg(rm, pass * 2 + 1);
-                        switch (size) {
-                        case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
-                        case 1: gen_swap_half(tmp); break;
-                        case 2: /* no-op */ break;
-                        default: abort();
-                        }
-                        neon_store_reg(rd, pass * 2 + 1, tmp);
-                        if (size == 2) {
-                            neon_store_reg(rd, pass * 2, tmp2);
-                        } else {
-                            switch (size) {
-                            case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
-                            case 1: gen_swap_half(tmp2); break;
-                            default: abort();
-                            }
-                            neon_store_reg(rd, pass * 2, tmp2);
-                        }
-                    }
-                    break;
-                case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
-                case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
-                    for (pass = 0; pass < q + 1; pass++) {
-                        tmp = neon_load_reg(rm, pass * 2);
-                        gen_neon_widen(cpu_V0, tmp, size, op & 1);
-                        tmp = neon_load_reg(rm, pass * 2 + 1);
-                        gen_neon_widen(cpu_V1, tmp, size, op & 1);
-                        switch (size) {
-                        case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
-                        case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
-                        case 2: tcg_gen_add_i64(CPU_V001); break;
-                        default: abort();
-                        }
-                        if (op >= NEON_2RM_VPADAL) {
-                            /* Accumulate.  */
-                            neon_load_reg64(cpu_V1, rd + pass);
-                            gen_neon_addl(size);
-                        }
-                        neon_store_reg64(cpu_V0, rd + pass);
-                    }
-                    break;
-                case NEON_2RM_VTRN:
-                    if (size == 2) {
-                        int n;
-                        for (n = 0; n < (q ? 4 : 2); n += 2) {
-                            tmp = neon_load_reg(rm, n);
-                            tmp2 = neon_load_reg(rd, n + 1);
-                            neon_store_reg(rm, n, tmp2);
-                            neon_store_reg(rd, n + 1, tmp);
-                        }
-                    } else {
-                        goto elementwise;
-                    }
-                    break;
-                case NEON_2RM_VUZP:
-                    if (gen_neon_unzip(rd, rm, size, q)) {
-                        return 1;
-                    }
-                    break;
-                case NEON_2RM_VZIP:
-                    if (gen_neon_zip(rd, rm, size, q)) {
-                        return 1;
-                    }
-                    break;
-                case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
-                    /* also VQMOVUN; op field and mnemonics don't line up */
-                    if (rm & 1) {
-                        return 1;
-                    }
-                    tmp2 = NULL;
-                    for (pass = 0; pass < 2; pass++) {
-                        neon_load_reg64(cpu_V0, rm + pass);
-                        tmp = tcg_temp_new_i32();
-                        gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
-                                           tmp, cpu_V0);
-                        if (pass == 0) {
-                            tmp2 = tmp;
-                        } else {
-                            neon_store_reg(rd, 0, tmp2);
-                            neon_store_reg(rd, 1, tmp);
-                        }
-                    }
-                    break;
-                case NEON_2RM_VSHLL:
-                    if (q || (rd & 1)) {
-                        return 1;
-                    }
-                    tmp = neon_load_reg(rm, 0);
-                    tmp2 = neon_load_reg(rm, 1);
-                    for (pass = 0; pass < 2; pass++) {
-                        if (pass == 1)
-                            tmp = tmp2;
-                        gen_neon_widen(cpu_V0, tmp, size, 1);
-                        tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
-                        neon_store_reg64(cpu_V0, rd + pass);
-                    }
-                    break;
-                case NEON_2RM_VCVT_F16_F32:
-                {
-                    TCGv_ptr fpst;
-                    TCGv_i32 ahp;
-
-                    if (!dc_isar_feature(aa32_fp16_spconv, s) ||
-                        q || (rm & 1)) {
-                        return 1;
-                    }
-                    fpst = get_fpstatus_ptr(true);
-                    ahp = get_ahp_flag();
-                    tmp = neon_load_reg(rm, 0);
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
-                    tmp2 = neon_load_reg(rm, 1);
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
-                    tcg_gen_shli_i32(tmp2, tmp2, 16);
-                    tcg_gen_or_i32(tmp2, tmp2, tmp);
-                    tcg_temp_free_i32(tmp);
-                    tmp = neon_load_reg(rm, 2);
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
-                    tmp3 = neon_load_reg(rm, 3);
-                    neon_store_reg(rd, 0, tmp2);
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
-                    tcg_gen_shli_i32(tmp3, tmp3, 16);
-                    tcg_gen_or_i32(tmp3, tmp3, tmp);
-                    neon_store_reg(rd, 1, tmp3);
-                    tcg_temp_free_i32(tmp);
-                    tcg_temp_free_i32(ahp);
-                    tcg_temp_free_ptr(fpst);
-                    break;
-                }
-                case NEON_2RM_VCVT_F32_F16:
-                {
-                    TCGv_ptr fpst;
-                    TCGv_i32 ahp;
-                    if (!dc_isar_feature(aa32_fp16_spconv, s) ||
-                        q || (rd & 1)) {
-                        return 1;
-                    }
-                    fpst = get_fpstatus_ptr(true);
-                    ahp = get_ahp_flag();
-                    tmp3 = tcg_temp_new_i32();
-                    tmp = neon_load_reg(rm, 0);
-                    tmp2 = neon_load_reg(rm, 1);
-                    tcg_gen_ext16u_i32(tmp3, tmp);
-                    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
-                    neon_store_reg(rd, 0, tmp3);
-                    tcg_gen_shri_i32(tmp, tmp, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
-                    neon_store_reg(rd, 1, tmp);
-                    tmp3 = tcg_temp_new_i32();
-                    tcg_gen_ext16u_i32(tmp3, tmp2);
-                    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
-                    neon_store_reg(rd, 2, tmp3);
-                    tcg_gen_shri_i32(tmp2, tmp2, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
-                    neon_store_reg(rd, 3, tmp2);
-                    tcg_temp_free_i32(ahp);
-                    tcg_temp_free_ptr(fpst);
-                    break;
-                }
-                case NEON_2RM_AESE: case NEON_2RM_AESMC:
-                    if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
-                        return 1;
-                    }
-                    /*
-                     * Bit 6 is the lowest opcode bit; it distinguishes
-                     * between encryption (AESE/AESMC) and decryption
-                     * (AESD/AESIMC).
-                     */
-                    if (op == NEON_2RM_AESE) {
-                        tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd),
-                                           vfp_reg_offset(true, rd),
-                                           vfp_reg_offset(true, rm),
-                                           16, 16, extract32(insn, 6, 1),
-                                           gen_helper_crypto_aese);
-                    } else {
-                        tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd),
-                                           vfp_reg_offset(true, rm),
-                                           16, 16, extract32(insn, 6, 1),
-                                           gen_helper_crypto_aesmc);
-                    }
-                    break;
-                case NEON_2RM_SHA1H:
-                    if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
-                        return 1;
-                    }
-                    tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
-                                       gen_helper_crypto_sha1h);
-                    break;
-                case NEON_2RM_SHA1SU1:
-                    if ((rm | rd) & 1) {
-                            return 1;
-                    }
-                    /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
-                    if (q) {
-                        if (!dc_isar_feature(aa32_sha2, s)) {
-                            return 1;
-                        }
-                    } else if (!dc_isar_feature(aa32_sha1, s)) {
-                        return 1;
-                    }
-                    tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
-                                       q ? gen_helper_crypto_sha256su0
-                                       : gen_helper_crypto_sha1su1);
-                    break;
-                case NEON_2RM_VMVN:
-                    tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
-                    break;
-                case NEON_2RM_VNEG:
-                    tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
-                    break;
-                case NEON_2RM_VABS:
-                    tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
-                    break;
-
-                case NEON_2RM_VCEQ0:
-                    gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
-                    break;
-                case NEON_2RM_VCGT0:
-                    gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
-                    break;
-                case NEON_2RM_VCLE0:
-                    gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
-                    break;
-                case NEON_2RM_VCGE0:
-                    gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
-                    break;
-                case NEON_2RM_VCLT0:
-                    gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
-                    break;
-
-                default:
-                elementwise:
-                    for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                        tmp = neon_load_reg(rm, pass);
-                        switch (op) {
-                        case NEON_2RM_VREV32:
-                            switch (size) {
-                            case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
-                            case 1: gen_swap_half(tmp); break;
-                            default: abort();
-                            }
-                            break;
-                        case NEON_2RM_VREV16:
-                            gen_rev16(tmp, tmp);
-                            break;
-                        case NEON_2RM_VCLS:
-                            switch (size) {
-                            case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
-                            case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
-                            case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
-                            default: abort();
-                            }
-                            break;
-                        case NEON_2RM_VCLZ:
-                            switch (size) {
-                            case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
-                            case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
-                            case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
-                            default: abort();
-                            }
-                            break;
-                        case NEON_2RM_VCNT:
-                            gen_helper_neon_cnt_u8(tmp, tmp);
-                            break;
-                        case NEON_2RM_VQABS:
-                            switch (size) {
-                            case 0:
-                                gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
-                                break;
-                            case 1:
-                                gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
-                                break;
-                            case 2:
-                                gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
-                                break;
-                            default: abort();
-                            }
-                            break;
-                        case NEON_2RM_VQNEG:
-                            switch (size) {
-                            case 0:
-                                gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
-                                break;
-                            case 1:
-                                gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
-                                break;
-                            case 2:
-                                gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
-                                break;
-                            default: abort();
-                            }
-                            break;
-                        case NEON_2RM_VCGT0_F:
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
-                            tcg_temp_free_i32(tmp2);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VCGE0_F:
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
-                            tcg_temp_free_i32(tmp2);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VCEQ0_F:
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
-                            tcg_temp_free_i32(tmp2);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VCLE0_F:
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
-                            tcg_temp_free_i32(tmp2);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VCLT0_F:
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            tmp2 = tcg_const_i32(0);
-                            gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
-                            tcg_temp_free_i32(tmp2);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VABS_F:
-                            gen_helper_vfp_abss(tmp, tmp);
-                            break;
-                        case NEON_2RM_VNEG_F:
-                            gen_helper_vfp_negs(tmp, tmp);
-                            break;
-                        case NEON_2RM_VSWP:
-                            tmp2 = neon_load_reg(rd, pass);
-                            neon_store_reg(rm, pass, tmp2);
-                            break;
-                        case NEON_2RM_VTRN:
-                            tmp2 = neon_load_reg(rd, pass);
-                            switch (size) {
-                            case 0: gen_neon_trn_u8(tmp, tmp2); break;
-                            case 1: gen_neon_trn_u16(tmp, tmp2); break;
-                            default: abort();
-                            }
-                            neon_store_reg(rm, pass, tmp2);
-                            break;
-                        case NEON_2RM_VRINTN:
-                        case NEON_2RM_VRINTA:
-                        case NEON_2RM_VRINTM:
-                        case NEON_2RM_VRINTP:
-                        case NEON_2RM_VRINTZ:
-                        {
-                            TCGv_i32 tcg_rmode;
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            int rmode;
-
-                            if (op == NEON_2RM_VRINTZ) {
-                                rmode = FPROUNDING_ZERO;
-                            } else {
-                                rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
-                            }
-
-                            tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
-                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
-                                                      cpu_env);
-                            gen_helper_rints(tmp, tmp, fpstatus);
-                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
-                                                      cpu_env);
-                            tcg_temp_free_ptr(fpstatus);
-                            tcg_temp_free_i32(tcg_rmode);
-                            break;
-                        }
-                        case NEON_2RM_VRINTX:
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_rints_exact(tmp, tmp, fpstatus);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VCVTAU:
-                        case NEON_2RM_VCVTAS:
-                        case NEON_2RM_VCVTNU:
-                        case NEON_2RM_VCVTNS:
-                        case NEON_2RM_VCVTPU:
-                        case NEON_2RM_VCVTPS:
-                        case NEON_2RM_VCVTMU:
-                        case NEON_2RM_VCVTMS:
-                        {
-                            bool is_signed = !extract32(insn, 7, 1);
-                            TCGv_ptr fpst = get_fpstatus_ptr(1);
-                            TCGv_i32 tcg_rmode, tcg_shift;
-                            int rmode = fp_decode_rm[extract32(insn, 8, 2)];
-
-                            tcg_shift = tcg_const_i32(0);
-                            tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
-                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
-                                                      cpu_env);
-
-                            if (is_signed) {
-                                gen_helper_vfp_tosls(tmp, tmp,
-                                                     tcg_shift, fpst);
-                            } else {
-                                gen_helper_vfp_touls(tmp, tmp,
-                                                     tcg_shift, fpst);
-                            }
-
-                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
-                                                      cpu_env);
-                            tcg_temp_free_i32(tcg_rmode);
-                            tcg_temp_free_i32(tcg_shift);
-                            tcg_temp_free_ptr(fpst);
-                            break;
-                        }
-                        case NEON_2RM_VRECPE:
-                            gen_helper_recpe_u32(tmp, tmp);
-                            break;
-                        case NEON_2RM_VRSQRTE:
-                            gen_helper_rsqrte_u32(tmp, tmp);
-                            break;
-                        case NEON_2RM_VRECPE_F:
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_recpe_f32(tmp, tmp, fpstatus);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VRSQRTE_F:
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_vfp_sitos(tmp, tmp, fpstatus);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_vfp_uitos(tmp, tmp, fpstatus);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
-                        {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
-                            gen_helper_vfp_touizs(tmp, tmp, fpstatus);
-                            tcg_temp_free_ptr(fpstatus);
-                            break;
-                        }
-                        default:
-                            /* Reserved op values were caught by the
-                             * neon_2rm_sizes[] check earlier.
-                             */
-                            abort();
-                        }
-                        neon_store_reg(rd, pass, tmp);
-                    }
-                    break;
-                }
-            } else {
-                /* VTBL, VTBX, VDUP: handled by decodetree */
-                return 1;
-            }
-        }
-    }
-    return 0;
-}
-
 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
 {
     int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
@@ -8417,7 +7378,7 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
     t1 = load_reg(s, a->rn);
     t2 = load_reg(s, a->rm);
     if (m_swap) {
-        gen_swap_half(t2);
+        gen_swap_half(t2, t2);
     }
     gen_smul_dual(t1, t2);
 
@@ -8475,7 +7436,7 @@ static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
     t1 = load_reg(s, a->rn);
     t2 = load_reg(s, a->rm);
     if (m_swap) {
-        gen_swap_half(t2);
+        gen_swap_half(t2, t2);
     }
     gen_smul_dual(t1, t2);
 
@@ -8824,9 +7785,6 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
             gen_io_start();
         }
         gen_helper_cpsr_write_eret(cpu_env, tmp);
-        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
-            gen_io_end();
-        }
         tcg_temp_free_i32(tmp);
         /* Must exit loop to check un-masked IRQs */
         s->base.is_jmp = DISAS_EXIT;
@@ -9283,13 +8241,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
         }
         /* fall back to legacy decoder */
 
-        if (((insn >> 25) & 7) == 1) {
-            /* NEON Data processing.  */
-            if (disas_neon_data_insn(s, insn)) {
-                goto illegal_op;
-            }
-            return;
-        }
         if ((insn & 0x0e000f00) == 0x0c000100) {
             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
                 /* iWMMXt register transfer.  */
@@ -9477,11 +8428,8 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
             break;
         }
         if (((insn >> 24) & 3) == 3) {
-            /* Translate into the equivalent ARM encoding.  */
-            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
-            if (disas_neon_data_insn(s, insn)) {
-                goto illegal_op;
-            }
+            /* Neon DP, but failed disas_neon_dp() */
+            goto illegal_op;
         } else if (((insn >> 8) & 0xe) == 10) {
             /* VFP, but failed disas_vfp.  */
             goto illegal_op;
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 62ed5c4780..19650a9e2d 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -363,6 +363,7 @@ typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
                         uint32_t, uint32_t, uint32_t);
 
 /* Function prototype for gen_ functions for calling Neon helpers */
+typedef void NeonGenOneOpFn(TCGv_i32, TCGv_i32);
 typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
@@ -372,9 +373,10 @@ typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
 typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
-typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
-typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
-typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
+typedef void NeonGenOneSingleOpFn(TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoSingleOpFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoDoubleOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
+typedef void NeonGenOne64OpFn(TCGv_i64, TCGv_i64);
 typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
 typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);