summary refs log tree commit diff stats
path: root/target-arm
diff options
context:
space:
mode:
Diffstat (limited to 'target-arm')
-rw-r--r-- target-arm/cpu-qom.h | 11
-rw-r--r-- target-arm/cpu.c | 59
-rw-r--r-- target-arm/cpu.h | 13
-rw-r--r-- target-arm/helper.c | 33
-rw-r--r-- target-arm/helper.h | 5
-rw-r--r-- target-arm/kvm-consts.h | 64
-rw-r--r-- target-arm/kvm.c | 243
-rw-r--r-- target-arm/kvm_arm.h | 55
-rw-r--r-- target-arm/translate.c | 302
9 files changed, 718 insertions, 67 deletions
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index b55306a3c3..f32178a9db 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -91,6 +91,17 @@ typedef struct ARMCPU {
     /* GPIO outputs for generic timer */
     qemu_irq gt_timer_outputs[NUM_GTIMERS];
 
+    /* 'compatible' string for this CPU for Linux device trees */
+    const char *dtb_compatible;
+
+    /* Should CPU start in PSCI powered-off state? */
+    bool start_powered_off;
+
+    /* [QEMU_]KVM_ARM_TARGET_* constant for this CPU, or
+     * QEMU_KVM_ARM_TARGET_NONE if the kernel doesn't support this CPU type.
+     */
+    uint32_t kvm_target;
+
     /* The instance init functions for implementation-specific subclasses
      * set these fields to specify the implementation-dependent values of
      * various constant registers and reset values of non-constant
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index d40f2a7a4f..0635e78ec2 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -20,6 +20,7 @@
 
 #include "cpu.h"
 #include "qemu-common.h"
+#include "hw/qdev-properties.h"
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/loader.h"
 #endif
@@ -217,6 +218,13 @@ static void arm_cpu_initfn(Object *obj)
                        ARRAY_SIZE(cpu->gt_timer_outputs));
 #endif
 
+    /* DTB consumers generally don't in fact care what the 'compatible'
+     * string is, so always provide some string and trust that a hypothetical
+     * picky DTB consumer will also provide a helpful error message.
+     */
+    cpu->dtb_compatible = "qemu,unknown";
+    cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
+
     if (tcg_enabled() && !inited) {
         inited = true;
         arm_translate_init();
@@ -318,6 +326,8 @@ static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
 static void arm926_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,arm926";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_VFP);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
@@ -331,6 +341,8 @@ static void arm926_initfn(Object *obj)
 static void arm946_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,arm946";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_MPU);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
@@ -342,6 +354,8 @@ static void arm946_initfn(Object *obj)
 static void arm1026_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,arm1026";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_VFP);
     set_feature(&cpu->env, ARM_FEATURE_AUXCR);
@@ -374,6 +388,8 @@ static void arm1136_r2_initfn(Object *obj)
      * for 1136_r2 (in particular r0p2 does not actually implement most
      * of the ID registers).
      */
+
+    cpu->dtb_compatible = "arm,arm1136";
     set_feature(&cpu->env, ARM_FEATURE_V6);
     set_feature(&cpu->env, ARM_FEATURE_VFP);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
@@ -403,6 +419,8 @@ static void arm1136_r2_initfn(Object *obj)
 static void arm1136_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,arm1136";
     set_feature(&cpu->env, ARM_FEATURE_V6K);
     set_feature(&cpu->env, ARM_FEATURE_V6);
     set_feature(&cpu->env, ARM_FEATURE_VFP);
@@ -433,6 +451,8 @@ static void arm1136_initfn(Object *obj)
 static void arm1176_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,arm1176";
     set_feature(&cpu->env, ARM_FEATURE_V6K);
     set_feature(&cpu->env, ARM_FEATURE_VFP);
     set_feature(&cpu->env, ARM_FEATURE_VAPA);
@@ -463,6 +483,8 @@ static void arm1176_initfn(Object *obj)
 static void arm11mpcore_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,arm11mpcore";
     set_feature(&cpu->env, ARM_FEATURE_V6K);
     set_feature(&cpu->env, ARM_FEATURE_VFP);
     set_feature(&cpu->env, ARM_FEATURE_VAPA);
@@ -516,6 +538,8 @@ static const ARMCPRegInfo cortexa8_cp_reginfo[] = {
 static void cortex_a8_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a8";
     set_feature(&cpu->env, ARM_FEATURE_V7);
     set_feature(&cpu->env, ARM_FEATURE_VFP3);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
@@ -580,6 +604,8 @@ static const ARMCPRegInfo cortexa9_cp_reginfo[] = {
 static void cortex_a9_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a9";
     set_feature(&cpu->env, ARM_FEATURE_V7);
     set_feature(&cpu->env, ARM_FEATURE_VFP3);
     set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
@@ -649,6 +675,8 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
 static void cortex_a15_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a15";
     set_feature(&cpu->env, ARM_FEATURE_V7);
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
     set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
@@ -658,6 +686,7 @@ static void cortex_a15_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
     set_feature(&cpu->env, ARM_FEATURE_LPAE);
+    cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15;
     cpu->midr = 0x412fc0f1;
     cpu->reset_fpsid = 0x410430f0;
     cpu->mvfr0 = 0x10110222;
@@ -697,6 +726,8 @@ static void ti925t_initfn(Object *obj)
 static void sa1100_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "intel,sa1100";
     set_feature(&cpu->env, ARM_FEATURE_STRONGARM);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
     cpu->midr = 0x4401A11B;
@@ -715,6 +746,8 @@ static void sa1110_initfn(Object *obj)
 static void pxa250_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     cpu->midr = 0x69052100;
@@ -725,6 +758,8 @@ static void pxa250_initfn(Object *obj)
 static void pxa255_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     cpu->midr = 0x69052d00;
@@ -735,6 +770,8 @@ static void pxa255_initfn(Object *obj)
 static void pxa260_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     cpu->midr = 0x69052903;
@@ -745,6 +782,8 @@ static void pxa260_initfn(Object *obj)
 static void pxa261_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     cpu->midr = 0x69052d05;
@@ -755,6 +794,8 @@ static void pxa261_initfn(Object *obj)
 static void pxa262_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     cpu->midr = 0x69052d06;
@@ -765,6 +806,8 @@ static void pxa262_initfn(Object *obj)
 static void pxa270a0_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
@@ -776,6 +819,8 @@ static void pxa270a0_initfn(Object *obj)
 static void pxa270a1_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
@@ -787,6 +832,8 @@ static void pxa270a1_initfn(Object *obj)
 static void pxa270b0_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
@@ -798,6 +845,8 @@ static void pxa270b0_initfn(Object *obj)
 static void pxa270b1_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
@@ -809,6 +858,8 @@ static void pxa270b1_initfn(Object *obj)
 static void pxa270c0_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
@@ -820,6 +871,8 @@ static void pxa270c0_initfn(Object *obj)
 static void pxa270c5_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "marvell,xscale";
     set_feature(&cpu->env, ARM_FEATURE_V5);
     set_feature(&cpu->env, ARM_FEATURE_XSCALE);
     set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
@@ -894,6 +947,11 @@ static const ARMCPUInfo arm_cpus[] = {
 #endif
 };
 
+static Property arm_cpu_properties[] = {
+    DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void arm_cpu_class_init(ObjectClass *oc, void *data)
 {
     ARMCPUClass *acc = ARM_CPU_CLASS(oc);
@@ -902,6 +960,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
 
     acc->parent_realize = dc->realize;
     dc->realize = arm_cpu_realizefn;
+    dc->props = arm_cpu_properties;
 
     acc->parent_reset = cc->reset;
     cc->reset = arm_cpu_reset;
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 9f110f15b6..c3f007fc53 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -21,6 +21,8 @@
 
 #include "config.h"
 
+#include "kvm-consts.h"
+
 #if defined(TARGET_AARCH64)
   /* AArch64 definitions */
 #  define TARGET_LONG_BITS 64
@@ -497,17 +499,6 @@ void armv7m_nvic_complete_irq(void *opaque, int irq);
     (((cp) << 16) | ((is64) << 15) | ((crn) << 11) |    \
      ((crm) << 7) | ((opc1) << 3) | (opc2))
 
-/* Note that these must line up with the KVM/ARM register
- * ID field definitions (kvm.c will check this, but we
- * can't just use the KVM defines here as the kvm headers
- * are unavailable to non-KVM-specific files)
- */
-#define CP_REG_SIZE_SHIFT 52
-#define CP_REG_SIZE_MASK       0x00f0000000000000ULL
-#define CP_REG_SIZE_U32        0x0020000000000000ULL
-#define CP_REG_SIZE_U64        0x0030000000000000ULL
-#define CP_REG_ARM             0x4000000000000000ULL
-
 /* Convert a full 64 bit KVM register ID to the truncated 32 bit
  * version used as a key for the coprocessor register hashtable
  */
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 3445813465..5e5e5aad2b 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1173,7 +1173,7 @@ static int vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
 {
     int maskshift = extract32(value, 0, 3);
 
-    if (arm_feature(env, ARM_FEATURE_LPAE)) {
+    if (arm_feature(env, ARM_FEATURE_LPAE) && (value & (1 << 31))) {
         value &= ~((7 << 19) | (3 << 14) | (0xf << 3));
     } else {
         value &= 7;
@@ -1842,6 +1842,12 @@ void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf)
     (*cpu_fprintf)(f, "Available CPUs:\n");
     g_slist_foreach(list, arm_cpu_list_entry, &s);
     g_slist_free(list);
+#ifdef CONFIG_KVM
+    /* The 'host' CPU type is dynamically registered only if KVM is
+     * enabled, so we have to special-case it here:
+     */
+    (*cpu_fprintf)(f, "  host (only available in KVM mode)\n");
+#endif
 }
 
 static void arm_cpu_add_definition(gpointer data, gpointer user_data)
@@ -4079,3 +4085,28 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
     float_status *fpst = fpstp;
     return float64_muladd(a, b, c, 0, fpst);
 }
+
+/* ARMv8 VMAXNM/VMINNM */
+float32 VFP_HELPER(maxnm, s)(float32 a, float32 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return float32_maxnum(a, b, fpst);
+}
+
+float64 VFP_HELPER(maxnm, d)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return float64_maxnum(a, b, fpst);
+}
+
+float32 VFP_HELPER(minnm, s)(float32 a, float32 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return float32_minnum(a, b, fpst);
+}
+
+float64 VFP_HELPER(minnm, d)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return float64_minnum(a, b, fpst);
+}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index cac9564f5f..d459a39e46 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -132,6 +132,11 @@ DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
 DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
 DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
 
+DEF_HELPER_3(vfp_maxnmd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxnms, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_minnmd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minnms, f32, f32, f32, ptr)
+
 DEF_HELPER_3(recps_f32, f32, f32, f32, env)
 DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
 DEF_HELPER_2(recpe_f32, f32, f32, env)
diff --git a/target-arm/kvm-consts.h b/target-arm/kvm-consts.h
new file mode 100644
index 0000000000..2bba0bd198
--- /dev/null
+++ b/target-arm/kvm-consts.h
@@ -0,0 +1,64 @@
+/*
+ * KVM ARM ABI constant definitions
+ *
+ * Copyright (c) 2013 Linaro Limited
+ *
+ * Provide versions of KVM constant defines that can be used even
+ * when CONFIG_KVM is not set and we don't have access to the
+ * KVM headers. If CONFIG_KVM is set, we do a compile-time check
+ * that we haven't got out of sync somehow.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef ARM_KVM_CONSTS_H
+#define ARM_KVM_CONSTS_H
+
+#ifdef CONFIG_KVM
+#include "qemu/compiler.h"
+#include <linux/kvm.h>
+
+#define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(X != Y)
+
+#else
+#define MISMATCH_CHECK(X, Y)
+#endif
+
+#define CP_REG_SIZE_SHIFT 52
+#define CP_REG_SIZE_MASK       0x00f0000000000000ULL
+#define CP_REG_SIZE_U32        0x0020000000000000ULL
+#define CP_REG_SIZE_U64        0x0030000000000000ULL
+#define CP_REG_ARM             0x4000000000000000ULL
+
+MISMATCH_CHECK(CP_REG_SIZE_SHIFT, KVM_REG_SIZE_SHIFT)
+MISMATCH_CHECK(CP_REG_SIZE_MASK, KVM_REG_SIZE_MASK)
+MISMATCH_CHECK(CP_REG_SIZE_U32, KVM_REG_SIZE_U32)
+MISMATCH_CHECK(CP_REG_SIZE_U64, KVM_REG_SIZE_U64)
+MISMATCH_CHECK(CP_REG_ARM, KVM_REG_ARM)
+
+#define PSCI_FN_BASE 0x95c1ba5e
+#define PSCI_FN(n) (PSCI_FN_BASE + (n))
+#define PSCI_FN_CPU_SUSPEND PSCI_FN(0)
+#define PSCI_FN_CPU_OFF PSCI_FN(1)
+#define PSCI_FN_CPU_ON PSCI_FN(2)
+#define PSCI_FN_MIGRATE PSCI_FN(3)
+
+MISMATCH_CHECK(PSCI_FN_CPU_SUSPEND, KVM_PSCI_FN_CPU_SUSPEND)
+MISMATCH_CHECK(PSCI_FN_CPU_OFF, KVM_PSCI_FN_CPU_OFF)
+MISMATCH_CHECK(PSCI_FN_CPU_ON, KVM_PSCI_FN_CPU_ON)
+MISMATCH_CHECK(PSCI_FN_MIGRATE, KVM_PSCI_FN_MIGRATE)
+
+#define QEMU_KVM_ARM_TARGET_CORTEX_A15 0
+
+/* There's no kernel define for this: sentinel value which
+ * matches no KVM target value for either 64 or 32 bit
+ */
+#define QEMU_KVM_ARM_TARGET_NONE UINT_MAX
+
+#ifndef TARGET_AARCH64
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A15, KVM_ARM_TARGET_CORTEX_A15)
+#endif
+
+#undef MISMATCH_CHECK
+
+#endif
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 6e5cd36fae..f865dac871 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -23,25 +23,240 @@
 #include "cpu.h"
 #include "hw/arm/arm.h"
 
-/* Check that cpu.h's idea of coprocessor fields matches KVM's */
-#if (CP_REG_SIZE_SHIFT != KVM_REG_SIZE_SHIFT) || \
-    (CP_REG_SIZE_MASK != KVM_REG_SIZE_MASK) ||   \
-    (CP_REG_SIZE_U32 != KVM_REG_SIZE_U32) || \
-    (CP_REG_SIZE_U64 != KVM_REG_SIZE_U64) || \
-    (CP_REG_ARM != KVM_REG_ARM)
-#error mismatch between cpu.h and KVM header definitions
-#endif
-
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
 
+bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
+                                      int *fdarray,
+                                      struct kvm_vcpu_init *init)
+{
+    int ret, kvmfd = -1, vmfd = -1, cpufd = -1;
+
+    kvmfd = qemu_open("/dev/kvm", O_RDWR);
+    if (kvmfd < 0) {
+        goto err;
+    }
+    vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
+    if (vmfd < 0) {
+        goto err;
+    }
+    cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
+    if (cpufd < 0) {
+        goto err;
+    }
+
+    ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init);
+    if (ret >= 0) {
+        ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
+        if (ret < 0) {
+            goto err;
+        }
+    } else {
+        /* Old kernel which doesn't know about the
+         * PREFERRED_TARGET ioctl: we know it will only support
+         * creating one kind of guest CPU which is its preferred
+         * CPU type.
+         */
+        while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
+            init->target = *cpus_to_try++;
+            memset(init->features, 0, sizeof(init->features));
+            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
+            if (ret >= 0) {
+                break;
+            }
+        }
+        if (ret < 0) {
+            goto err;
+        }
+    }
+
+    fdarray[0] = kvmfd;
+    fdarray[1] = vmfd;
+    fdarray[2] = cpufd;
+
+    return true;
+
+err:
+    if (cpufd >= 0) {
+        close(cpufd);
+    }
+    if (vmfd >= 0) {
+        close(vmfd);
+    }
+    if (kvmfd >= 0) {
+        close(kvmfd);
+    }
+
+    return false;
+}
+
+void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
+{
+    int i;
+
+    for (i = 2; i >= 0; i--) {
+        close(fdarray[i]);
+    }
+}
+
+static inline void set_feature(uint64_t *features, int feature)
+{
+    *features |= 1ULL << feature;
+}
+
+bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc)
+{
+    /* Identify the feature bits corresponding to the host CPU, and
+     * fill out the ARMHostCPUClass fields accordingly. To do this
+     * we have to create a scratch VM, create a single CPU inside it,
+     * and then query that CPU for the relevant ID registers.
+     */
+    int i, ret, fdarray[3];
+    uint32_t midr, id_pfr0, id_isar0, mvfr1;
+    uint64_t features = 0;
+    /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
+     * we know these will only support creating one kind of guest CPU,
+     * which is their preferred CPU type.
+     */
+    static const uint32_t cpus_to_try[] = {
+        QEMU_KVM_ARM_TARGET_CORTEX_A15,
+        QEMU_KVM_ARM_TARGET_NONE
+    };
+    struct kvm_vcpu_init init;
+    struct kvm_one_reg idregs[] = {
+        {
+            .id = KVM_REG_ARM | KVM_REG_SIZE_U32
+            | ENCODE_CP_REG(15, 0, 0, 0, 0, 0),
+            .addr = (uintptr_t)&midr,
+        },
+        {
+            .id = KVM_REG_ARM | KVM_REG_SIZE_U32
+            | ENCODE_CP_REG(15, 0, 0, 1, 0, 0),
+            .addr = (uintptr_t)&id_pfr0,
+        },
+        {
+            .id = KVM_REG_ARM | KVM_REG_SIZE_U32
+            | ENCODE_CP_REG(15, 0, 0, 2, 0, 0),
+            .addr = (uintptr_t)&id_isar0,
+        },
+        {
+            .id = KVM_REG_ARM | KVM_REG_SIZE_U32
+            | KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR1,
+            .addr = (uintptr_t)&mvfr1,
+        },
+    };
+
+    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
+        return false;
+    }
+
+    ahcc->target = init.target;
+
+    /* This is not strictly blessed by the device tree binding docs yet,
+     * but in practice the kernel does not care about this string so
+     * there is no point maintaining a KVM_ARM_TARGET_* -> string table.
+     */
+    ahcc->dtb_compatible = "arm,arm-v7";
+
+    for (i = 0; i < ARRAY_SIZE(idregs); i++) {
+        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &idregs[i]);
+        if (ret) {
+            break;
+        }
+    }
+
+    kvm_arm_destroy_scratch_host_vcpu(fdarray);
+
+    if (ret) {
+        return false;
+    }
+
+    /* Now we've retrieved all the register information we can
+     * set the feature bits based on the ID register fields.
+     * We can assume any KVM-supporting CPU is at least a v7
+     * with VFPv3, LPAE and the generic timers; this in turn implies
+     * most of the other feature bits, but a few must be tested.
+     */
+    set_feature(&features, ARM_FEATURE_V7);
+    set_feature(&features, ARM_FEATURE_VFP3);
+    set_feature(&features, ARM_FEATURE_LPAE);
+    set_feature(&features, ARM_FEATURE_GENERIC_TIMER);
+
+    switch (extract32(id_isar0, 24, 4)) {
+    case 1:
+        set_feature(&features, ARM_FEATURE_THUMB_DIV);
+        break;
+    case 2:
+        set_feature(&features, ARM_FEATURE_ARM_DIV);
+        set_feature(&features, ARM_FEATURE_THUMB_DIV);
+        break;
+    default:
+        break;
+    }
+
+    if (extract32(id_pfr0, 12, 4) == 1) {
+        set_feature(&features, ARM_FEATURE_THUMB2EE);
+    }
+    if (extract32(mvfr1, 20, 4) == 1) {
+        set_feature(&features, ARM_FEATURE_VFP_FP16);
+    }
+    if (extract32(mvfr1, 12, 4) == 1) {
+        set_feature(&features, ARM_FEATURE_NEON);
+    }
+    if (extract32(mvfr1, 28, 4) == 1) {
+        /* FMAC support implies VFPv4 */
+        set_feature(&features, ARM_FEATURE_VFP4);
+    }
+
+    ahcc->features = features;
+
+    return true;
+}
+
+static void kvm_arm_host_cpu_class_init(ObjectClass *oc, void *data)
+{
+    ARMHostCPUClass *ahcc = ARM_HOST_CPU_CLASS(oc);
+
+    /* All we really need to set up for the 'host' CPU
+     * is the feature bits -- we rely on the fact that the
+     * various ID register values in ARMCPU are only used for
+     * TCG CPUs.
+     */
+    if (!kvm_arm_get_host_cpu_features(ahcc)) {
+        fprintf(stderr, "Failed to retrieve host CPU features!\n");
+        abort();
+    }
+}
+
+static void kvm_arm_host_cpu_initfn(Object *obj)
+{
+    ARMHostCPUClass *ahcc = ARM_HOST_CPU_GET_CLASS(obj);
+    ARMCPU *cpu = ARM_CPU(obj);
+    CPUARMState *env = &cpu->env;
+
+    cpu->kvm_target = ahcc->target;
+    cpu->dtb_compatible = ahcc->dtb_compatible;
+    env->features = ahcc->features;
+}
+
+static const TypeInfo host_arm_cpu_type_info = {
+    .name = TYPE_ARM_HOST_CPU,
+    .parent = TYPE_ARM_CPU,
+    .instance_init = kvm_arm_host_cpu_initfn,
+    .class_init = kvm_arm_host_cpu_class_init,
+    .class_size = sizeof(ARMHostCPUClass),
+};
+
 int kvm_arch_init(KVMState *s)
 {
     /* For ARM interrupt delivery is always asynchronous,
      * whether we are using an in-kernel VGIC or not.
      */
     kvm_async_interrupts_allowed = true;
+
+    type_register_static(&host_arm_cpu_type_info);
+
     return 0;
 }
 
@@ -86,8 +301,16 @@ int kvm_arch_init_vcpu(CPUState *cs)
     struct kvm_reg_list *rlp;
     ARMCPU *cpu = ARM_CPU(cs);
 
-    init.target = KVM_ARM_TARGET_CORTEX_A15;
+    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE) {
+        fprintf(stderr, "KVM is not supported for this guest CPU type\n");
+        return -EINVAL;
+    }
+
+    init.target = cpu->kvm_target;
     memset(init.features, 0, sizeof(init.features));
+    if (cpu->start_powered_off) {
+        init.features[0] = 1 << KVM_ARM_VCPU_POWER_OFF;
+    }
     ret = kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
     if (ret) {
         return ret;
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index 5d14887e66..cd3d13ca2d 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -62,4 +62,59 @@ bool write_list_to_kvmstate(ARMCPU *cpu);
  */
 bool write_kvmstate_to_list(ARMCPU *cpu);
 
+#ifdef CONFIG_KVM
+/**
+ * kvm_arm_create_scratch_host_vcpu:
+ * @cpus_to_try: array of QEMU_KVM_ARM_TARGET_* values (terminated with
+ * QEMU_KVM_ARM_TARGET_NONE) to try as fallback if the kernel does not
+ * know the PREFERRED_TARGET ioctl
+ * @fdarray: filled in with kvmfd, vmfd, cpufd file descriptors in that order
+ * @init: filled in with the necessary values for creating a host vcpu
+ *
+ * Create a scratch vcpu in its own VM of the type preferred by the host
+ * kernel (as would be used for '-cpu host'), for purposes of probing it
+ * for capabilities.
+ *
+ * Returns: true on success (and fdarray and init are filled in),
+ * false on failure (and fdarray and init are not valid).
+ */
+bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
+                                      int *fdarray,
+                                      struct kvm_vcpu_init *init);
+
+/**
+ * kvm_arm_destroy_scratch_host_vcpu:
+ * @fdarray: array of fds as set up by kvm_arm_create_scratch_host_vcpu
+ *
+ * Tear down the scratch vcpu created by kvm_arm_create_scratch_host_vcpu.
+ */
+void kvm_arm_destroy_scratch_host_vcpu(int *fdarray);
+
+#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
+#define ARM_HOST_CPU_CLASS(klass) \
+    OBJECT_CLASS_CHECK(ARMHostCPUClass, (klass), TYPE_ARM_HOST_CPU)
+#define ARM_HOST_CPU_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(ARMHostCPUClass, (obj), TYPE_ARM_HOST_CPU)
+
+typedef struct ARMHostCPUClass {
+    /*< private >*/
+    ARMCPUClass parent_class;
+    /*< public >*/
+
+    uint64_t features;
+    uint32_t target;
+    const char *dtb_compatible;
+} ARMHostCPUClass;
+
+/**
+ * kvm_arm_get_host_cpu_features:
+ * @ahcc: ARMHostCPUClass to fill in
+ *
+ * Probe the capabilities of the host kernel's preferred CPU and fill
+ * in the ARMHostCPUClass struct accordingly.
+ */
+bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc);
+
+#endif
+
 #endif
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 5f003e785e..8c479ff9a8 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -825,63 +825,57 @@ static inline void store_reg_from_load(CPUARMState *env, DisasContext *s,
  * extended if we're a 64 bit core) and  data is also
  * 32 bits unless specifically doing a 64 bit access.
  * These functions work like tcg_gen_qemu_{ld,st}* except
- * that their arguments are TCGv_i32 rather than TCGv.
+ * that the address argument is TCGv_i32 rather than TCGv.
  */
 #if TARGET_LONG_BITS == 32
 
-#define DO_GEN_LD(OP)                                                    \
-static inline void gen_aa32_##OP(TCGv_i32 val, TCGv_i32 addr, int index) \
+#define DO_GEN_LD(SUFF, OPC)                                             \
+static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
 {                                                                        \
-    tcg_gen_qemu_##OP(val, addr, index);                                 \
+    tcg_gen_qemu_ld_i32(val, addr, index, OPC);                          \
 }
 
-#define DO_GEN_ST(OP)                                                    \
-static inline void gen_aa32_##OP(TCGv_i32 val, TCGv_i32 addr, int index) \
+#define DO_GEN_ST(SUFF, OPC)                                             \
+static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
 {                                                                        \
-    tcg_gen_qemu_##OP(val, addr, index);                                 \
+    tcg_gen_qemu_st_i32(val, addr, index, OPC);                          \
 }
 
 static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
 {
-    tcg_gen_qemu_ld64(val, addr, index);
+    tcg_gen_qemu_ld_i64(val, addr, index, MO_TEQ);
 }
 
 static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
 {
-    tcg_gen_qemu_st64(val, addr, index);
+    tcg_gen_qemu_st_i64(val, addr, index, MO_TEQ);
 }
 
 #else
 
-#define DO_GEN_LD(OP)                                                    \
-static inline void gen_aa32_##OP(TCGv_i32 val, TCGv_i32 addr, int index) \
+#define DO_GEN_LD(SUFF, OPC)                                             \
+static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
 {                                                                        \
     TCGv addr64 = tcg_temp_new();                                        \
-    TCGv val64 = tcg_temp_new();                                         \
     tcg_gen_extu_i32_i64(addr64, addr);                                  \
-    tcg_gen_qemu_##OP(val64, addr64, index);                             \
+    tcg_gen_qemu_ld_i32(val, addr64, index, OPC);                        \
     tcg_temp_free(addr64);                                               \
-    tcg_gen_trunc_i64_i32(val, val64);                                   \
-    tcg_temp_free(val64);                                                \
 }
 
-#define DO_GEN_ST(OP)                                                    \
-static inline void gen_aa32_##OP(TCGv_i32 val, TCGv_i32 addr, int index) \
+#define DO_GEN_ST(SUFF, OPC)                                             \
+static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
 {                                                                        \
     TCGv addr64 = tcg_temp_new();                                        \
-    TCGv val64 = tcg_temp_new();                                         \
     tcg_gen_extu_i32_i64(addr64, addr);                                  \
-    tcg_gen_extu_i32_i64(val64, val);                                    \
-    tcg_gen_qemu_##OP(val64, addr64, index);                             \
+    tcg_gen_qemu_st_i32(val, addr64, index, OPC);                        \
     tcg_temp_free(addr64);                                               \
-    tcg_temp_free(val64);                                                \
 }
 
 static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
 {
     TCGv addr64 = tcg_temp_new();
     tcg_gen_extu_i32_i64(addr64, addr);
-    tcg_gen_qemu_ld64(val, addr64, index);
+    tcg_gen_qemu_ld_i64(val, addr64, index, MO_TEQ);
     tcg_temp_free(addr64);
 }
 
@@ -889,20 +883,20 @@ static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
 {
     TCGv addr64 = tcg_temp_new();
     tcg_gen_extu_i32_i64(addr64, addr);
-    tcg_gen_qemu_st64(val, addr64, index);
+    tcg_gen_qemu_st_i64(val, addr64, index, MO_TEQ);
     tcg_temp_free(addr64);
 }
 
 #endif
 
-DO_GEN_LD(ld8s)
-DO_GEN_LD(ld8u)
-DO_GEN_LD(ld16s)
-DO_GEN_LD(ld16u)
-DO_GEN_LD(ld32u)
-DO_GEN_ST(st8)
-DO_GEN_ST(st16)
-DO_GEN_ST(st32)
+DO_GEN_LD(8s, MO_SB)
+DO_GEN_LD(8u, MO_UB)
+DO_GEN_LD(16s, MO_TESW)
+DO_GEN_LD(16u, MO_TEUW)
+DO_GEN_LD(32u, MO_TEUL)
+DO_GEN_ST(8, MO_UB)
+DO_GEN_ST(16, MO_TEUW)
+DO_GEN_ST(32, MO_TEUL)
 
 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
 {
@@ -2614,6 +2608,189 @@ static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
     return tmp;
 }
 
+static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
+                       uint32_t dp)
+{ /* Emit TCG ops that store either register rn or register rm into rd, chosen from the N/Z/V flag globals; always returns 0. */
+    uint32_t cc = extract32(insn, 20, 2); /* 2-bit condition field: 0=eq, 1=vs, 2=ge, 3=gt */
+
+    if (dp) { /* dp set: 64-bit register values, so the 32-bit flags are widened first */
+        TCGv_i64 frn, frm, dest;
+        TCGv_i64 tmp, zero, zf, nf, vf;
+
+        zero = tcg_const_i64(0);
+
+        frn = tcg_temp_new_i64();
+        frm = tcg_temp_new_i64();
+        dest = tcg_temp_new_i64();
+
+        zf = tcg_temp_new_i64();
+        nf = tcg_temp_new_i64();
+        vf = tcg_temp_new_i64();
+
+        tcg_gen_extu_i32_i64(zf, cpu_ZF); /* zf is only tested EQ/NE against zero, so zero-extension is enough */
+        tcg_gen_ext_i32_i64(nf, cpu_NF); /* sign-extended: the LT/GE tests below depend on the sign */
+        tcg_gen_ext_i32_i64(vf, cpu_VF); /* sign-extended, same reason as nf */
+
+        tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
+        tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
+        switch (cc) { /* cc is two bits wide, so the four cases cover every value */
+        case 0: /* eq: Z */
+            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
+                                frn, frm); /* dest = (zf == 0) ? frn : frm */
+            break;
+        case 1: /* vs: V */
+            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
+                                frn, frm); /* dest = (vf < 0) ? frn : frm */
+            break;
+        case 2: /* ge: N == V -> N ^ V == 0 */
+            tmp = tcg_temp_new_i64();
+            tcg_gen_xor_i64(tmp, vf, nf);
+            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
+                                frn, frm); /* dest = ((vf ^ nf) >= 0) ? frn : frm */
+            tcg_temp_free_i64(tmp);
+            break;
+        case 3: /* gt: !Z && N == V */
+            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
+                                frn, frm); /* step 1: dest = (zf != 0) ? frn : frm */
+            tmp = tcg_temp_new_i64();
+            tcg_gen_xor_i64(tmp, vf, nf);
+            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
+                                dest, frm); /* step 2: keep step 1's dest only if (vf ^ nf) >= 0, else frm */
+            tcg_temp_free_i64(tmp);
+            break;
+        }
+        tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
+        tcg_temp_free_i64(frn);
+        tcg_temp_free_i64(frm);
+        tcg_temp_free_i64(dest);
+
+        tcg_temp_free_i64(zf);
+        tcg_temp_free_i64(nf);
+        tcg_temp_free_i64(vf);
+
+        tcg_temp_free_i64(zero);
+    } else { /* dp clear: 32-bit values, so the flag globals are compared directly */
+        TCGv_i32 frn, frm, dest;
+        TCGv_i32 tmp, zero;
+
+        zero = tcg_const_i32(0);
+
+        frn = tcg_temp_new_i32();
+        frm = tcg_temp_new_i32();
+        dest = tcg_temp_new_i32();
+        tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
+        tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
+        switch (cc) { /* same four selections as the 64-bit path above */
+        case 0: /* eq: Z */
+            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
+                                frn, frm);
+            break;
+        case 1: /* vs: V */
+            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
+                                frn, frm);
+            break;
+        case 2: /* ge: N == V -> N ^ V == 0 */
+            tmp = tcg_temp_new_i32();
+            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
+                                frn, frm);
+            tcg_temp_free_i32(tmp);
+            break;
+        case 3: /* gt: !Z && N == V */
+            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
+                                frn, frm); /* step 1: select on the Z test */
+            tmp = tcg_temp_new_i32();
+            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
+                                dest, frm); /* step 2: fall back to frm unless (VF ^ NF) >= 0 */
+            tcg_temp_free_i32(tmp);
+            break;
+        }
+        tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
+        tcg_temp_free_i32(frn);
+        tcg_temp_free_i32(frm);
+        tcg_temp_free_i32(dest);
+
+        tcg_temp_free_i32(zero);
+    }
+
+    return 0; /* unconditional: this function never reports a failure to its caller */
+}
+
+static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
+                            uint32_t rm, uint32_t dp)
+{ /* Emit a call to the minnm or maxnm helper on registers rn and rm and store the result in rd; always returns 0. */
+    uint32_t vmin = extract32(insn, 6, 1); /* bit 6 of insn: set selects the minnm helper, clear selects maxnm */
+    TCGv_ptr fpst = get_fpstatus_ptr(0); /* NOTE(review): argument 0 presumably selects the non-Neon FP status (the Neon path passes 1) -- confirm */
+
+    if (dp) { /* dp set: 64-bit operands, "d" helper variants */
+        TCGv_i64 frn, frm, dest;
+
+        frn = tcg_temp_new_i64();
+        frm = tcg_temp_new_i64();
+        dest = tcg_temp_new_i64();
+
+        tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
+        tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
+        if (vmin) {
+            gen_helper_vfp_minnmd(dest, frn, frm, fpst);
+        } else {
+            gen_helper_vfp_maxnmd(dest, frn, frm, fpst);
+        }
+        tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
+        tcg_temp_free_i64(frn);
+        tcg_temp_free_i64(frm);
+        tcg_temp_free_i64(dest);
+    } else { /* dp clear: 32-bit operands, "s" helper variants */
+        TCGv_i32 frn, frm, dest;
+
+        frn = tcg_temp_new_i32();
+        frm = tcg_temp_new_i32();
+        dest = tcg_temp_new_i32();
+
+        tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
+        tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
+        if (vmin) {
+            gen_helper_vfp_minnms(dest, frn, frm, fpst);
+        } else {
+            gen_helper_vfp_maxnms(dest, frn, frm, fpst);
+        }
+        tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
+        tcg_temp_free_i32(frn);
+        tcg_temp_free_i32(frm);
+        tcg_temp_free_i32(dest);
+    }
+
+    tcg_temp_free_ptr(fpst); /* shared by both branches, so released once here */
+    return 0; /* unconditional: this function never reports a failure to its caller */
+}
+
+static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
+{ /* Decode the VFP encodings handled only for v8: dispatches to handle_vsel or handle_vminmaxnm, and returns 1 when nothing here accepts insn. */
+    uint32_t rd, rn, rm, dp = extract32(insn, 8, 1); /* dp is bit 8 of insn; it picks D- versus S-register decoding below */
+
+    if (!arm_feature(env, ARM_FEATURE_V8)) { /* without the V8 feature none of these encodings are accepted */
+        return 1;
+    }
+
+    if (dp) { /* NOTE(review): VFP_DREG_* are statement-style macros; presumably they assign rd/rn/rm -- confirm at their definition */
+        VFP_DREG_D(rd, insn);
+        VFP_DREG_N(rn, insn);
+        VFP_DREG_M(rm, insn);
+    } else {
+        rd = VFP_SREG_D(insn);
+        rn = VFP_SREG_N(insn);
+        rm = VFP_SREG_M(insn);
+    }
+
+    if ((insn & 0x0f800e50) == 0x0e000a00) { /* bit pattern routed to handle_vsel */
+        return handle_vsel(insn, rd, rn, rm, dp);
+    } else if ((insn & 0x0fb00e10) == 0x0e800a00) { /* bit pattern routed to handle_vminmaxnm */
+        return handle_vminmaxnm(insn, rd, rn, rm, dp);
+    }
+    return 1; /* neither pattern matched */
+}
+
 /* Disassemble a VFP instruction.  Returns nonzero if an error occurred
    (ie. an undefined instruction).  */
 static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
@@ -2636,6 +2813,14 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
             && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
             return 1;
     }
+
+    if (extract32(insn, 28, 4) == 0xf) {
+        /* Encodings with T=1 (Thumb) or unconditional (ARM):
+         * only used in v8 and above.
+         */
+        return disas_vfp_v8_insn(env, s, insn);
+    }
+
     dp = ((insn & 0xf00) == 0xb00);
     switch ((insn >> 24) & 0xf) {
     case 0xe:
@@ -4362,7 +4547,7 @@ static void gen_neon_narrow_op(int op, int u, int size,
 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
-#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */
+#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
 
 static const uint8_t neon_3r_sizes[] = {
     [NEON_3R_VHADD] = 0x7,
@@ -4395,7 +4580,7 @@ static const uint8_t neon_3r_sizes[] = {
     [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
-    [NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */
+    [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
 };
 
 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
@@ -4656,8 +4841,9 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                 return 1;
             }
             break;
-        case NEON_3R_VRECPS_VRSQRTS:
-            if (u) {
+        case NEON_3R_FLOAT_MISC:
+            /* VMAXNM/VMINNM in ARMv8 */
+            if (u && !arm_feature(env, ARM_FEATURE_V8)) {
                 return 1;
             }
             break;
@@ -4946,11 +5132,23 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
             tcg_temp_free_ptr(fpstatus);
             break;
         }
-        case NEON_3R_VRECPS_VRSQRTS:
-            if (size == 0)
-                gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
-            else
-                gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
+        case NEON_3R_FLOAT_MISC:
+            if (u) {
+                /* VMAXNM/VMINNM */
+                TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                if (size == 0) {
+                    gen_helper_vfp_maxnms(tmp, tmp, tmp2, fpstatus);
+                } else {
+                    gen_helper_vfp_minnms(tmp, tmp, tmp2, fpstatus);
+                }
+                tcg_temp_free_ptr(fpstatus);
+            } else {
+                if (size == 0) {
+                    gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
+                } else {
+                    gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
+              }
+            }
             break;
         case NEON_3R_VFM:
         {
@@ -6296,9 +6494,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
 	    return disas_dsp_insn(env, s, insn);
 	}
 	return 1;
-    case 10:
-    case 11:
-	return disas_vfp_insn (env, s, insn);
     default:
         break;
     }
@@ -6753,6 +6948,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                 goto illegal_op;
             return;
         }
+        if ((insn & 0x0f000e10) == 0x0e000a00) {
+            /* VFP.  */
+            if (disas_vfp_insn(env, s, insn)) {
+                goto illegal_op;
+            }
+            return;
+        }
         if (((insn & 0x0f30f000) == 0x0510f000) ||
             ((insn & 0x0f30f010) == 0x0710f000)) {
             if ((insn & (1 << 22)) == 0) {
@@ -8033,9 +8235,15 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
         case 0xc:
         case 0xd:
         case 0xe:
-            /* Coprocessor.  */
-            if (disas_coproc_insn(env, s, insn))
+            if (((insn >> 8) & 0xe) == 10) {
+                /* VFP.  */
+                if (disas_vfp_insn(env, s, insn)) {
+                    goto illegal_op;
+                }
+            } else if (disas_coproc_insn(env, s, insn)) {
+                /* Coprocessor.  */
                 goto illegal_op;
+            }
             break;
         case 0xf:
             /* swi */
@@ -8765,6 +8973,10 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
             insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
             if (disas_neon_data_insn(env, s, insn))
                 goto illegal_op;
+        } else if (((insn >> 8) & 0xe) == 10) {
+            if (disas_vfp_insn(env, s, insn)) {
+                goto illegal_op;
+            }
         } else {
             if (insn & (1 << 28))
                 goto illegal_op;