summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-01-18 11:46:27 +0000
committerPeter Maydell <peter.maydell@linaro.org>2018-01-18 11:46:27 +0000
commit6e03cc5cf0dac9ec40dce7e3500b442761bc8e96 (patch)
treebd3c87b8fa5fe429916f73cc0d61f9e8e3a104d8 /hw
parentae7313e7fd73a6221c1c8b1bc862ded53de6a174 (diff)
parent2e569845bd314fc1dde83d65dc9b87e71b4d29b4 (diff)
downloadfocaccia-qemu-6e03cc5cf0dac9ec40dce7e3500b442761bc8e96.tar.gz
focaccia-qemu-6e03cc5cf0dac9ec40dce7e3500b442761bc8e96.zip
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.12-20180117' into staging
ppc patch queue 2017-01-17

Another pull request for ppc related patches.  The most interesting
thing here is the new capabilities framework for the pseries machine
type.  This gives us better handling of several existing
incompatibilities between TCG, PR and HV KVM, as well as new ones that
arise with POWER9.  Further, it will allow reasonable handling of the
advertisement of features necessary to mitigate the recent CVEs
(Spectre and Meltdown).

In addition there's:
     * Improvide handling of different "vsmt" modes
     * Significant enhancements to the "pnv" machine type
     * Assorted other bugfixes

# gpg: Signature made Wed 17 Jan 2018 02:21:50 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.12-20180117: (22 commits)
  target-ppc: Fix booke206 tlbwe TLB instruction
  target/ppc: add support for POWER9 HILE
  ppc/pnv: change initrd address
  ppc/pnv: fix XSCOM core addressing on POWER9
  ppc/pnv: introduce pnv*_is_power9() helpers
  ppc/pnv: change core mask for POWER9
  ppc/pnv: use POWER9 DD2 processor
  tests/boot-serial-test: fix powernv support
  ppc/pnv: Update skiboot firmware image
  spapr: Adjust default VSMT value for better migration compatibility
  spapr: Allow some cases where we can't set VSMT mode in the kernel
  target/ppc: Clarify compat mode max_threads value
  ppc: Change Power9 compat table to support at most 8 threads/core
  spapr: Remove unnecessary 'options' field from sPAPRCapabilityInfo
  hw/ppc/spapr_caps: Rework spapr_caps to use uint8 internal representation
  spapr: Handle Decimal Floating Point (DFP) as an optional capability
  spapr: Handle VMX/VSX presence as an spapr capability flag
  target/ppc: Clean up probing of VMX, VSX and DFP availability on KVM
  spapr: Validate capabilities on migration
  spapr: Treat Hardware Transactional Memory (HTM) as an optional capability
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/ppc/Makefile.objs2
-rw-r--r--hw/ppc/pnv.c23
-rw-r--r--hw/ppc/pnv_core.c2
-rw-r--r--hw/ppc/pnv_xscom.c8
-rw-r--r--hw/ppc/spapr.c115
-rw-r--r--hw/ppc/spapr_caps.c345
6 files changed, 452 insertions, 43 deletions
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 7efc686748..1faff853b7 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -1,7 +1,7 @@
 # shared objects
 obj-y += ppc.o ppc_booke.o fdt.o
 # IBM pSeries (sPAPR)
-obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
+obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
 obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 9475e8479c..98ee3c607a 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -53,7 +53,7 @@
 #define FW_MAX_SIZE             0x00400000
 
 #define KERNEL_LOAD_ADDR        0x20000000
-#define INITRD_LOAD_ADDR        0x40000000
+#define INITRD_LOAD_ADDR        0x60000000
 
 static const char *pnv_chip_core_typename(const PnvChip *o)
 {
@@ -707,9 +707,9 @@ static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id)
 #define POWER8_CORE_MASK   (0x7e7eull)
 
 /*
- * POWER9 has 24 cores, ids starting at 0x20
+ * POWER9 has 24 cores, ids starting at 0x0
  */
-#define POWER9_CORE_MASK   (0xffffff00000000ull)
+#define POWER9_CORE_MASK   (0xffffffffffffffull)
 
 static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
 {
@@ -721,7 +721,6 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
     k->cores_mask = POWER8E_CORE_MASK;
     k->core_pir = pnv_chip_core_pir_p8;
     k->xscom_base = 0x003fc0000000000ull;
-    k->xscom_core_base = 0x10000000ull;
     dc->desc = "PowerNV Chip POWER8E";
 }
 
@@ -735,7 +734,6 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data)
     k->cores_mask = POWER8_CORE_MASK;
     k->core_pir = pnv_chip_core_pir_p8;
     k->xscom_base = 0x003fc0000000000ull;
-    k->xscom_core_base = 0x10000000ull;
     dc->desc = "PowerNV Chip POWER8";
 }
 
@@ -749,7 +747,6 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
     k->cores_mask = POWER8_CORE_MASK;
     k->core_pir = pnv_chip_core_pir_p8;
     k->xscom_base = 0x003fc0000000000ull;
-    k->xscom_core_base = 0x10000000ull;
     dc->desc = "PowerNV Chip POWER8NVL";
 }
 
@@ -759,11 +756,10 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
     PnvChipClass *k = PNV_CHIP_CLASS(klass);
 
     k->chip_type = PNV_CHIP_POWER9;
-    k->chip_cfam_id = 0x100d104980000000ull; /* P9 Nimbus DD1.0 */
+    k->chip_cfam_id = 0x220d104900008000ull; /* P9 Nimbus DD2.0 */
     k->cores_mask = POWER9_CORE_MASK;
     k->core_pir = pnv_chip_core_pir_p9;
     k->xscom_base = 0x00603fc00000000ull;
-    k->xscom_core_base = 0x0ull;
     dc->desc = "PowerNV Chip POWER9";
 }
 
@@ -887,6 +883,7 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp)
              && (i < chip->nr_cores); core_hwid++) {
         char core_name[32];
         void *pnv_core = chip->cores + i * typesize;
+        uint64_t xscom_core_base;
 
         if (!(chip->cores_mask & (1ull << core_hwid))) {
             continue;
@@ -910,9 +907,13 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp)
         object_unref(OBJECT(pnv_core));
 
         /* Each core has an XSCOM MMIO region */
-        pnv_xscom_add_subregion(chip,
-                                PNV_XSCOM_EX_CORE_BASE(pcc->xscom_core_base,
-                                                       core_hwid),
+        if (!pnv_chip_is_power9(chip)) {
+            xscom_core_base = PNV_XSCOM_EX_BASE(core_hwid);
+        } else {
+            xscom_core_base = PNV_XSCOM_P9_EC_BASE(core_hwid);
+        }
+
+        pnv_xscom_add_subregion(chip, xscom_core_base,
                                 &PNV_CORE(pnv_core)->xscom_regs);
         i++;
     }
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 7e8a76df44..cbb64ad9e7 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -192,7 +192,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
 
     snprintf(name, sizeof(name), "xscom-core.%d", cc->core_id);
     pnv_xscom_region_init(&pc->xscom_regs, OBJECT(dev), &pnv_core_xscom_ops,
-                          pc, name, PNV_XSCOM_EX_CORE_SIZE);
+                          pc, name, PNV_XSCOM_EX_SIZE);
     return;
 
 err:
diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
index e51d634f40..99c40efecd 100644
--- a/hw/ppc/pnv_xscom.c
+++ b/hw/ppc/pnv_xscom.c
@@ -51,10 +51,9 @@ static void xscom_complete(CPUState *cs, uint64_t hmer_bits)
 
 static uint32_t pnv_xscom_pcba(PnvChip *chip, uint64_t addr)
 {
-    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
-
     addr &= (PNV_XSCOM_SIZE - 1);
-    if (pcc->chip_type == PNV_CHIP_POWER9) {
+
+    if (pnv_chip_is_power9(chip)) {
         return addr >> 3;
     } else {
         return ((addr >> 4) & ~0xfull) | ((addr >> 3) & 0xf);
@@ -231,7 +230,6 @@ int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset)
     int xscom_offset;
     ForeachPopulateArgs args;
     char *name;
-    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
 
     name = g_strdup_printf("xscom@%" PRIx64, be64_to_cpu(reg[0]));
     xscom_offset = fdt_add_subnode(fdt, root_offset, name);
@@ -242,7 +240,7 @@ int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset)
     _FDT((fdt_setprop_cell(fdt, xscom_offset, "#size-cells", 1)));
     _FDT((fdt_setprop(fdt, xscom_offset, "reg", reg, sizeof(reg))));
 
-    if (pcc->chip_type == PNV_CHIP_POWER9) {
+    if (pnv_chip_is_power9(chip)) {
         _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat_p9,
                           sizeof(compat_p9))));
     } else {
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index dfd352c473..499ab647d8 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -253,7 +253,9 @@ static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu)
 }
 
 /* Populate the "ibm,pa-features" property */
-static void spapr_populate_pa_features(PowerPCCPU *cpu, void *fdt, int offset,
+static void spapr_populate_pa_features(sPAPRMachineState *spapr,
+                                       PowerPCCPU *cpu,
+                                       void *fdt, int offset,
                                        bool legacy_guest)
 {
     CPUPPCState *env = &cpu->env;
@@ -318,7 +320,7 @@ static void spapr_populate_pa_features(PowerPCCPU *cpu, void *fdt, int offset,
          */
         pa_features[3] |= 0x20;
     }
-    if (kvmppc_has_cap_htm() && pa_size > 24) {
+    if ((spapr_get_cap(spapr, SPAPR_CAP_HTM) != 0) && pa_size > 24) {
         pa_features[24] |= 0x80;    /* Transactional memory support */
     }
     if (legacy_guest && pa_size > 40) {
@@ -343,7 +345,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
         PowerPCCPU *cpu = POWERPC_CPU(cs);
         DeviceClass *dc = DEVICE_GET_CLASS(cs);
         int index = spapr_vcpu_id(cpu);
-        int compat_smt = MIN(smp_threads, ppc_compat_max_threads(cpu));
+        int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu));
 
         if ((index % smt) != 0) {
             continue;
@@ -384,8 +386,8 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
             return ret;
         }
 
-        spapr_populate_pa_features(cpu, fdt, offset,
-                                         spapr->cas_legacy_guest_workaround);
+        spapr_populate_pa_features(spapr, cpu, fdt, offset,
+                                   spapr->cas_legacy_guest_workaround);
     }
     return ret;
 }
@@ -501,7 +503,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
     size_t page_sizes_prop_size;
     uint32_t vcpus_per_socket = smp_threads * smp_cores;
     uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
-    int compat_smt = MIN(smp_threads, ppc_compat_max_threads(cpu));
+    int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu));
     sPAPRDRConnector *drc;
     int drc_index;
     uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ];
@@ -555,20 +557,22 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
                           segs, sizeof(segs))));
     }
 
-    /* Advertise VMX/VSX (vector extensions) if available
-     *   0 / no property == no vector extensions
+    /* Advertise VSX (vector extensions) if available
      *   1               == VMX / Altivec available
-     *   2               == VSX available */
-    if (env->insns_flags & PPC_ALTIVEC) {
-        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
-
-        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
+     *   2               == VSX available
+     *
+     * Only CPUs for which we create core types in spapr_cpu_core.c
+     * are possible, and all of those have VMX */
+    if (spapr_get_cap(spapr, SPAPR_CAP_VSX) != 0) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 2)));
+    } else {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 1)));
     }
 
     /* Advertise DFP (Decimal Floating Point) if available
      *   0 / no property == no DFP
      *   1               == DFP available */
-    if (env->insns_flags2 & PPC2_DFP) {
+    if (spapr_get_cap(spapr, SPAPR_CAP_DFP) != 0) {
         _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
     }
 
@@ -579,7 +583,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
                           page_sizes_prop, page_sizes_prop_size)));
     }
 
-    spapr_populate_pa_features(cpu, fdt, offset, false);
+    spapr_populate_pa_features(spapr, cpu, fdt, offset, false);
 
     _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
                            cs->cpu_index / vcpus_per_socket)));
@@ -1466,6 +1470,8 @@ static void spapr_machine_reset(void)
     /* Check for unknown sysbus devices */
     foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);
 
+    spapr_caps_reset(spapr);
+
     first_ppc_cpu = POWERPC_CPU(first_cpu);
     if (kvm_enabled() && kvmppc_has_cap_mmu_radix() &&
         ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0,
@@ -1580,11 +1586,28 @@ static bool spapr_vga_init(PCIBus *pci_bus, Error **errp)
     }
 }
 
+static int spapr_pre_load(void *opaque)
+{
+    int rc;
+
+    rc = spapr_caps_pre_load(opaque);
+    if (rc) {
+        return rc;
+    }
+
+    return 0;
+}
+
 static int spapr_post_load(void *opaque, int version_id)
 {
     sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
     int err = 0;
 
+    err = spapr_caps_post_migration(spapr);
+    if (err) {
+        return err;
+    }
+
     if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
         CPUState *cs;
         CPU_FOREACH(cs) {
@@ -1616,6 +1639,18 @@ static int spapr_post_load(void *opaque, int version_id)
     return err;
 }
 
+static int spapr_pre_save(void *opaque)
+{
+    int rc;
+
+    rc = spapr_caps_pre_save(opaque);
+    if (rc) {
+        return rc;
+    }
+
+    return 0;
+}
+
 static bool version_before_3(void *opaque, int version_id)
 {
     return version_id < 3;
@@ -1736,7 +1771,9 @@ static const VMStateDescription vmstate_spapr = {
     .name = "spapr",
     .version_id = 3,
     .minimum_version_id = 1,
+    .pre_load = spapr_pre_load,
     .post_load = spapr_post_load,
+    .pre_save = spapr_pre_save,
     .fields = (VMStateField[]) {
         /* used to be @next_irq */
         VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),
@@ -1751,6 +1788,9 @@ static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_ov5_cas,
         &vmstate_spapr_patb_entry,
         &vmstate_spapr_pending_events,
+        &vmstate_spapr_cap_htm,
+        &vmstate_spapr_cap_vsx,
+        &vmstate_spapr_cap_dfp,
         NULL
     }
 };
@@ -2265,26 +2305,43 @@ static void spapr_set_vsmt_mode(sPAPRMachineState *spapr, Error **errp)
         }
         /* In this case, spapr->vsmt has been set by the command line */
     } else {
-        /* Choose a VSMT mode that may be higher than necessary but is
-         * likely to be compatible with hosts that don't have VSMT. */
-        spapr->vsmt = MAX(kvm_smt, smp_threads);
+        /*
+         * Default VSMT value is tricky, because we need it to be as
+         * consistent as possible (for migration), but this requires
+         * changing it for at least some existing cases.  We pick 8 as
+         * the value that we'd get with KVM on POWER8, the
+         * overwhelmingly common case in production systems.
+         */
+        spapr->vsmt = 8;
     }
 
     /* KVM: If necessary, set the SMT mode: */
     if (kvm_enabled() && (spapr->vsmt != kvm_smt)) {
         ret = kvmppc_set_smt_threads(spapr->vsmt);
         if (ret) {
+            /* Looks like KVM isn't able to change VSMT mode */
             error_setg(&local_err,
                        "Failed to set KVM's VSMT mode to %d (errno %d)",
                        spapr->vsmt, ret);
-            if (!vsmt_user) {
-                error_append_hint(&local_err, "On PPC, a VM with %d threads/"
-                             "core on a host with %d threads/core requires "
-                             " the use of VSMT mode %d.\n",
-                             smp_threads, kvm_smt, spapr->vsmt);
+            /* We can live with that if the default one is big enough
+             * for the number of threads, and a submultiple of the one
+             * we want.  In this case we'll waste some vcpu ids, but
+             * behaviour will be correct */
+            if ((kvm_smt >= smp_threads) && ((spapr->vsmt % kvm_smt) == 0)) {
+                warn_report_err(local_err);
+                local_err = NULL;
+                goto out;
+            } else {
+                if (!vsmt_user) {
+                    error_append_hint(&local_err,
+                                      "On PPC, a VM with %d threads/core"
+                                      " on a host with %d threads/core"
+                                      " requires the use of VSMT mode %d.\n",
+                                      smp_threads, kvm_smt, spapr->vsmt);
+                }
+                kvmppc_hint_smt_possible(&local_err);
+                goto out;
             }
-            kvmppc_hint_smt_possible(&local_err);
-            goto out;
         }
     }
     /* else TCG: nothing to do currently */
@@ -3819,6 +3876,11 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
      * in which LMBs are represented and hot-added
      */
     mc->numa_mem_align_shift = 28;
+
+    smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
+    smc->default_caps.caps[SPAPR_CAP_DFP] = SPAPR_CAP_ON;
+    spapr_caps_add_properties(smc, &error_abort);
 }
 
 static const TypeInfo spapr_machine_info = {
@@ -3896,7 +3958,10 @@ static void spapr_machine_2_11_instance_options(MachineState *machine)
 
 static void spapr_machine_2_11_class_options(MachineClass *mc)
 {
+    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
     spapr_machine_2_12_class_options(mc);
+    smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON;
     SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_11);
 }
 
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
new file mode 100644
index 0000000000..d5c9ce774a
--- /dev/null
+++ b/hw/ppc/spapr_caps.c
@@ -0,0 +1,345 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition capabilities handling
+ *
+ * Copyright (c) 2017 David Gibson, Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "sysemu/hw_accel.h"
+#include "target/ppc/cpu.h"
+#include "cpu-models.h"
+#include "kvm_ppc.h"
+
+#include "hw/ppc/spapr.h"
+
+typedef struct sPAPRCapabilityInfo {
+    const char *name;
+    const char *description;
+    int index;
+
+    /* Getter and Setter Function Pointers */
+    ObjectPropertyAccessor *get;
+    ObjectPropertyAccessor *set;
+    const char *type;
+    /* Make sure the virtual hardware can support this capability */
+    void (*apply)(sPAPRMachineState *spapr, uint8_t val, Error **errp);
+} sPAPRCapabilityInfo;
+
+static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name,
+                               void *opaque, Error **errp)
+{
+    sPAPRCapabilityInfo *cap = opaque;
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+    bool value = spapr_get_cap(spapr, cap->index) == SPAPR_CAP_ON;
+
+    visit_type_bool(v, name, &value, errp);
+}
+
+static void spapr_cap_set_bool(Object *obj, Visitor *v, const char *name,
+                               void *opaque, Error **errp)
+{
+    sPAPRCapabilityInfo *cap = opaque;
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+    bool value;
+    Error *local_err = NULL;
+
+    visit_type_bool(v, name, &value, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    spapr->cmd_line_caps[cap->index] = true;
+    spapr->eff.caps[cap->index] = value ? SPAPR_CAP_ON : SPAPR_CAP_OFF;
+}
+
+static void cap_htm_apply(sPAPRMachineState *spapr, uint8_t val, Error **errp)
+{
+    if (!val) {
+        /* TODO: We don't support disabling htm yet */
+        return;
+    }
+    if (tcg_enabled()) {
+        error_setg(errp,
+                   "No Transactional Memory support in TCG, try cap-htm=off");
+    } else if (kvm_enabled() && !kvmppc_has_cap_htm()) {
+        error_setg(errp,
+"KVM implementation does not support Transactional Memory, try cap-htm=off"
+            );
+    }
+}
+
+static void cap_vsx_apply(sPAPRMachineState *spapr, uint8_t val, Error **errp)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+    CPUPPCState *env = &cpu->env;
+
+    if (!val) {
+        /* TODO: We don't support disabling vsx yet */
+        return;
+    }
+    /* Allowable CPUs in spapr_cpu_core.c should already have gotten
+     * rid of anything that doesn't do VMX */
+    g_assert(env->insns_flags & PPC_ALTIVEC);
+    if (!(env->insns_flags2 & PPC2_VSX)) {
+        error_setg(errp, "VSX support not available, try cap-vsx=off");
+    }
+}
+
+static void cap_dfp_apply(sPAPRMachineState *spapr, uint8_t val, Error **errp)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+    CPUPPCState *env = &cpu->env;
+
+    if (!val) {
+        /* TODO: We don't support disabling dfp yet */
+        return;
+    }
+    if (!(env->insns_flags2 & PPC2_DFP)) {
+        error_setg(errp, "DFP support not available, try cap-dfp=off");
+    }
+}
+
+
+sPAPRCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
+    [SPAPR_CAP_HTM] = {
+        .name = "htm",
+        .description = "Allow Hardware Transactional Memory (HTM)",
+        .index = SPAPR_CAP_HTM,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_htm_apply,
+    },
+    [SPAPR_CAP_VSX] = {
+        .name = "vsx",
+        .description = "Allow Vector Scalar Extensions (VSX)",
+        .index = SPAPR_CAP_VSX,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_vsx_apply,
+    },
+    [SPAPR_CAP_DFP] = {
+        .name = "dfp",
+        .description = "Allow Decimal Floating Point (DFP)",
+        .index = SPAPR_CAP_DFP,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_dfp_apply,
+    },
+};
+
+static sPAPRCapabilities default_caps_with_cpu(sPAPRMachineState *spapr,
+                                               CPUState *cs)
+{
+    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    sPAPRCapabilities caps;
+
+    caps = smc->default_caps;
+
+    if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_07,
+                          0, spapr->max_compat_pvr)) {
+        caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
+    }
+
+    if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_06,
+                          0, spapr->max_compat_pvr)) {
+        caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_OFF;
+        caps.caps[SPAPR_CAP_DFP] = SPAPR_CAP_OFF;
+    }
+
+    return caps;
+}
+
+int spapr_caps_pre_load(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+
+    /* Set to default so we can tell if this came in with the migration */
+    spapr->mig = spapr->def;
+    return 0;
+}
+
+int spapr_caps_pre_save(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+
+    spapr->mig = spapr->eff;
+    return 0;
+}
+
+/* This has to be called from the top-level spapr post_load, not the
+ * caps specific one.  Otherwise it wouldn't be called when the source
+ * caps are all defaults, which could still conflict with overridden
+ * caps on the destination */
+int spapr_caps_post_migration(sPAPRMachineState *spapr)
+{
+    int i;
+    bool ok = true;
+    sPAPRCapabilities dstcaps = spapr->eff;
+    sPAPRCapabilities srccaps;
+
+    srccaps = default_caps_with_cpu(spapr, first_cpu);
+    for (i = 0; i < SPAPR_CAP_NUM; i++) {
+        /* If not default value then assume came in with the migration */
+        if (spapr->mig.caps[i] != spapr->def.caps[i]) {
+            srccaps.caps[i] = spapr->mig.caps[i];
+        }
+    }
+
+    for (i = 0; i < SPAPR_CAP_NUM; i++) {
+        sPAPRCapabilityInfo *info = &capability_table[i];
+
+        if (srccaps.caps[i] > dstcaps.caps[i]) {
+            error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)",
+                         info->name, srccaps.caps[i], dstcaps.caps[i]);
+            ok = false;
+        }
+
+        if (srccaps.caps[i] < dstcaps.caps[i]) {
+            warn_report("cap-%s lower level (%d) in incoming stream than on destination (%d)",
+                         info->name, srccaps.caps[i], dstcaps.caps[i]);
+        }
+    }
+
+    return ok ? 0 : -EINVAL;
+}
+
+static bool spapr_cap_htm_needed(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+
+    return spapr->cmd_line_caps[SPAPR_CAP_HTM] &&
+           (spapr->eff.caps[SPAPR_CAP_HTM] != spapr->def.caps[SPAPR_CAP_HTM]);
+}
+
+const VMStateDescription vmstate_spapr_cap_htm = {
+    .name = "spapr/cap/htm",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_cap_htm_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(mig.caps[SPAPR_CAP_HTM], sPAPRMachineState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static bool spapr_cap_vsx_needed(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+
+    return spapr->cmd_line_caps[SPAPR_CAP_VSX] &&
+           (spapr->eff.caps[SPAPR_CAP_VSX] != spapr->def.caps[SPAPR_CAP_VSX]);
+}
+
+const VMStateDescription vmstate_spapr_cap_vsx = {
+    .name = "spapr/cap/vsx",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_cap_vsx_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(mig.caps[SPAPR_CAP_VSX], sPAPRMachineState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static bool spapr_cap_dfp_needed(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+
+    return spapr->cmd_line_caps[SPAPR_CAP_DFP] &&
+           (spapr->eff.caps[SPAPR_CAP_DFP] != spapr->def.caps[SPAPR_CAP_DFP]);
+}
+
+const VMStateDescription vmstate_spapr_cap_dfp = {
+    .name = "spapr/cap/dfp",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_cap_dfp_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(mig.caps[SPAPR_CAP_DFP], sPAPRMachineState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+void spapr_caps_reset(sPAPRMachineState *spapr)
+{
+    sPAPRCapabilities default_caps;
+    int i;
+
+    /* First compute the actual set of caps we're running with.. */
+    default_caps = default_caps_with_cpu(spapr, first_cpu);
+
+    for (i = 0; i < SPAPR_CAP_NUM; i++) {
+        /* Store the defaults */
+        spapr->def.caps[i] = default_caps.caps[i];
+        /* If not set on the command line then apply the default value */
+        if (!spapr->cmd_line_caps[i]) {
+            spapr->eff.caps[i] = default_caps.caps[i];
+        }
+    }
+
+    /* .. then apply those caps to the virtual hardware */
+
+    for (i = 0; i < SPAPR_CAP_NUM; i++) {
+        sPAPRCapabilityInfo *info = &capability_table[i];
+
+        /*
+         * If the apply function can't set the desired level and thinks it's
+         * fatal, it should cause that.
+         */
+        info->apply(spapr, spapr->eff.caps[i], &error_fatal);
+    }
+}
+
+void spapr_caps_add_properties(sPAPRMachineClass *smc, Error **errp)
+{
+    Error *local_err = NULL;
+    ObjectClass *klass = OBJECT_CLASS(smc);
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(capability_table); i++) {
+        sPAPRCapabilityInfo *cap = &capability_table[i];
+        const char *name = g_strdup_printf("cap-%s", cap->name);
+        char *desc;
+
+        object_class_property_add(klass, name, cap->type,
+                                  cap->get, cap->set,
+                                  NULL, cap, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        desc = g_strdup_printf("%s", cap->description);
+        object_class_property_set_description(klass, name, desc, &local_err);
+        g_free(desc);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}