7 files changed, 1266 insertions, 1 deletions
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index a799c83815..f8481d959f 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -7,6 +7,7 @@ common-obj-$(CONFIG_SOFTMMU) += fw-path-provider.o
 common-obj-y += irq.o
 common-obj-y += hotplug.o
 common-obj-$(CONFIG_SOFTMMU) += nmi.o
+common-obj-$(CONFIG_SOFTMMU) += vm-change-state-handler.o
 
 common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o
 common-obj-$(CONFIG_XILINX_AXI) += stream.o
@@ -22,3 +23,7 @@ common-obj-$(CONFIG_SOFTMMU) += split-irq.o
 common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o
 common-obj-$(CONFIG_SOFTMMU) += generic-loader.o
 common-obj-$(CONFIG_SOFTMMU) += null-machine.o
+
+obj-$(CONFIG_SOFTMMU) += machine-qmp-cmds.o
+obj-$(CONFIG_SOFTMMU) += numa.o
+common-obj-$(CONFIG_SOFTMMU) += machine-hmp-cmds.o
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
new file mode 100644
index 0000000000..1f66bda346
--- /dev/null
+++ b/hw/core/machine-hmp-cmds.c
@@ -0,0 +1,167 @@
+/*
+ * HMP commands related to machines and CPUs
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "qapi/error.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/string-output-visitor.h"
+#include "qemu/error-report.h"
+#include "sysemu/numa.h"
+
+void hmp_info_cpus(Monitor *mon, const QDict *qdict)
+{
+    CpuInfoFastList *cpu_list, *cpu;
+
+    cpu_list = qmp_query_cpus_fast(NULL);
+
+    for (cpu = cpu_list; cpu; cpu = cpu->next) {
+        int active = ' ';
+
+        if (cpu->value->cpu_index == monitor_get_cpu_index()) {
+            active = '*';
+        }
+
+        monitor_printf(mon, "%c CPU #%" PRId64 ":", active,
+                       cpu->value->cpu_index);
+        monitor_printf(mon, " thread_id=%" PRId64 "\n", cpu->value->thread_id);
+    }
+
+    qapi_free_CpuInfoFastList(cpu_list);
+}
+
+void hmp_cpu_add(Monitor *mon, const QDict *qdict)
+{
+    int cpuid;
+    Error *err = NULL;
+
+    error_report("cpu_add is deprecated, please use device_add instead");
+
+    cpuid = qdict_get_int(qdict, "id");
+    qmp_cpu_add(cpuid, &err);
+    hmp_handle_error(mon, &err);
+}
+
+void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    HotpluggableCPUList *l = qmp_query_hotpluggable_cpus(&err);
+    HotpluggableCPUList *saved = l;
+    CpuInstanceProperties *c;
+
+    if (err != NULL) {
+        hmp_handle_error(mon, &err);
+        return;
+    }
+
+    monitor_printf(mon, "Hotpluggable CPUs:\n");
+    while (l) {
+        monitor_printf(mon, "  type: \"%s\"\n", l->value->type);
+        monitor_printf(mon, "  vcpus_count: \"%" PRIu64 "\"\n",
+                       l->value->vcpus_count);
+        if (l->value->has_qom_path) {
+            monitor_printf(mon, "  qom_path: \"%s\"\n", l->value->qom_path);
+        }
+
+        c = l->value->props;
+        monitor_printf(mon, "  CPUInstance Properties:\n");
+        if (c->has_node_id) {
+            monitor_printf(mon, "    node-id: \"%" PRIu64 "\"\n", c->node_id);
+        }
+        if (c->has_socket_id) {
+            monitor_printf(mon, "    socket-id: \"%" PRIu64 "\"\n", c->socket_id);
+        }
+        if (c->has_die_id) {
+            monitor_printf(mon, "    die-id: \"%" PRIu64 "\"\n", c->die_id);
+        }
+        if (c->has_core_id) {
+            monitor_printf(mon, "    core-id: \"%" PRIu64 "\"\n", c->core_id);
+        }
+        if (c->has_thread_id) {
+            monitor_printf(mon, "    thread-id: \"%" PRIu64 "\"\n", c->thread_id);
+        }
+
+        l = l->next;
+    }
+
+    qapi_free_HotpluggableCPUList(saved);
+}
+
+void hmp_info_memdev(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    MemdevList *memdev_list = qmp_query_memdev(&err);
+    MemdevList *m = memdev_list;
+    Visitor *v;
+    char *str;
+
+    while (m) {
+        v = string_output_visitor_new(false, &str);
+        visit_type_uint16List(v, NULL, &m->value->host_nodes, NULL);
+        monitor_printf(mon, "memory backend: %s\n", m->value->id);
+        monitor_printf(mon, "  size:  %" PRId64 "\n", m->value->size);
+        monitor_printf(mon, "  merge: %s\n",
+                       m->value->merge ? "true" : "false");
+        monitor_printf(mon, "  dump: %s\n",
+                       m->value->dump ? "true" : "false");
+        monitor_printf(mon, "  prealloc: %s\n",
+                       m->value->prealloc ? "true" : "false");
+        monitor_printf(mon, "  policy: %s\n",
+                       HostMemPolicy_str(m->value->policy));
+        visit_complete(v, &str);
+        monitor_printf(mon, "  host nodes: %s\n", str);
+
+        g_free(str);
+        visit_free(v);
+        m = m->next;
+    }
+
+    monitor_printf(mon, "\n");
+
+    qapi_free_MemdevList(memdev_list);
+    hmp_handle_error(mon, &err);
+}
+
+void hmp_info_numa(Monitor *mon, const QDict *qdict)
+{
+    int i;
+    NumaNodeMem *node_mem;
+    CpuInfoList *cpu_list, *cpu;
+
+    cpu_list = qmp_query_cpus(&error_abort);
+    node_mem = g_new0(NumaNodeMem, nb_numa_nodes);
+
+    query_numa_node_mem(node_mem);
+    monitor_printf(mon, "%d nodes\n", nb_numa_nodes);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        monitor_printf(mon, "node %d cpus:", i);
+        for (cpu = cpu_list; cpu; cpu = cpu->next) {
+            if (cpu->value->has_props && cpu->value->props->has_node_id &&
+                cpu->value->props->node_id == i) {
+                monitor_printf(mon, " %" PRIi64, cpu->value->CPU);
+            }
+        }
+        monitor_printf(mon, "\n");
+        monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i,
+                       node_mem[i].node_mem >> 20);
+        monitor_printf(mon, "node %d plugged: %" PRId64 " MB\n", i,
+                       node_mem[i].node_plugged_mem >> 20);
+    }
+    qapi_free_CpuInfoList(cpu_list);
+    g_free(node_mem);
+}
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
new file mode 100644
index 0000000000..5bd95b8ab0
--- /dev/null
+++ b/hw/core/machine-qmp-cmds.c
@@ -0,0 +1,330 @@
+/*
+ * QMP commands related to machines and CPUs
+ *
+ * Copyright (C) 2014 Red Hat Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/boards.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/hw_accel.h"
+#include "sysemu/numa.h"
+#include "sysemu/sysemu.h"
+
+CpuInfoList *qmp_query_cpus(Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    CpuInfoList *head = NULL, *cur_item = NULL;
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        CpuInfoList *info;
+#if defined(TARGET_I386)
+        X86CPU *x86_cpu = X86_CPU(cpu);
+        CPUX86State *env = &x86_cpu->env;
+#elif defined(TARGET_PPC)
+        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
+        CPUPPCState *env = &ppc_cpu->env;
+#elif defined(TARGET_SPARC)
+        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
+        CPUSPARCState *env = &sparc_cpu->env;
+#elif defined(TARGET_RISCV)
+        RISCVCPU *riscv_cpu = RISCV_CPU(cpu);
+        CPURISCVState *env = &riscv_cpu->env;
+#elif defined(TARGET_MIPS)
+        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
+        CPUMIPSState *env = &mips_cpu->env;
+#elif defined(TARGET_TRICORE)
+        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
+        CPUTriCoreState *env = &tricore_cpu->env;
+#elif defined(TARGET_S390X)
+        S390CPU *s390_cpu = S390_CPU(cpu);
+        CPUS390XState *env = &s390_cpu->env;
+#endif
+
+        cpu_synchronize_state(cpu);
+
+        info = g_malloc0(sizeof(*info));
+        info->value = g_malloc0(sizeof(*info->value));
+        info->value->CPU = cpu->cpu_index;
+        info->value->current = (cpu == first_cpu);
+        info->value->halted = cpu->halted;
+        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
+        info->value->thread_id = cpu->thread_id;
+#if defined(TARGET_I386)
+        info->value->arch = CPU_INFO_ARCH_X86;
+        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
+#elif defined(TARGET_PPC)
+        info->value->arch = CPU_INFO_ARCH_PPC;
+        info->value->u.ppc.nip = env->nip;
+#elif defined(TARGET_SPARC)
+        info->value->arch = CPU_INFO_ARCH_SPARC;
+        info->value->u.q_sparc.pc = env->pc;
+        info->value->u.q_sparc.npc = env->npc;
+#elif defined(TARGET_MIPS)
+        info->value->arch = CPU_INFO_ARCH_MIPS;
+        info->value->u.q_mips.PC = env->active_tc.PC;
+#elif defined(TARGET_TRICORE)
+        info->value->arch = CPU_INFO_ARCH_TRICORE;
+        info->value->u.tricore.PC = env->PC;
+#elif defined(TARGET_S390X)
+        info->value->arch = CPU_INFO_ARCH_S390;
+        info->value->u.s390.cpu_state = env->cpu_state;
+#elif defined(TARGET_RISCV)
+        info->value->arch = CPU_INFO_ARCH_RISCV;
+        info->value->u.riscv.pc = env->pc;
+#else
+        info->value->arch = CPU_INFO_ARCH_OTHER;
+#endif
+        info->value->has_props = !!mc->cpu_index_to_instance_props;
+        if (info->value->has_props) {
+            CpuInstanceProperties *props;
+            props = g_malloc0(sizeof(*props));
+            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
+            info->value->props = props;
+        }
+
+        /* XXX: waiting for the qapi to support GSList */
+        if (!cur_item) {
+            head = cur_item = info;
+        } else {
+            cur_item->next = info;
+            cur_item = info;
+        }
+    }
+
+    return head;
+}
+
+static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
+{
+    /*
+     * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
+     * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
+     */
+    switch (target) {
+    case SYS_EMU_TARGET_I386:
+    case SYS_EMU_TARGET_X86_64:
+        return CPU_INFO_ARCH_X86;
+
+    case SYS_EMU_TARGET_PPC:
+    case SYS_EMU_TARGET_PPC64:
+        return CPU_INFO_ARCH_PPC;
+
+    case SYS_EMU_TARGET_SPARC:
+    case SYS_EMU_TARGET_SPARC64:
+        return CPU_INFO_ARCH_SPARC;
+
+    case SYS_EMU_TARGET_MIPS:
+    case SYS_EMU_TARGET_MIPSEL:
+    case SYS_EMU_TARGET_MIPS64:
+    case SYS_EMU_TARGET_MIPS64EL:
+        return CPU_INFO_ARCH_MIPS;
+
+    case SYS_EMU_TARGET_TRICORE:
+        return CPU_INFO_ARCH_TRICORE;
+
+    case SYS_EMU_TARGET_S390X:
+        return CPU_INFO_ARCH_S390;
+
+    case SYS_EMU_TARGET_RISCV32:
+    case SYS_EMU_TARGET_RISCV64:
+        return CPU_INFO_ARCH_RISCV;
+
+    default:
+        return CPU_INFO_ARCH_OTHER;
+    }
+}
+
+static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
+{
+#ifdef TARGET_S390X
+    S390CPU *s390_cpu = S390_CPU(cpu);
+    CPUS390XState *env = &s390_cpu->env;
+
+    info->cpu_state = env->cpu_state;
+#else
+    abort();
+#endif
+}
+
+/*
+ * fast means: we NEVER interrupt vCPU threads to retrieve
+ * information from KVM.
+ */
+CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    CpuInfoFastList *head = NULL, *cur_item = NULL;
+    SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
+                                          -1, &error_abort);
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        CpuInfoFastList *info = g_malloc0(sizeof(*info));
+        info->value = g_malloc0(sizeof(*info->value));
+
+        info->value->cpu_index = cpu->cpu_index;
+        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
+        info->value->thread_id = cpu->thread_id;
+
+        info->value->has_props = !!mc->cpu_index_to_instance_props;
+        if (info->value->has_props) {
+            CpuInstanceProperties *props;
+            props = g_malloc0(sizeof(*props));
+            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
+            info->value->props = props;
+        }
+
+        info->value->arch = sysemu_target_to_cpuinfo_arch(target);
+        info->value->target = target;
+        if (target == SYS_EMU_TARGET_S390X) {
+            cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
+        }
+
+        if (!cur_item) {
+            head = cur_item = info;
+        } else {
+            cur_item->next = info;
+            cur_item = info;
+        }
+    }
+
+    return head;
+}
+
+MachineInfoList *qmp_query_machines(Error **errp)
+{
+    GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false);
+    MachineInfoList *mach_list = NULL;
+
+    for (el = machines; el; el = el->next) {
+        MachineClass *mc = el->data;
+        MachineInfoList *entry;
+        MachineInfo *info;
+
+        info = g_malloc0(sizeof(*info));
+        if (mc->is_default) {
+            info->has_is_default = true;
+            info->is_default = true;
+        }
+
+        if (mc->alias) {
+            info->has_alias = true;
+            info->alias = g_strdup(mc->alias);
+        }
+
+        info->name = g_strdup(mc->name);
+        info->cpu_max = !mc->max_cpus ? 1 : mc->max_cpus;
+        info->hotpluggable_cpus = mc->has_hotpluggable_cpus;
+        info->numa_mem_supported = mc->numa_mem_supported;
+        info->deprecated = !!mc->deprecation_reason;
+
+        entry = g_malloc0(sizeof(*entry));
+        entry->value = info;
+        entry->next = mach_list;
+        mach_list = entry;
+    }
+
+    g_slist_free(machines);
+    return mach_list;
+}
+
+CurrentMachineParams *qmp_query_current_machine(Error **errp)
+{
+    CurrentMachineParams *params = g_malloc0(sizeof(*params));
+    params->wakeup_suspend_support = qemu_wakeup_suspend_enabled();
+
+    return params;
+}
+
+HotpluggableCPUList *qmp_query_hotpluggable_cpus(Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+
+    if (!mc->has_hotpluggable_cpus) {
+        error_setg(errp, QERR_FEATURE_DISABLED, "query-hotpluggable-cpus");
+        return NULL;
+    }
+
+    return machine_query_hotpluggable_cpus(ms);
+}
+
+void qmp_cpu_add(int64_t id, Error **errp)
+{
+    MachineClass *mc;
+
+    mc = MACHINE_GET_CLASS(current_machine);
+    if (mc->hot_add_cpu) {
+        mc->hot_add_cpu(current_machine, id, errp);
+    } else {
+        error_setg(errp, "Not supported");
+    }
+}
+
+void qmp_set_numa_node(NumaOptions *cmd, Error **errp)
+{
+    if (!runstate_check(RUN_STATE_PRECONFIG)) {
+        error_setg(errp, "The command is permitted only in '%s' state",
+                   RunState_str(RUN_STATE_PRECONFIG));
+         return;
+    }
+
+    set_numa_options(MACHINE(qdev_get_machine()), cmd, errp);
+}
+
+static int query_memdev(Object *obj, void *opaque)
+{
+    MemdevList **list = opaque;
+    MemdevList *m = NULL;
+
+    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+        m = g_malloc0(sizeof(*m));
+
+        m->value = g_malloc0(sizeof(*m->value));
+
+        m->value->id = object_get_canonical_path_component(obj);
+        m->value->has_id = !!m->value->id;
+
+        m->value->size = object_property_get_uint(obj, "size",
+                                                  &error_abort);
+        m->value->merge = object_property_get_bool(obj, "merge",
+                                                   &error_abort);
+        m->value->dump = object_property_get_bool(obj, "dump",
+                                                  &error_abort);
+        m->value->prealloc = object_property_get_bool(obj,
+                                                      "prealloc",
+                                                      &error_abort);
+        m->value->policy = object_property_get_enum(obj,
+                                                    "policy",
+                                                    "HostMemPolicy",
+                                                    &error_abort);
+        object_property_get_uint16List(obj, "host-nodes",
+                                       &m->value->host_nodes,
+                                       &error_abort);
+
+        m->next = *list;
+        *list = m;
+    }
+
+    return 0;
+}
+
+MemdevList *qmp_query_memdev(Error **errp)
+{
+    Object *obj = object_get_objects_root();
+    MemdevList *list = NULL;
+
+    object_child_foreach(obj, query_memdev, &list);
+    return list;
+}
diff --git a/hw/core/machine.c b/hw/core/machine.c
index ea5a01aa49..2be19ec0cd 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -11,6 +11,9 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/option.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/replay.h"
 #include "qemu/units.h"
 #include "hw/boards.h"
 #include "qapi/error.h"
@@ -30,6 +33,7 @@ GlobalProperty hw_compat_4_0[] = {
     { "bochs-display",  "edid", "false" },
     { "virtio-vga",     "edid", "false" },
     { "virtio-gpu-pci", "edid", "false" },
+    { "virtio-device", "use-started", "false" },
 };
 const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0);
 
@@ -682,6 +686,11 @@ void machine_set_cpu_numa_node(MachineState *machine,
             return;
         }
 
+        if (props->has_die_id && !slot->props.has_die_id) {
+            error_setg(errp, "die-id is not supported");
+            return;
+        }
+
         /* skip slots with explicit mismatch */
         if (props->has_thread_id && props->thread_id != slot->props.thread_id) {
                 continue;
@@ -691,6 +700,10 @@ void machine_set_cpu_numa_node(MachineState *machine,
                 continue;
         }
 
+        if (props->has_die_id && props->die_id != slot->props.die_id) {
+                continue;
+        }
+
         if (props->has_socket_id && props->socket_id != slot->props.socket_id) {
                 continue;
         }
@@ -716,6 +729,78 @@ void machine_set_cpu_numa_node(MachineState *machine,
     }
 }
 
+static void smp_parse(MachineState *ms, QemuOpts *opts)
+{
+    if (opts) {
+        unsigned cpus    = qemu_opt_get_number(opts, "cpus", 0);
+        unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
+        unsigned cores   = qemu_opt_get_number(opts, "cores", 0);
+        unsigned threads = qemu_opt_get_number(opts, "threads", 0);
+
+        /* compute missing values, prefer sockets over cores over threads */
+        if (cpus == 0 || sockets == 0) {
+            cores = cores > 0 ? cores : 1;
+            threads = threads > 0 ? threads : 1;
+            if (cpus == 0) {
+                sockets = sockets > 0 ? sockets : 1;
+                cpus = cores * threads * sockets;
+            } else {
+                ms->smp.max_cpus =
+                        qemu_opt_get_number(opts, "maxcpus", cpus);
+                sockets = ms->smp.max_cpus / (cores * threads);
+            }
+        } else if (cores == 0) {
+            threads = threads > 0 ? threads : 1;
+            cores = cpus / (sockets * threads);
+            cores = cores > 0 ? cores : 1;
+        } else if (threads == 0) {
+            threads = cpus / (cores * sockets);
+            threads = threads > 0 ? threads : 1;
+        } else if (sockets * cores * threads < cpus) {
+            error_report("cpu topology: "
+                         "sockets (%u) * cores (%u) * threads (%u) < "
+                         "smp_cpus (%u)",
+                         sockets, cores, threads, cpus);
+            exit(1);
+        }
+
+        ms->smp.max_cpus =
+                qemu_opt_get_number(opts, "maxcpus", cpus);
+
+        if (ms->smp.max_cpus < cpus) {
+            error_report("maxcpus must be equal to or greater than smp");
+            exit(1);
+        }
+
+        if (sockets * cores * threads > ms->smp.max_cpus) {
+            error_report("cpu topology: "
+                         "sockets (%u) * cores (%u) * threads (%u) > "
+                         "maxcpus (%u)",
+                         sockets, cores, threads,
+                         ms->smp.max_cpus);
+            exit(1);
+        }
+
+        if (sockets * cores * threads != ms->smp.max_cpus) {
+            warn_report("Invalid CPU topology deprecated: "
+                        "sockets (%u) * cores (%u) * threads (%u) "
+                        "!= maxcpus (%u)",
+                        sockets, cores, threads,
+                        ms->smp.max_cpus);
+        }
+
+        ms->smp.cpus = cpus;
+        ms->smp.cores = cores;
+        ms->smp.threads = threads;
+    }
+
+    if (ms->smp.cpus > 1) {
+        Error *blocker = NULL;
+        error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp");
+        replay_add_blocker(blocker);
+    }
+}
+
 static void machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -723,6 +808,7 @@ static void machine_class_init(ObjectClass *oc, void *data)
     /* Default 128 MB as guest ram size */
     mc->default_ram_size = 128 * MiB;
     mc->rom_file_has_mr = true;
+    mc->smp_parse = smp_parse;
 
     /* numa node memory size aligned on 8MB by default.
      * On Linux, each node's border has to be 8MB aligned
@@ -948,6 +1034,9 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
     if (cpu->props.has_socket_id) {
         g_string_append_printf(s, "socket-id: %"PRId64, cpu->props.socket_id);
     }
+    if (cpu->props.has_die_id) {
+        g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id);
+    }
     if (cpu->props.has_core_id) {
         if (s->len) {
             g_string_append_printf(s, ", ");
diff --git a/hw/core/numa.c b/hw/core/numa.c
new file mode 100644
index 0000000000..a11431483c
--- /dev/null
+++ b/hw/core/numa.c
@@ -0,0 +1,613 @@
+/*
+ * NUMA parameter parsing routines
+ *
+ * Copyright (c) 2014 Fujitsu Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/numa.h"
+#include "exec/cpu-common.h"
+#include "exec/ramlist.h"
+#include "qemu/bitmap.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi/opts-visitor.h"
+#include "qapi/qapi-visit-machine.h"
+#include "sysemu/qtest.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/mem/memory-device.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
+#include "qemu/cutils.h"
+
+QemuOptsList qemu_numa_opts = {
+    .name = "numa",
+    .implied_opt_name = "type",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
+    .desc = { { 0 } } /* validated with OptsVisitor */
+};
+
+static int have_memdevs;
+static int have_mem;
+static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one.
+                             * For all nodes, nodeid < max_numa_nodeid
+                             */
+int nb_numa_nodes;
+bool have_numa_distance;
+NodeInfo numa_info[MAX_NODES];
+
+
+static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
+                            Error **errp)
+{
+    Error *err = NULL;
+    uint16_t nodenr;
+    uint16List *cpus = NULL;
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    unsigned int max_cpus = ms->smp.max_cpus;
+
+    if (node->has_nodeid) {
+        nodenr = node->nodeid;
+    } else {
+        nodenr = nb_numa_nodes;
+    }
+
+    if (nodenr >= MAX_NODES) {
+        error_setg(errp, "Max number of NUMA nodes reached: %"
+                   PRIu16 "", nodenr);
+        return;
+    }
+
+    if (numa_info[nodenr].present) {
+        error_setg(errp, "Duplicate NUMA nodeid: %" PRIu16, nodenr);
+        return;
+    }
+
+    if (!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id) {
+        error_setg(errp, "NUMA is not supported by this machine-type");
+        return;
+    }
+    for (cpus = node->cpus; cpus; cpus = cpus->next) {
+        CpuInstanceProperties props;
+        if (cpus->value >= max_cpus) {
+            error_setg(errp,
+                       "CPU index (%" PRIu16 ")"
+                       " should be smaller than maxcpus (%d)",
+                       cpus->value, max_cpus);
+            return;
+        }
+        props = mc->cpu_index_to_instance_props(ms, cpus->value);
+        props.node_id = nodenr;
+        props.has_node_id = true;
+        machine_set_cpu_numa_node(ms, &props, &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+    }
+
+    have_memdevs = have_memdevs ? : node->has_memdev;
+    have_mem = have_mem ? : node->has_mem;
+    if ((node->has_mem && have_memdevs) || (node->has_memdev && have_mem)) {
+        error_setg(errp, "numa configuration should use either mem= or memdev=,"
+                   "mixing both is not allowed");
+        return;
+    }
+
+    if (node->has_mem) {
+        numa_info[nodenr].node_mem = node->mem;
+        if (!qtest_enabled()) {
+            warn_report("Parameter -numa node,mem is deprecated,"
+                        " use -numa node,memdev instead");
+        }
+    }
+    if (node->has_memdev) {
+        Object *o;
+        o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL);
+        if (!o) {
+            error_setg(errp, "memdev=%s is ambiguous", node->memdev);
+            return;
+        }
+
+        object_ref(o);
+        numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL);
+        numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
+    }
+    numa_info[nodenr].present = true;
+    max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
+    nb_numa_nodes++;
+}
+
+static void parse_numa_distance(NumaDistOptions *dist, Error **errp)
+{
+    uint16_t src = dist->src;
+    uint16_t dst = dist->dst;
+    uint8_t val = dist->val;
+
+    if (src >= MAX_NODES || dst >= MAX_NODES) {
+        error_setg(errp, "Parameter '%s' expects an integer between 0 and %d",
+                   src >= MAX_NODES ? "src" : "dst", MAX_NODES - 1);
+        return;
+    }
+
+    if (!numa_info[src].present || !numa_info[dst].present) {
+        error_setg(errp, "Source/Destination NUMA node is missing. "
+                   "Please use '-numa node' option to declare it first.");
+        return;
+    }
+
+    if (val < NUMA_DISTANCE_MIN) {
+        error_setg(errp, "NUMA distance (%" PRIu8 ") is invalid, "
+                   "it shouldn't be less than %d.",
+                   val, NUMA_DISTANCE_MIN);
+        return;
+    }
+
+    if (src == dst && val != NUMA_DISTANCE_MIN) {
+        error_setg(errp, "Local distance of node %d should be %d.",
+                   src, NUMA_DISTANCE_MIN);
+        return;
+    }
+
+    numa_info[src].distance[dst] = val;
+    have_numa_distance = true;
+}
+
+void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
+{
+    Error *err = NULL;
+
+    switch (object->type) {
+    case NUMA_OPTIONS_TYPE_NODE:
+        parse_numa_node(ms, &object->u.node, &err);
+        if (err) {
+            goto end;
+        }
+        break;
+    case NUMA_OPTIONS_TYPE_DIST:
+        parse_numa_distance(&object->u.dist, &err);
+        if (err) {
+            goto end;
+        }
+        break;
+    case NUMA_OPTIONS_TYPE_CPU:
+        if (!object->u.cpu.has_node_id) {
+            error_setg(&err, "Missing mandatory node-id property");
+            goto end;
+        }
+        if (!numa_info[object->u.cpu.node_id].present) {
+            error_setg(&err, "Invalid node-id=%" PRId64 ", NUMA node must be "
+                "defined with -numa node,nodeid=ID before it's used with "
+                "-numa cpu,node-id=ID", object->u.cpu.node_id);
+            goto end;
+        }
+
+        machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
+                                  &err);
+        break;
+    default:
+        abort();
+    }
+
+end:
+    error_propagate(errp, err);
+}
+
+static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
+{
+    NumaOptions *object = NULL;
+    MachineState *ms = MACHINE(opaque);
+    Error *err = NULL;
+    Visitor *v = opts_visitor_new(opts);
+
+    visit_type_NumaOptions(v, NULL, &object, &err);
+    visit_free(v);
+    if (err) {
+        goto end;
+    }
+
+    /* Fix up legacy suffix-less format */
+    if ((object->type == NUMA_OPTIONS_TYPE_NODE) && object->u.node.has_mem) {
+        const char *mem_str = qemu_opt_get(opts, "mem");
+        qemu_strtosz_MiB(mem_str, NULL, &object->u.node.mem);
+    }
+
+    set_numa_options(ms, object, &err);
+
+end:
+    qapi_free_NumaOptions(object);
+    if (err) {
+        error_propagate(errp, err);
+        return -1;
+    }
+
+    return 0;
+}
+
+/* If all node pair distances are symmetric, then only distances
+ * in one direction are enough. If there is even one asymmetric
+ * pair, though, then all distances must be provided. The
+ * distance from a node to itself is always NUMA_DISTANCE_MIN,
+ * so providing it is never necessary.
+ */
+static void validate_numa_distance(void)
+{
+    int src, dst;
+    bool is_asymmetrical = false;
+
+    for (src = 0; src < nb_numa_nodes; src++) {
+        for (dst = src; dst < nb_numa_nodes; dst++) {
+            if (numa_info[src].distance[dst] == 0 &&
+                numa_info[dst].distance[src] == 0) {
+                if (src != dst) {
+                    error_report("The distance between node %d and %d is "
+                                 "missing, at least one distance value "
+                                 "between each nodes should be provided.",
+                                 src, dst);
+                    exit(EXIT_FAILURE);
+                }
+            }
+
+            if (numa_info[src].distance[dst] != 0 &&
+                numa_info[dst].distance[src] != 0 &&
+                numa_info[src].distance[dst] !=
+                numa_info[dst].distance[src]) {
+                is_asymmetrical = true;
+            }
+        }
+    }
+
+    if (is_asymmetrical) {
+        for (src = 0; src < nb_numa_nodes; src++) {
+            for (dst = 0; dst < nb_numa_nodes; dst++) {
+                if (src != dst && numa_info[src].distance[dst] == 0) {
+                    error_report("At least one asymmetrical pair of "
+                            "distances is given, please provide distances "
+                            "for both directions of all node pairs.");
+                    exit(EXIT_FAILURE);
+                }
+            }
+        }
+    }
+}
+
+static void complete_init_numa_distance(void)
+{
+    int src, dst;
+
+    /* Fixup NUMA distance by symmetric policy because if it is an
+     * asymmetric distance table, it should be a complete table and
+     * there would not be any missing distance except local node, which
+     * is verified by validate_numa_distance above.
+     */
+    for (src = 0; src < nb_numa_nodes; src++) {
+        for (dst = 0; dst < nb_numa_nodes; dst++) {
+            if (numa_info[src].distance[dst] == 0) {
+                if (src == dst) {
+                    numa_info[src].distance[dst] = NUMA_DISTANCE_MIN;
+                } else {
+                    numa_info[src].distance[dst] = numa_info[dst].distance[src];
+                }
+            }
+        }
+    }
+}
+
+void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+                                 int nb_nodes, ram_addr_t size)
+{
+    int i;
+    uint64_t usedmem = 0;
+
+    /* Align each node according to the alignment
+     * requirements of the machine class
+     */
+
+    for (i = 0; i < nb_nodes - 1; i++) {
+        nodes[i].node_mem = (size / nb_nodes) &
+                            ~((1 << mc->numa_mem_align_shift) - 1);
+        usedmem += nodes[i].node_mem;
+    }
+    nodes[i].node_mem = size - usedmem;
+}
+
+void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+                                  int nb_nodes, ram_addr_t size)
+{
+    int i;
+    uint64_t usedmem = 0, node_mem;
+    uint64_t granularity = size / nb_nodes;
+    uint64_t propagate = 0;
+
+    for (i = 0; i < nb_nodes - 1; i++) {
+        node_mem = (granularity + propagate) &
+                   ~((1 << mc->numa_mem_align_shift) - 1);
+        propagate = granularity + propagate - node_mem;
+        nodes[i].node_mem = node_mem;
+        usedmem += node_mem;
+    }
+    nodes[i].node_mem = size - usedmem;
+}
+
+void numa_complete_configuration(MachineState *ms)
+{
+    int i;
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+
+    /*
+     * If memory hotplug is enabled (slots > 0) but without '-numa'
+     * options explicitly on CLI, guestes will break.
+     *
+     *   Windows: won't enable memory hotplug without SRAT table at all
+     *
+     *   Linux: if QEMU is started with initial memory all below 4Gb
+     *   and no SRAT table present, guest kernel will use nommu DMA ops,
+     *   which breaks 32bit hw drivers when memory is hotplugged and
+     *   guest tries to use it with that drivers.
+     *
+     * Enable NUMA implicitly by adding a new NUMA node automatically.
+     */
+    if (ms->ram_slots > 0 && nb_numa_nodes == 0 &&
+        mc->auto_enable_numa_with_memhp) {
+            NumaNodeOptions node = { };
+            parse_numa_node(ms, &node, &error_abort);
+    }
+
+    assert(max_numa_nodeid <= MAX_NODES);
+
+    /* No support for sparse NUMA node IDs yet: */
+    for (i = max_numa_nodeid - 1; i >= 0; i--) {
+        /* Report large node IDs first, to make mistakes easier to spot */
+        if (!numa_info[i].present) {
+            error_report("numa: Node ID missing: %d", i);
+            exit(1);
+        }
+    }
+
+    /* This must be always true if all nodes are present: */
+    assert(nb_numa_nodes == max_numa_nodeid);
+
+    if (nb_numa_nodes > 0) {
+        uint64_t numa_total;
+
+        if (nb_numa_nodes > MAX_NODES) {
+            nb_numa_nodes = MAX_NODES;
+        }
+
+        /* If no memory size is given for any node, assume the default case
+         * and distribute the available memory equally across all nodes
+         */
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (numa_info[i].node_mem != 0) {
+                break;
+            }
+        }
+        if (i == nb_numa_nodes) {
+            assert(mc->numa_auto_assign_ram);
+            mc->numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size);
+            if (!qtest_enabled()) {
+                warn_report("Default splitting of RAM between nodes is deprecated,"
+                            " Use '-numa node,memdev' to explictly define RAM"
+                            " allocation per node");
+            }
+        }
+
+        numa_total = 0;
+        for (i = 0; i < nb_numa_nodes; i++) {
+            numa_total += numa_info[i].node_mem;
+        }
+        if (numa_total != ram_size) {
+            error_report("total memory for NUMA nodes (0x%" PRIx64 ")"
+                         " should equal RAM size (0x" RAM_ADDR_FMT ")",
+                         numa_total, ram_size);
+            exit(1);
+        }
+
+        /* QEMU needs at least all unique node pair distances to build
+         * the whole NUMA distance table. QEMU treats the distance table
+         * as symmetric by default, i.e. distance A->B == distance B->A.
+         * Thus, QEMU is able to complete the distance table
+         * initialization even though only distance A->B is provided and
+         * distance B->A is not. QEMU knows the distance of a node to
+         * itself is always 10, so A->A distances may be omitted. When
+         * the distances of two nodes of a pair differ, i.e. distance
+         * A->B != distance B->A, then that means the distance table is
+         * asymmetric. In this case, the distances for both directions
+         * of all node pairs are required.
+         */
+        if (have_numa_distance) {
+            /* Validate enough NUMA distance information was provided. */
+            validate_numa_distance();
+
+            /* Validation succeeded, now fill in any missing distances. */
+            complete_init_numa_distance();
+        }
+    }
+}
+
+void parse_numa_opts(MachineState *ms)
+{
+    qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, &error_fatal);
+}
+
+void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp)
+{
+    int node_id = object_property_get_int(OBJECT(dev), "node-id", &error_abort);
+
+    if (node_id == CPU_UNSET_NUMA_NODE_ID) {
+        /* due to bug in libvirt, it doesn't pass node-id from props on
+         * device_add as expected, so we have to fix it up here */
+        if (slot->props.has_node_id) {
+            object_property_set_int(OBJECT(dev), slot->props.node_id,
+                                    "node-id", errp);
+        }
+    } else if (node_id != slot->props.node_id) {
+        error_setg(errp, "invalid node-id, must be %"PRId64,
+                   slot->props.node_id);
+    }
+}
+
+static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
+                                           const char *name,
+                                           uint64_t ram_size)
+{
+    if (mem_path) {
+#ifdef __linux__
+        Error *err = NULL;
+        memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0,
+                                         mem_path, &err);
+        if (err) {
+            error_report_err(err);
+            if (mem_prealloc) {
+                exit(1);
+            }
+            warn_report("falling back to regular RAM allocation");
+            error_printf("This is deprecated. Make sure that -mem-path "
+                         " specified path has sufficient resources to allocate"
+                         " -m specified RAM amount");
+            /* Legacy behavior: if allocation failed, fall back to
+             * regular RAM allocation.
+             */
+            mem_path = NULL;
+            memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal);
+        }
+#else
+        fprintf(stderr, "-mem-path not supported on this host\n");
+        exit(1);
+#endif
+    } else {
+        memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal);
+    }
+    vmstate_register_ram_global(mr);
+}
+
+void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
+                                          const char *name,
+                                          uint64_t ram_size)
+{
+    uint64_t addr = 0;
+    int i;
+
+    if (nb_numa_nodes == 0 || !have_memdevs) {
+        allocate_system_memory_nonnuma(mr, owner, name, ram_size);
+        return;
+    }
+
+    memory_region_init(mr, owner, name, ram_size);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        uint64_t size = numa_info[i].node_mem;
+        HostMemoryBackend *backend = numa_info[i].node_memdev;
+        if (!backend) {
+            continue;
+        }
+        MemoryRegion *seg = host_memory_backend_get_memory(backend);
+
+        if (memory_region_is_mapped(seg)) {
+            char *path = object_get_canonical_path_component(OBJECT(backend));
+            error_report("memory backend %s is used multiple times. Each "
+                         "-numa option must use a different memdev value.",
+                         path);
+            g_free(path);
+            exit(1);
+        }
+
+        host_memory_backend_set_mapped(backend, true);
+        memory_region_add_subregion(mr, addr, seg);
+        vmstate_register_ram_global(seg);
+        addr += size;
+    }
+}
+
+static void numa_stat_memory_devices(NumaNodeMem node_mem[])
+{
+    MemoryDeviceInfoList *info_list = qmp_memory_device_list();
+    MemoryDeviceInfoList *info;
+    PCDIMMDeviceInfo     *pcdimm_info;
+    VirtioPMEMDeviceInfo *vpi;
+
+    for (info = info_list; info; info = info->next) {
+        MemoryDeviceInfo *value = info->value;
+
+        if (value) {
+            switch (value->type) {
+            case MEMORY_DEVICE_INFO_KIND_DIMM:
+            case MEMORY_DEVICE_INFO_KIND_NVDIMM:
+                pcdimm_info = value->type == MEMORY_DEVICE_INFO_KIND_DIMM ?
+                              value->u.dimm.data : value->u.nvdimm.data;
+                node_mem[pcdimm_info->node].node_mem += pcdimm_info->size;
+                node_mem[pcdimm_info->node].node_plugged_mem +=
+                    pcdimm_info->size;
+                break;
+            case MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM:
+                vpi = value->u.virtio_pmem.data;
+                /* TODO: once we support numa, assign to right node */
+                node_mem[0].node_mem += vpi->size;
+                node_mem[0].node_plugged_mem += vpi->size;
+                break;
+            default:
+                g_assert_not_reached();
+            }
+        }
+    }
+    qapi_free_MemoryDeviceInfoList(info_list);
+}
+
+void query_numa_node_mem(NumaNodeMem node_mem[])
+{
+    int i;
+
+    if (nb_numa_nodes <= 0) {
+        return;
+    }
+
+    numa_stat_memory_devices(node_mem);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        node_mem[i].node_mem += numa_info[i].node_mem;
+    }
+}
+
+void ram_block_notifier_add(RAMBlockNotifier *n)
+{
+    QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next);
+}
+
+void ram_block_notifier_remove(RAMBlockNotifier *n)
+{
+    QLIST_REMOVE(n, next);
+}
+
+void ram_block_notify_add(void *host, size_t size)
+{
+    RAMBlockNotifier *notifier;
+
+    QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
+        notifier->ram_block_added(notifier, host, size);
+    }
+}
+
+void ram_block_notify_remove(void *host, size_t size)
+{
+    RAMBlockNotifier *notifier;
+
+    QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
+        notifier->ram_block_removed(notifier, host, size);
+    }
+}
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index f9b6efe509..94ebc0a4a1 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -29,7 +29,7 @@
 #include "hw/qdev.h"
 #include "sysemu/sysemu.h"
 #include "qapi/error.h"
-#include "qapi/qapi-events-misc.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
diff --git a/hw/core/vm-change-state-handler.c b/hw/core/vm-change-state-handler.c
new file mode 100644
index 0000000000..f814813bdd
--- /dev/null
+++ b/hw/core/vm-change-state-handler.c
@@ -0,0 +1,61 @@
+/*
+ *  qdev vm change state handlers
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License,
+ *  or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev.h"
+
+static int qdev_get_dev_tree_depth(DeviceState *dev)
+{
+    int depth;
+
+    for (depth = 0; dev; depth++) {
+        BusState *bus = dev->parent_bus;
+
+        if (!bus) {
+            break;
+        }
+
+        dev = bus->parent;
+    }
+
+    return depth;
+}
+
+/**
+ * qdev_add_vm_change_state_handler:
+ * @dev: the device that owns this handler
+ * @cb: the callback function to be invoked
+ * @opaque: user data passed to the callback function
+ *
+ * This function works like qemu_add_vm_change_state_handler() except callbacks
+ * are invoked in qdev tree depth order.  Ordering is desirable when callbacks
+ * of children depend on their parent's callback having completed first.
+ *
+ * For example, when qdev_add_vm_change_state_handler() is used, a host
+ * controller's callback is invoked before the children on its bus when the VM
+ * starts running.  The order is reversed when the VM stops running.
+ *
+ * Returns: an entry to be freed with qemu_del_vm_change_state_handler()
+ */
+VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
+                                                     VMChangeStateHandler *cb,
+                                                     void *opaque)
+{
+    int depth = qdev_get_dev_tree_depth(dev);
+
+    return qemu_add_vm_change_state_handler_prio(cb, opaque, depth);
+}