summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-xconfigure6
-rw-r--r--cpu-exec.c6
-rw-r--r--hw/arm/virt-acpi-build.c4
-rw-r--r--hw/arm/virt.c16
-rw-r--r--hw/core/machine.c37
-rw-r--r--hw/i386/acpi-build.c3
-rw-r--r--hw/i386/pc.c21
-rw-r--r--hw/ppc/spapr.c41
-rw-r--r--hw/ppc/spapr_cpu_core.c4
-rw-r--r--include/exec/exec-all.h2
-rw-r--r--include/exec/tb-hash.h12
-rw-r--r--include/hw/i386/pc.h42
-rw-r--r--include/qemu/atomic.h34
-rw-r--r--include/qom/cpu.h2
-rw-r--r--include/sysemu/numa.h1
-rw-r--r--monitor.c11
-rw-r--r--numa.c43
-rwxr-xr-xscripts/device-crash-test624
-rw-r--r--scripts/qemu.py17
-rw-r--r--target/alpha/translate.c30
-rw-r--r--target/arm/cpu.c2
-rw-r--r--target/arm/cpu.h2
-rw-r--r--target/arm/translate-a64.c5
-rw-r--r--target/arm/translate.c21
-rw-r--r--target/arm/translate.h4
-rw-r--r--target/hppa/translate.c8
-rw-r--r--target/i386/cpu.c2
-rw-r--r--target/i386/cpu.h1
-rw-r--r--target/i386/translate.c43
-rw-r--r--target/mips/translate.c4
-rw-r--r--target/nios2/translate.c2
-rw-r--r--target/ppc/cpu.h1
-rw-r--r--target/s390x/translate.c17
-rw-r--r--tcg-runtime.c32
-rw-r--r--tcg/README8
-rw-r--r--tcg/aarch64/tcg-target.h1
-rw-r--r--tcg/aarch64/tcg-target.inc.c22
-rw-r--r--tcg/arm/tcg-target.h1
-rw-r--r--tcg/arm/tcg-target.inc.c54
-rw-r--r--tcg/i386/tcg-target.h1
-rw-r--r--tcg/i386/tcg-target.inc.c24
-rw-r--r--tcg/ia64/tcg-target.h1
-rw-r--r--tcg/mips/tcg-target.h1
-rw-r--r--tcg/mips/tcg-target.inc.c13
-rw-r--r--tcg/ppc/tcg-target.h1
-rw-r--r--tcg/ppc/tcg-target.inc.c7
-rw-r--r--tcg/s390/tcg-target.h1
-rw-r--r--tcg/s390/tcg-target.inc.c24
-rw-r--r--tcg/sparc/tcg-target.h1
-rw-r--r--tcg/sparc/tcg-target.inc.c11
-rw-r--r--tcg/tcg-op.c12
-rw-r--r--tcg/tcg-op.h11
-rw-r--r--tcg/tcg-opc.h1
-rw-r--r--tcg/tcg-runtime.h2
-rw-r--r--tcg/tcg.c5
-rw-r--r--tcg/tcg.h1
-rw-r--r--tcg/tci/tcg-target.h1
-rw-r--r--tests/test-x86-cpuid-compat.c38
58 files changed, 1127 insertions, 215 deletions
diff --git a/configure b/configure
index fbb6a93c99..13e040d28c 100755
--- a/configure
+++ b/configure
@@ -1213,12 +1213,12 @@ case "$cpu" in
            LDFLAGS="-m64 $LDFLAGS"
            ;;
     sparc)
-           LDFLAGS="-m32 $LDFLAGS"
-           CPU_CFLAGS="-m32 -mcpu=ultrasparc"
+           CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc"
+           LDFLAGS="-m32 -mv8plus $LDFLAGS"
            ;;
     sparc64)
-           LDFLAGS="-m64 $LDFLAGS"
            CPU_CFLAGS="-m64 -mcpu=ultrasparc"
+           LDFLAGS="-m64 $LDFLAGS"
            ;;
     s390)
            CPU_CFLAGS="-m31"
diff --git a/cpu-exec.c b/cpu-exec.c
index 63a56d0407..5b181c18ed 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -309,10 +309,8 @@ static bool tb_cmp(const void *p, const void *d)
     return false;
 }
 
-static TranslationBlock *tb_htable_lookup(CPUState *cpu,
-                                          target_ulong pc,
-                                          target_ulong cs_base,
-                                          uint32_t flags)
+TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+                                   target_ulong cs_base, uint32_t flags)
 {
     tb_page_addr_t phys_pc;
     struct tb_desc desc;
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 2079828c22..3d78ff68e6 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -496,12 +496,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     srat->reserved1 = cpu_to_le32(1);
 
     for (i = 0; i < cpu_list->len; ++i) {
-        int node_id = cpu_list->cpus[i].props.has_node_id ?
-            cpu_list->cpus[i].props.node_id : 0;
         core = acpi_data_push(table_data, sizeof(*core));
         core->type = ACPI_SRAT_PROCESSOR_GICC;
         core->length = sizeof(*core);
-        core->proximity = cpu_to_le32(node_id);
+        core->proximity = cpu_to_le32(cpu_list->cpus[i].props.node_id);
         core->acpi_processor_uid = cpu_to_le32(i);
         core->flags = cpu_to_le32(1);
     }
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 4db2d4207c..010f7244bf 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1372,7 +1372,6 @@ static void machvirt_init(MachineState *machine)
     for (n = 0; n < possible_cpus->len; n++) {
         Object *cpuobj;
         CPUState *cs;
-        int node_id;
 
         if (n >= smp_cpus) {
             break;
@@ -1385,19 +1384,8 @@ static void machvirt_init(MachineState *machine)
         cs = CPU(cpuobj);
         cs->cpu_index = n;
 
-        node_id = possible_cpus->cpus[cs->cpu_index].props.node_id;
-        if (!possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
-            /* by default CPUState::numa_node was 0 if it's not set via CLI
-             * keep it this way for now but in future we probably should
-             * refuse to start up with incomplete numa mapping */
-             node_id = 0;
-        }
-        if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) {
-            cs->numa_node = node_id;
-        } else {
-            /* CPU isn't device_add compatible yet, this shouldn't happen */
-            error_setg(&error_abort, "user set node-id not implemented");
-        }
+        numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj),
+                          &error_fatal);
 
         if (!vms->secure) {
             object_property_set_bool(cpuobj, false, "has_el3", NULL);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 3adebf14c4..2e7e9778cd 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -701,26 +701,43 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
     return g_string_free(s, false);
 }
 
-static void machine_numa_validate(MachineState *machine)
+static void machine_numa_finish_init(MachineState *machine)
 {
     int i;
+    bool default_mapping;
     GString *s = g_string_new(NULL);
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(machine);
 
     assert(nb_numa_nodes);
     for (i = 0; i < possible_cpus->len; i++) {
+        if (possible_cpus->cpus[i].props.has_node_id) {
+            break;
+        }
+    }
+    default_mapping = (i == possible_cpus->len);
+
+    for (i = 0; i < possible_cpus->len; i++) {
         const CPUArchId *cpu_slot = &possible_cpus->cpus[i];
 
-        /* at this point numa mappings are initilized by CLI options
-         * or with default mappings so it's sufficient to list
-         * all not yet mapped CPUs here */
-        /* TODO: make it hard error in future */
         if (!cpu_slot->props.has_node_id) {
-            char *cpu_str = cpu_slot_to_string(cpu_slot);
-            g_string_append_printf(s, "%sCPU %d [%s]", s->len ? ", " : "", i,
-                                   cpu_str);
-            g_free(cpu_str);
+            /* fetch default mapping from board and enable it */
+            CpuInstanceProperties props = cpu_slot->props;
+
+            if (!default_mapping) {
+                /* record slots with not set mapping,
+                 * TODO: make it hard error in future */
+                char *cpu_str = cpu_slot_to_string(cpu_slot);
+                g_string_append_printf(s, "%sCPU %d [%s]",
+                                       s->len ? ", " : "", i, cpu_str);
+                g_free(cpu_str);
+
+                /* non mapped cpus used to fallback to node 0 */
+                props.node_id = 0;
+            }
+
+            props.has_node_id = true;
+            machine_set_cpu_numa_node(machine, &props, &error_fatal);
         }
     }
     if (s->len && !qtest_enabled()) {
@@ -738,7 +755,7 @@ void machine_run_board_init(MachineState *machine)
     MachineClass *machine_class = MACHINE_GET_CLASS(machine);
 
     if (nb_numa_nodes) {
-        machine_numa_validate(machine);
+        machine_numa_finish_init(machine);
     }
     machine_class->init(machine);
 }
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 82bd44f38e..ce74c84460 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2335,8 +2335,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
     srat->reserved1 = cpu_to_le32(1);
 
     for (i = 0; i < apic_ids->len; i++) {
-        int node_id = apic_ids->cpus[i].props.has_node_id ?
-            apic_ids->cpus[i].props.node_id : 0;
+        int node_id = apic_ids->cpus[i].props.node_id;
         uint32_t apic_id = apic_ids->cpus[i].arch_id;
 
         if (apic_id < 255) {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 107a34125b..5b8c6fbbea 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -788,9 +788,7 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
     for (i = 0; i < cpus->len; i++) {
         unsigned int apic_id = cpus->cpus[i].arch_id;
         assert(apic_id < pcms->apic_id_limit);
-        if (cpus->cpus[i].props.has_node_id) {
-            numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id);
-        }
+        numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id);
     }
     for (i = 0; i < nb_numa_nodes; i++) {
         numa_fw_cfg[pcms->apic_id_limit + 1 + i] =
@@ -1893,7 +1891,6 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
                             DeviceState *dev, Error **errp)
 {
     int idx;
-    int node_id;
     CPUState *cs;
     CPUArchId *cpu_slot;
     X86CPUTopoInfo topo;
@@ -1984,21 +1981,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
     cs = CPU(cpu);
     cs->cpu_index = idx;
 
-    node_id = cpu_slot->props.node_id;
-    if (!cpu_slot->props.has_node_id) {
-        /* by default CPUState::numa_node was 0 if it's not set via CLI
-         * keep it this way for now but in future we probably should
-         * refuse to start up with incomplete numa mapping */
-        node_id = 0;
-    }
-    if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) {
-        cs->numa_node = node_id;
-    } else if (cs->numa_node != node_id) {
-            error_setg(errp, "node-id %d must match numa node specified"
-                "with -numa option for cpu-index %d",
-                cs->numa_node, cs->cpu_index);
-            return;
-    }
+    numa_cpu_pre_plug(cpu_slot, dev, errp);
 }
 
 static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 86e622834f..91b4057933 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -183,25 +183,19 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
     return ret;
 }
 
-static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs)
+static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu)
 {
-    int ret = 0;
-    PowerPCCPU *cpu = POWERPC_CPU(cs);
     int index = ppc_get_vcpu_dt_id(cpu);
     uint32_t associativity[] = {cpu_to_be32(0x5),
                                 cpu_to_be32(0x0),
                                 cpu_to_be32(0x0),
                                 cpu_to_be32(0x0),
-                                cpu_to_be32(cs->numa_node),
+                                cpu_to_be32(cpu->node_id),
                                 cpu_to_be32(index)};
 
     /* Advertise NUMA via ibm,associativity */
-    if (nb_numa_nodes > 1) {
-        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
+    return fdt_setprop(fdt, offset, "ibm,associativity", associativity,
                           sizeof(associativity));
-    }
-
-    return ret;
 }
 
 /* Populate the "ibm,pa-features" property */
@@ -326,9 +320,11 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
             return ret;
         }
 
-        ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs);
-        if (ret < 0) {
-            return ret;
+        if (nb_numa_nodes > 1) {
+            ret = spapr_fixup_cpu_numa_dt(fdt, offset, cpu);
+            if (ret < 0) {
+                return ret;
+            }
         }
 
         ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt);
@@ -541,7 +537,9 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
     _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
                       pft_size_prop, sizeof(pft_size_prop))));
 
-    _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs));
+    if (nb_numa_nodes > 1) {
+        _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu));
+    }
 
     _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
 
@@ -2920,11 +2918,9 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
     Error *local_err = NULL;
     CPUCore *cc = CPU_CORE(dev);
-    sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev);
     char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model);
     const char *type = object_get_typename(OBJECT(dev));
     CPUArchId *core_slot;
-    int node_id;
     int index;
 
     if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
@@ -2965,20 +2961,7 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
-    node_id = core_slot->props.node_id;
-    if (!core_slot->props.has_node_id) {
-        /* by default CPUState::numa_node was 0 if it's not set via CLI
-         * keep it this way for now but in future we probably should
-         * refuse to start up with incomplete numa mapping */
-        node_id = 0;
-    }
-    if (sc->node_id == CPU_UNSET_NUMA_NODE_ID) {
-        sc->node_id = node_id;
-    } else if (sc->node_id != node_id) {
-        error_setg(&local_err, "node-id %d must match numa node specified"
-            "with -numa option for cpu-index %d", sc->node_id, cc->core_id);
-        goto out;
-    }
+    numa_cpu_pre_plug(core_slot, dev, &local_err);
 
 out:
     g_free(base_core_type);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index ff7058ecc0..029a14120e 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -184,15 +184,17 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < cc->nr_threads; i++) {
         char id[32];
         CPUState *cs;
+        PowerPCCPU *cpu;
 
         obj = sc->threads + i * size;
 
         object_initialize(obj, size, typename);
         cs = CPU(obj);
+        cpu = POWERPC_CPU(cs);
         cs->cpu_index = cc->core_id + i;
 
         /* Set NUMA node for the threads belonged to core  */
-        cs->numa_node = sc->node_id;
+        cpu->node_id = sc->node_id;
 
         snprintf(id, sizeof(id), "thread[%d]", i);
         object_property_add_child(OBJECT(sc), id, obj, &local_err);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index bcde1e6a14..87ae10bcc9 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -368,6 +368,8 @@ struct TranslationBlock {
 void tb_free(TranslationBlock *tb);
 void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
+TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+                                   target_ulong cs_base, uint32_t flags);
 
 #if defined(USE_DIRECT_JUMP)
 
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 2c27490cb8..b1fe2d0161 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -22,6 +22,8 @@
 
 #include "exec/tb-hash-xx.h"
 
+#ifdef CONFIG_SOFTMMU
+
 /* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
    addresses on the same page.  The top bits are the same.  This allows
    TLB invalidation to quickly clear a subset of the hash table.  */
@@ -45,6 +47,16 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
            | (tmp & TB_JMP_ADDR_MASK));
 }
 
+#else
+
+/* In user-mode we can get better hashing because we do not have a TLB */
+static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
+{
+    return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
+}
+
+#endif /* CONFIG_SOFTMMU */
+
 static inline
 uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags)
 {
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index e447f5d8f4..d071c9c0e9 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -566,75 +566,75 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
         .value    = "off",\
     },{\
         .driver   = "qemu64" "-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(4),\
     },{\
         .driver   = "kvm64" "-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(5),\
     },{\
         .driver   = "pentium3" "-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(2),\
     },{\
         .driver   = "n270" "-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(5),\
     },{\
         .driver   = "Conroe" "-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(4),\
     },{\
         .driver   = "Penryn" "-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(4),\
     },{\
         .driver   = "Nehalem" "-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(4),\
     },{\
         .driver   = "n270" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "Penryn" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "Conroe" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "Nehalem" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "Westmere" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "SandyBridge" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "IvyBridge" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "Haswell" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "Haswell-noTSX" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "Broadwell" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver   = "Broadwell-noTSX" "-" TYPE_X86_CPU,\
-        .property = "xlevel",\
+        .property = "min-xlevel",\
         .value    = stringify(0x8000000a),\
     },{\
         .driver = TYPE_X86_CPU,\
@@ -860,7 +860,7 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
         .value    = stringify(2),\
     },{\
         .driver   = "Conroe-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(2),\
     },{\
         .driver   = "Penryn-" TYPE_X86_CPU,\
@@ -868,7 +868,7 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
         .value    = stringify(2),\
     },{\
         .driver   = "Penryn-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(2),\
     },{\
         .driver   = "Nehalem-" TYPE_X86_CPU,\
@@ -876,7 +876,7 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
         .value    = stringify(2),\
     },{\
         .driver   = "Nehalem-" TYPE_X86_CPU,\
-        .property = "level",\
+        .property = "min-level",\
         .value    = stringify(2),\
     },{\
         .driver   = "virtio-net-pci",\
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 878fa0700d..e07c7972ab 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -88,6 +88,24 @@
 #define smp_read_barrier_depends()   barrier()
 #endif
 
+/* Sanity check that the size of an atomic operation isn't "overly large".
+ * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not
+ * want to use them because we ought not need them, and this lets us do a
+ * bit of sanity checking that other 32-bit hosts might build.
+ *
+ * That said, we have a problem on 64-bit ILP32 hosts in that in order to
+ * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS.
+ * We'd prefer not want to pull in everything else TCG related, so handle
+ * those few cases by hand.
+ *
+ * Note that x32 is fully detected with __x64_64__ + _ILP32, and that for
+ * Sparc we always force the use of sparcv9 in configure.
+ */
+#if defined(__x86_64__) || defined(__sparc__)
+# define ATOMIC_REG_SIZE  8
+#else
+# define ATOMIC_REG_SIZE  sizeof(void *)
+#endif
 
 /* Weak atomic operations prevent the compiler moving other
  * loads/stores past the atomic operation load/store. However there is
@@ -104,7 +122,7 @@
 
 #define atomic_read(ptr)                              \
     ({                                                \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
     atomic_read__nocheck(ptr);                        \
     })
 
@@ -112,7 +130,7 @@
     __atomic_store_n(ptr, i, __ATOMIC_RELAXED)
 
 #define atomic_set(ptr, i)  do {                      \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
     atomic_set__nocheck(ptr, i);                      \
 } while(0)
 
@@ -130,27 +148,27 @@
 
 #define atomic_rcu_read(ptr)                          \
     ({                                                \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
     typeof_strip_qual(*ptr) _val;                     \
     atomic_rcu_read__nocheck(ptr, &_val);             \
     _val;                                             \
     })
 
 #define atomic_rcu_set(ptr, i) do {                   \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
     __atomic_store_n(ptr, i, __ATOMIC_RELEASE);       \
 } while(0)
 
 #define atomic_load_acquire(ptr)                        \
     ({                                                  \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));   \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);  \
     typeof_strip_qual(*ptr) _val;                       \
     __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE);        \
     _val;                                               \
     })
 
 #define atomic_store_release(ptr, i)  do {              \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));   \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);  \
     __atomic_store_n(ptr, i, __ATOMIC_RELEASE);         \
 } while(0)
 
@@ -162,7 +180,7 @@
 })
 
 #define atomic_xchg(ptr, i)    ({                           \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));       \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);      \
     atomic_xchg__nocheck(ptr, i);                           \
 })
 
@@ -175,7 +193,7 @@
 })
 
 #define atomic_cmpxchg(ptr, old, new)    ({                             \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));                   \
+    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);                  \
     atomic_cmpxchg__nocheck(ptr, old, new);                             \
 })
 
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 55214ce131..89ddb686fb 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -265,7 +265,6 @@ struct qemu_work_item;
  * @cpu_index: CPU index (informative).
  * @nr_cores: Number of cores within this CPU package.
  * @nr_threads: Number of threads within this CPU.
- * @numa_node: NUMA node this CPU is belonging to.
  * @host_tid: Host thread ID.
  * @running: #true if CPU is currently running (lockless).
  * @has_waiter: #true if a CPU is currently waiting for the cpu_exec_end;
@@ -314,7 +313,6 @@ struct CPUState {
 
     int nr_cores;
     int nr_threads;
-    int numa_node;
 
     struct QemuThread *thread;
 #ifdef _WIN32
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 7ffde5b119..610eece211 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -35,4 +35,5 @@ void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
                                  int nb_nodes, ram_addr_t size);
 void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
                                   int nb_nodes, ram_addr_t size);
+void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp);
 #endif
diff --git a/monitor.c b/monitor.c
index 75e7cd26d0..1e63ace2d4 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1696,23 +1696,26 @@ static void hmp_info_mtree(Monitor *mon, const QDict *qdict)
 static void hmp_info_numa(Monitor *mon, const QDict *qdict)
 {
     int i;
-    CPUState *cpu;
     uint64_t *node_mem;
+    CpuInfoList *cpu_list, *cpu;
 
+    cpu_list = qmp_query_cpus(&error_abort);
     node_mem = g_new0(uint64_t, nb_numa_nodes);
     query_numa_node_mem(node_mem);
     monitor_printf(mon, "%d nodes\n", nb_numa_nodes);
     for (i = 0; i < nb_numa_nodes; i++) {
         monitor_printf(mon, "node %d cpus:", i);
-        CPU_FOREACH(cpu) {
-            if (cpu->numa_node == i) {
-                monitor_printf(mon, " %d", cpu->cpu_index);
+        for (cpu = cpu_list; cpu; cpu = cpu->next) {
+            if (cpu->value->has_props && cpu->value->props->has_node_id &&
+                cpu->value->props->node_id == i) {
+                monitor_printf(mon, " %" PRIi64, cpu->value->CPU);
             }
         }
         monitor_printf(mon, "\n");
         monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i,
                        node_mem[i] >> 20);
     }
+    qapi_free_CpuInfoList(cpu_list);
     g_free(node_mem);
 }
 
diff --git a/numa.c b/numa.c
index be50c62aa9..65701cb6c8 100644
--- a/numa.c
+++ b/numa.c
@@ -426,7 +426,6 @@ void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
 void parse_numa_opts(MachineState *ms)
 {
     int i;
-    const CPUArchIdList *possible_cpus;
     MachineClass *mc = MACHINE_GET_CLASS(ms);
 
     if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, NULL)) {
@@ -484,31 +483,6 @@ void parse_numa_opts(MachineState *ms)
 
         numa_set_mem_ranges();
 
-        /* assign CPUs to nodes using board provided default mapping */
-        if (!mc->cpu_index_to_instance_props || !mc->possible_cpu_arch_ids) {
-            error_report("default CPUs to NUMA node mapping isn't supported");
-            exit(1);
-        }
-
-        possible_cpus = mc->possible_cpu_arch_ids(ms);
-        for (i = 0; i < possible_cpus->len; i++) {
-            if (possible_cpus->cpus[i].props.has_node_id) {
-                break;
-            }
-        }
-
-        /* no CPUs are assigned to NUMA nodes */
-        if (i == possible_cpus->len) {
-            for (i = 0; i < max_cpus; i++) {
-                CpuInstanceProperties props;
-                /* fetch default mapping from board and enable it */
-                props = mc->cpu_index_to_instance_props(ms, i);
-                props.has_node_id = true;
-
-                machine_set_cpu_numa_node(ms, &props, &error_fatal);
-            }
-        }
-
         /* QEMU needs at least all unique node pair distances to build
          * the whole NUMA distance table. QEMU treats the distance table
          * as symmetric by default, i.e. distance A->B == distance B->A.
@@ -533,6 +507,23 @@ void parse_numa_opts(MachineState *ms)
     }
 }
 
+void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp)
+{
+    int node_id = object_property_get_int(OBJECT(dev), "node-id", &error_abort);
+
+    if (node_id == CPU_UNSET_NUMA_NODE_ID) {
+        /* due to bug in libvirt, it doesn't pass node-id from props on
+         * device_add as expected, so we have to fix it up here */
+        if (slot->props.has_node_id) {
+            object_property_set_int(OBJECT(dev), slot->props.node_id,
+                                    "node-id", errp);
+        }
+    } else if (node_id != slot->props.node_id) {
+        error_setg(errp, "node-id=%d must match numa node specified "
+                   "with -numa option", node_id);
+    }
+}
+
 static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
                                            const char *name,
                                            uint64_t ram_size)
diff --git a/scripts/device-crash-test b/scripts/device-crash-test
new file mode 100755
index 0000000000..5f90e9bb54
--- /dev/null
+++ b/scripts/device-crash-test
@@ -0,0 +1,624 @@
+#!/usr/bin/env python2.7
+#
+#  Copyright (c) 2017 Red Hat Inc
+#
+# Author:
+#  Eduardo Habkost <ehabkost@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Run QEMU with all combinations of -machine and -device types,
+check for crashes and unexpected errors.
+"""
+
+import sys
+import os
+import glob
+import logging
+import traceback
+import re
+import random
+import argparse
+from itertools import chain
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'scripts'))
+from qemu import QEMUMachine
+
+logger = logging.getLogger('device-crash-test')
+dbg = logger.debug
+
+
+# Purposes of the following whitelist:
+# * Avoiding verbose log messages when we find known non-fatal
+#   (exitcode=1) errors
+# * Avoiding fatal errors when we find known crashes
+# * Skipping machines/devices that are known not to work out of
+#   the box, when running in --quick mode
+#
+# Keeping the whitelist updated is desirable, but not required,
+# because unexpected cases where QEMU exits with exitcode=1 will
+# just trigger a INFO message.
+
+# Valid whitelist entry keys:
+# * accel: regexp, full match only
+# * machine: regexp, full match only
+# * device: regexp, full match only
+# * log: regexp, partial match allowed
+# * exitcode: if not present, defaults to 1. If None, matches any exitcode
+# * warn: if True, matching failures will be logged as warnings
+# * expected: if True, QEMU is expected to always fail every time
+#   when testing the corresponding test case
+# * loglevel: log level of log output when there's a match.
+ERROR_WHITELIST = [
+    # Machines that won't work out of the box:
+    #             MACHINE                         | ERROR MESSAGE
+    {'machine':'niagara', 'expected':True},       # Unable to load a firmware for -M niagara
+    {'machine':'boston', 'expected':True},        # Please provide either a -kernel or -bios argument
+    {'machine':'leon3_generic', 'expected':True}, # Can't read bios image (null)
+
+    # devices that don't work out of the box because they require extra options to "-device DEV":
+    #            DEVICE                                    | ERROR MESSAGE
+    {'device':'.*-(i386|x86_64)-cpu', 'expected':True},    # CPU socket-id is not set
+    {'device':'ARM,bitband-memory', 'expected':True},      # source-memory property not set
+    {'device':'arm.cortex-a9-global-timer', 'expected':True}, # a9_gtimer_realize: num-cpu must be between 1 and 4
+    {'device':'arm_mptimer', 'expected':True},             # num-cpu must be between 1 and 4
+    {'device':'armv7m', 'expected':True},                  # memory property was not set
+    {'device':'aspeed.scu', 'expected':True},              # Unknown silicon revision: 0x0
+    {'device':'aspeed.sdmc', 'expected':True},             # Unknown silicon revision: 0x0
+    {'device':'bcm2835-dma', 'expected':True},             # bcm2835_dma_realize: required dma-mr link not found: Property '.dma-mr' not found
+    {'device':'bcm2835-fb', 'expected':True},              # bcm2835_fb_realize: required vcram-base property not set
+    {'device':'bcm2835-mbox', 'expected':True},            # bcm2835_mbox_realize: required mbox-mr link not found: Property '.mbox-mr' not found
+    {'device':'bcm2835-peripherals', 'expected':True},     # bcm2835_peripherals_realize: required ram link not found: Property '.ram' not found
+    {'device':'bcm2835-property', 'expected':True},        # bcm2835_property_realize: required fb link not found: Property '.fb' not found
+    {'device':'bcm2835_gpio', 'expected':True},            # bcm2835_gpio_realize: required sdhci link not found: Property '.sdbus-sdhci' not found
+    {'device':'bcm2836', 'expected':True},                 # bcm2836_realize: required ram link not found: Property '.ram' not found
+    {'device':'cfi.pflash01', 'expected':True},            # attribute "sector-length" not specified or zero.
+    {'device':'cfi.pflash02', 'expected':True},            # attribute "sector-length" not specified or zero.
+    {'device':'icp', 'expected':True},                     # icp_realize: required link 'xics' not found: Property '.xics' not found
+    {'device':'ics', 'expected':True},                     # ics_base_realize: required link 'xics' not found: Property '.xics' not found
+    # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
+    {'device':'ide-cd'},                                 # No drive specified
+    {'device':'ide-drive', 'expected':True},               # No drive specified
+    {'device':'ide-hd', 'expected':True},                  # No drive specified
+    {'device':'ipmi-bmc-extern', 'expected':True},         # IPMI external bmc requires chardev attribute
+    {'device':'isa-debugcon', 'expected':True},            # Can't create serial device, empty char device
+    {'device':'isa-ipmi-bt', 'expected':True},             # IPMI device requires a bmc attribute to be set
+    {'device':'isa-ipmi-kcs', 'expected':True},            # IPMI device requires a bmc attribute to be set
+    {'device':'isa-parallel', 'expected':True},            # Can't create serial device, empty char device
+    {'device':'isa-serial', 'expected':True},              # Can't create serial device, empty char device
+    {'device':'ivshmem', 'expected':True},                 # You must specify either 'shm' or 'chardev'
+    {'device':'ivshmem-doorbell', 'expected':True},        # You must specify a 'chardev'
+    {'device':'ivshmem-plain', 'expected':True},           # You must specify a 'memdev'
+    {'device':'kvm-pci-assign', 'expected':True},          # no host device specified
+    {'device':'loader', 'expected':True},                  # please include valid arguments
+    {'device':'nand', 'expected':True},                    # Unsupported NAND block size 0x1
+    {'device':'nvdimm', 'expected':True},                  # 'memdev' property is not set
+    {'device':'nvme', 'expected':True},                    # Device initialization failed
+    {'device':'pc-dimm', 'expected':True},                 # 'memdev' property is not set
+    {'device':'pci-bridge', 'expected':True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
+    {'device':'pci-bridge-seat', 'expected':True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
+    {'device':'pci-serial', 'expected':True},              # Can't create serial device, empty char device
+    {'device':'pci-serial-2x', 'expected':True},           # Can't create serial device, empty char device
+    {'device':'pci-serial-4x', 'expected':True},           # Can't create serial device, empty char device
+    {'device':'pxa2xx-dma', 'expected':True},              # channels value invalid
+    {'device':'pxb', 'expected':True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
+    {'device':'scsi-block', 'expected':True},              # drive property not set
+    {'device':'scsi-disk', 'expected':True},               # drive property not set
+    {'device':'scsi-generic', 'expected':True},            # drive property not set
+    {'device':'scsi-hd', 'expected':True},                 # drive property not set
+    {'device':'spapr-pci-host-bridge', 'expected':True},   # BUID not specified for PHB
+    {'device':'spapr-pci-vfio-host-bridge', 'expected':True}, # BUID not specified for PHB
+    {'device':'spapr-rng', 'expected':True},               # spapr-rng needs an RNG backend!
+    {'device':'spapr-vty', 'expected':True},               # chardev property not set
+    {'device':'tpm-tis', 'expected':True},                 # tpm_tis: backend driver with id (null) could not be found
+    {'device':'unimplemented-device', 'expected':True},    # property 'size' not specified or zero
+    {'device':'usb-braille', 'expected':True},             # Property chardev is required
+    {'device':'usb-mtp', 'expected':True},                 # x-root property must be configured
+    {'device':'usb-redir', 'expected':True},               # Parameter 'chardev' is missing
+    {'device':'usb-serial', 'expected':True},              # Property chardev is required
+    {'device':'usb-storage', 'expected':True},             # drive property not set
+    {'device':'vfio-amd-xgbe', 'expected':True},           # -device vfio-amd-xgbe: vfio error: wrong host device name
+    {'device':'vfio-calxeda-xgmac', 'expected':True},      # -device vfio-calxeda-xgmac: vfio error: wrong host device name
+    {'device':'vfio-pci', 'expected':True},                # No provided host device
+    {'device':'vfio-pci-igd-lpc-bridge', 'expected':True}, # VFIO dummy ISA/LPC bridge must have address 1f.0
+    {'device':'vhost-scsi.*', 'expected':True},            # vhost-scsi: missing wwpn
+    {'device':'vhost-vsock-device', 'expected':True},      # guest-cid property must be greater than 2
+    {'device':'vhost-vsock-pci', 'expected':True},         # guest-cid property must be greater than 2
+    {'device':'virtio-9p-ccw', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
+    {'device':'virtio-9p-device', 'expected':True},        # 9pfs device couldn't find fsdev with the id = NULL
+    {'device':'virtio-9p-pci', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
+    {'device':'virtio-blk-ccw', 'expected':True},          # drive property not set
+    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
+    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
+    {'device':'virtio-blk-pci', 'expected':True},          # drive property not set
+    {'device':'virtio-crypto-ccw', 'expected':True},       # 'cryptodev' parameter expects a valid object
+    {'device':'virtio-crypto-device', 'expected':True},    # 'cryptodev' parameter expects a valid object
+    {'device':'virtio-crypto-pci', 'expected':True},       # 'cryptodev' parameter expects a valid object
+    {'device':'virtio-input-host-device', 'expected':True}, # evdev property is required
+    {'device':'virtio-input-host-pci', 'expected':True},   # evdev property is required
+    {'device':'xen-pvdevice', 'expected':True},            # Device ID invalid, it must always be supplied
+    {'device':'vhost-vsock-ccw', 'expected':True},         # guest-cid property must be greater than 2
+    {'device':'ALTR.timer', 'expected':True},              # "clock-frequency" property must be provided
+    {'device':'zpci', 'expected':True},                    # target must be defined
+    {'device':'pnv-(occ|icp|lpc)', 'expected':True},       # required link 'xics' not found: Property '.xics' not found
+    {'device':'powernv-cpu-.*', 'expected':True},          # pnv_core_realize: required link 'xics' not found: Property '.xics' not found
+
+    # ioapic devices are already created by pc and will fail:
+    {'machine':'q35|pc.*', 'device':'kvm-ioapic', 'expected':True}, # Only 1 ioapics allowed
+    {'machine':'q35|pc.*', 'device':'ioapic', 'expected':True},     # Only 1 ioapics allowed
+
+    # KVM-specific devices shouldn't be tried without accel=kvm:
+    {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True},
+    {'accel':'(?!kvm).*', 'device':'kvm-pci-assign', 'expected':True},
+
+    # xen-specific machines and devices:
+    {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True},
+    {'accel':'(?!xen).*', 'device':'xen-.*', 'expected':True},
+
+    # this fails on some machine-types, but not all, so they don't have expected=True:
+    {'device':'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide
+
+    # Silence INFO messages for errors that are common on multiple
+    # devices/machines:
+    {'log':r"No '[\w-]+' bus found for device '[\w-]+'"},
+    {'log':r"images* must be given with the 'pflash' parameter"},
+    {'log':r"(Guest|ROM|Flash|Kernel) image must be specified"},
+    {'log':r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"},
+    {'log':r"Couldn't find rom image '[\w-]+\.bin'"},
+    {'log':r"speed mismatch trying to attach usb device"},
+    {'log':r"Can't create a second ISA bus"},
+    {'log':r"duplicate fw_cfg file name"},
+    # sysbus-related error messages: most machines reject most dynamic sysbus devices:
+    {'log':r"Option '-device [\w.,-]+' cannot be handled by this machine"},
+    {'log':r"Device [\w.,-]+ is not supported by this machine yet"},
+    {'log':r"Device [\w.,-]+ can not be dynamically instantiated"},
+    {'log':r"Platform Bus: Can not fit MMIO region of size "},
+    # other more specific errors we will ignore:
+    {'device':'allwinner-a10', 'log':"Unsupported NIC model:"},
+    {'device':'.*-spapr-cpu-core', 'log':r"CPU core type should be"},
+    {'log':r"MSI(-X)? is not supported by interrupt controller"},
+    {'log':r"pxb-pcie? devices cannot reside on a PCIe? bus"},
+    {'log':r"Ignoring smp_cpus value"},
+    {'log':r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"},
+    {'log':r"This CPU requires a smaller page size than the system is using"},
+    {'log':r"MSI-X support is mandatory in the S390 architecture"},
+    {'log':r"rom check and register reset failed"},
+    {'log':r"Unable to initialize GIC, CPUState for CPU#0 not valid"},
+    {'log':r"Multiple VT220 operator consoles are not supported"},
+    {'log':r"core 0 already populated"},
+    {'log':r"could not find stage1 bootloader"},
+
+    # other exitcode=1 failures not listed above will just generate INFO messages:
+    {'exitcode':1, 'loglevel':logging.INFO},
+
+    # KNOWN CRASHES:
+    # Known crashes will generate error messages, but won't be fatal.
+    # Those entries must be removed once we fix the crashes.
+    {'exitcode':-6, 'log':r"Device 'serial0' is in use", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"spapr_rtas_register: Assertion .*rtas_table\[token\]\.name.* failed", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"qemu_net_client_setup: Assertion `!peer->peer' failed", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r'RAMBlock "[\w.-]+" already registered', 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"find_ram_offset: Assertion `size != 0' failed.", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"puv3_load_kernel: Assertion `kernel_filename != NULL' failed", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"add_cpreg_to_hashtable: code should not be reached", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"qemu_alloc_display: Assertion `surface->image != NULL' failed", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"Unexpected error in error_set_from_qdev_prop_error", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"Object .* is not an instance of type spapr-machine", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"Object .* is not an instance of type generic-pc-machine", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"Object .* is not an instance of type e500-ccsr", 'loglevel':logging.ERROR},
+    {'exitcode':-6, 'log':r"vmstate_register_with_alias_id: Assertion `!se->compat || se->instance_id == 0' failed", 'loglevel':logging.ERROR},
+    {'exitcode':-11, 'device':'stm32f205-soc', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'device':'xlnx,zynqmp', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'device':'mips-cps', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'device':'gus', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'device':'a9mpcore_priv', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'device':'a15mpcore_priv', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'device':'isa-serial', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'device':'sb16', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'device':'cs4231a', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'device':'arm-gicv3', 'loglevel':logging.ERROR, 'expected':True},
+    {'exitcode':-11, 'machine':'isapc', 'device':'.*-iommu', 'loglevel':logging.ERROR, 'expected':True},
+
+    # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
+    {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL},
+]
+
+
+def whitelistTestCaseMatch(wl, t):
+    """Check if a test case specification can match a whitelist entry
+
+    This only checks if a whitelist entry is a candidate match
+    for a given test case, it won't check if the test case
+    results/output match the entry.  See whitelistResultMatch().
+    """
+    return (('machine' not in wl or
+             'machine' not in t or
+             re.match(wl['machine'] + '$', t['machine'])) and
+            ('accel' not in wl or
+             'accel' not in t or
+             re.match(wl['accel'] + '$', t['accel'])) and
+            ('device' not in wl or
+             'device' not in t or
+             re.match(wl['device'] + '$', t['device'])))
+
+
+def whitelistCandidates(t):
+    """Generate the list of candidates that can match a test case"""
+    for i, wl in enumerate(ERROR_WHITELIST):
+        if whitelistTestCaseMatch(wl, t):
+            yield (i, wl)
+
+
+def findExpectedResult(t):
+    """Check if there's an expected=True whitelist entry for a test case
+
+    Returns (i, wl) tuple, where i is the index in
+    ERROR_WHITELIST and wl is the whitelist entry itself.
+    """
+    for i, wl in whitelistCandidates(t):
+        if wl.get('expected'):
+            return (i, wl)
+
+
+def whitelistResultMatch(wl, r):
+    """Check if test case results/output match a whitelist entry
+
+    It is valid to call this function only if
+    whitelistTestCaseMatch() is True for the entry (e.g. on
+    entries returned by whitelistCandidates())
+    """
+    assert whitelistTestCaseMatch(wl, r['testcase'])
+    return ((wl.get('exitcode', 1) is None or
+             r['exitcode'] == wl.get('exitcode', 1)) and
+            ('log' not in wl or
+             re.search(wl['log'], r['log'], re.MULTILINE)))
+
+
+def checkResultWhitelist(r):
+    """Look up whitelist entry for a given test case result
+
+    Returns (i, wl) tuple, where i is the index in
+    ERROR_WHITELIST and wl is the whitelist entry itself.
+    """
+    for i, wl in whitelistCandidates(r['testcase']):
+        if whitelistResultMatch(wl, r):
+            return i, wl
+
+    raise Exception("this should never happen")
+
+
+def qemuOptsEscape(s):
+    """Escape option value QemuOpts"""
+    return s.replace(",", ",,")
+
+
+def formatTestCase(t):
+    """Format test case info as "key=value key=value" for prettier logging output"""
+    return ' '.join('%s=%s' % (k, v) for k, v in t.items())
+
+
+def qomListTypeNames(vm, **kwargs):
+    """Run qom-list-types QMP command, return type names"""
+    types = vm.command('qom-list-types', **kwargs)
+    return [t['name'] for t in types]
+
+
+def infoQDM(vm):
+    """Parse 'info qdm' output"""
+    args = {'command-line': 'info qdm'}
+    devhelp = vm.command('human-monitor-command', **args)
+    for l in devhelp.split('\n'):
+        l = l.strip()
+        if l == '' or l.endswith(':'):
+            continue
+        d = {'name': re.search(r'name "([^"]+)"', l).group(1),
+             'no-user': (re.search(', no-user', l) is not None)}
+        yield d
+
+
+class QemuBinaryInfo(object):
+    def __init__(self, binary, devtype):
+        if devtype is None:
+            devtype = 'device'
+
+        self.binary = binary
+        self._machine_info = {}
+
+        dbg("devtype: %r", devtype)
+        args = ['-S', '-machine', 'none,accel=kvm:tcg']
+        dbg("querying info for QEMU binary: %s", binary)
+        vm = QEMUMachine(binary=binary, args=args)
+        vm.launch()
+        try:
+            self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False))
+            # there's no way to query DeviceClass::user_creatable using QMP,
+            # so use 'info qdm':
+            self.no_user_devs = set([d['name'] for d in infoQDM(vm, ) if d['no-user']])
+            self.machines = list(m['name'] for m in vm.command('query-machines'))
+            self.user_devs = self.alldevs.difference(self.no_user_devs)
+            self.kvm_available = vm.command('query-kvm')['enabled']
+        finally:
+            vm.shutdown()
+
+    def machineInfo(self, machine):
+        """Query for information on a specific machine-type
+
+        Results are cached internally, in case the same machine-
+        type is queried multiple times.
+        """
+        if machine in self._machine_info:
+            return self._machine_info[machine]
+
+        mi = {}
+        args = ['-S', '-machine', '%s' % (machine)]
+        dbg("querying machine info for binary=%s machine=%s", self.binary, machine)
+        vm = QEMUMachine(binary=self.binary, args=args)
+        try:
+            vm.launch()
+            mi['runnable'] = True
+        except KeyboardInterrupt:
+            raise
+        except:
+            dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info())
+            dbg("log: %r", vm.get_log())
+            mi['runnable'] = False
+
+        vm.shutdown()
+        self._machine_info[machine] = mi
+        return mi
+
+
+BINARY_INFO = {}
+
+
+def getBinaryInfo(args, binary):
+    if binary not in BINARY_INFO:
+        BINARY_INFO[binary] = QemuBinaryInfo(binary, args.devtype)
+    return BINARY_INFO[binary]
+
+
+def checkOneCase(args, testcase):
+    """Check one specific case
+
+    Returns a dictionary containing failure information on error,
+    or None on success
+    """
+    binary = testcase['binary']
+    accel = testcase['accel']
+    machine = testcase['machine']
+    device = testcase['device']
+
+    dbg("will test: %r", testcase)
+
+    args = ['-S', '-machine', '%s,accel=%s' % (machine, accel),
+            '-device', qemuOptsEscape(device)]
+    cmdline = ' '.join([binary] + args)
+    dbg("will launch QEMU: %s", cmdline)
+    vm = QEMUMachine(binary=binary, args=args)
+
+    exc_traceback = None
+    try:
+        vm.launch()
+    except KeyboardInterrupt:
+        raise
+    except:
+        exc_traceback = traceback.format_exc()
+        dbg("Exception while running test case")
+    finally:
+        vm.shutdown()
+        ec = vm.exitcode()
+        log = vm.get_log()
+
+    if exc_traceback is not None or ec != 0:
+        return {'exc_traceback':exc_traceback,
+                'exitcode':ec,
+                'log':log,
+                'testcase':testcase,
+                'cmdline':cmdline}
+
+
+def binariesToTest(args, testcase):
+    if args.qemu:
+        r = args.qemu
+    else:
+        r = glob.glob('./*-softmmu/qemu-system-*')
+    return r
+
+
+def accelsToTest(args, testcase):
+    if getBinaryInfo(args, testcase['binary']).kvm_available:
+        yield 'kvm'
+    yield 'tcg'
+
+
+def machinesToTest(args, testcase):
+    return getBinaryInfo(args, testcase['binary']).machines
+
+
+def devicesToTest(args, testcase):
+    return getBinaryInfo(args, testcase['binary']).user_devs
+
+
+TESTCASE_VARIABLES = [
+    ('binary', binariesToTest),
+    ('accel', accelsToTest),
+    ('machine', machinesToTest),
+    ('device', devicesToTest),
+]
+
+
+def genCases1(args, testcases, var, fn):
+    """Generate new testcases for one variable
+
+    If an existing item already has a variable set, don't
+    generate new items and just return it directly. This
+    allows the "-t" command-line option to be used to choose
+    a specific test case.
+    """
+    for testcase in testcases:
+        if var in testcase:
+            yield testcase.copy()
+        else:
+            for i in fn(args, testcase):
+                t = testcase.copy()
+                t[var] = i
+                yield t
+
+
+def genCases(args, testcase):
+    """Generate test cases for all variables
+    """
+    cases = [testcase.copy()]
+    for var, fn in TESTCASE_VARIABLES:
+        dbg("var: %r, fn: %r", var, fn)
+        cases = genCases1(args, cases, var, fn)
+    return cases
+
+
+def casesToTest(args, testcase):
+    cases = genCases(args, testcase)
+    if args.random:
+        cases = list(cases)
+        cases = random.sample(cases, min(args.random, len(cases)))
+    if args.debug:
+        cases = list(cases)
+        dbg("%d test cases to test", len(cases))
+    if args.shuffle:
+        cases = list(cases)
+        random.shuffle(cases)
+    return cases
+
+
+def logFailure(f, level):
+    t = f['testcase']
+    logger.log(level, "failed: %s", formatTestCase(t))
+    logger.log(level, "cmdline: %s", f['cmdline'])
+    for l in f['log'].strip().split('\n'):
+        logger.log(level, "log: %s", l)
+    logger.log(level, "exit code: %r", f['exitcode'])
+    if f['exc_traceback']:
+        logger.log(level, "exception:")
+        for l in f['exc_traceback'].split('\n'):
+            logger.log(level, "  %s", l.rstrip('\n'))
+
+
+def main():
+    parser = argparse.ArgumentParser(description="QEMU -device crash test")
+    parser.add_argument('-t', metavar='KEY=VALUE', nargs='*',
+                        help="Limit test cases to KEY=VALUE",
+                        action='append', dest='testcases', default=[])
+    parser.add_argument('-d', '--debug', action='store_true',
+                        help='debug output')
+    parser.add_argument('-v', '--verbose', action='store_true', default=True,
+                        help='verbose output')
+    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false',
+                        help='non-verbose output')
+    parser.add_argument('-r', '--random', type=int, metavar='COUNT',
+                        help='run a random sample of COUNT test cases',
+                        default=0)
+    parser.add_argument('--shuffle', action='store_true',
+                        help='Run test cases in random order')
+    parser.add_argument('--dry-run', action='store_true',
+                        help="Don't run any tests, just generate list")
+    parser.add_argument('-D', '--devtype', metavar='TYPE',
+                        help="Test only device types that implement TYPE")
+    parser.add_argument('-Q', '--quick', action='store_true', default=True,
+                        help="Quick mode: skip test cases that are expected to fail")
+    parser.add_argument('-F', '--full', action='store_false', dest='quick',
+                        help="Full mode: test cases that are expected to fail")
+    parser.add_argument('--strict', action='store_true', dest='strict',
+                        help="Treat all warnings as fatal")
+    parser.add_argument('qemu', nargs='*', metavar='QEMU',
+                        help='QEMU binary to run')
+    args = parser.parse_args()
+
+    if args.debug:
+        lvl = logging.DEBUG
+    elif args.verbose:
+        lvl = logging.INFO
+    else:
+        lvl = logging.WARN
+    logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')
+
+    fatal_failures = []
+    wl_stats = {}
+    skipped = 0
+    total = 0
+
+    tc = {}
+    dbg("testcases: %r", args.testcases)
+    if args.testcases:
+        for t in chain(*args.testcases):
+            for kv in t.split():
+                k, v = kv.split('=', 1)
+                tc[k] = v
+
+    if len(binariesToTest(args, tc)) == 0:
+        print >>sys.stderr, "No QEMU binary found"
+        parser.print_usage(sys.stderr)
+        return 1
+
+    for t in casesToTest(args, tc):
+        logger.info("running test case: %s", formatTestCase(t))
+        total += 1
+
+        expected_match = findExpectedResult(t)
+        if (args.quick and
+                (expected_match or
+                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
+            dbg("skipped: %s", formatTestCase(t))
+            skipped += 1
+            continue
+
+        if args.dry_run:
+            continue
+
+        try:
+            f = checkOneCase(args, t)
+        except KeyboardInterrupt:
+            break
+
+        if f:
+            i, wl = checkResultWhitelist(f)
+            dbg("testcase: %r, whitelist match: %r", t, wl)
+            wl_stats.setdefault(i, []).append(f)
+            level = wl.get('loglevel', logging.DEBUG)
+            logFailure(f, level)
+            if wl.get('fatal') or (args.strict and level >= logging.WARN):
+                fatal_failures.append(f)
+        else:
+            dbg("success: %s", formatTestCase(t))
+            if expected_match:
+                logger.warn("Didn't fail as expected: %s", formatTestCase(t))
+
+    logger.info("Total: %d test cases", total)
+    if skipped:
+        logger.info("Skipped %d test cases", skipped)
+
+    if args.debug:
+        stats = sorted([(len(wl_stats.get(i, [])), wl) for i, wl in enumerate(ERROR_WHITELIST)])
+        for count, wl in stats:
+            dbg("whitelist entry stats: %d: %r", count, wl)
+
+    if fatal_failures:
+        for f in fatal_failures:
+            t = f['testcase']
+            logger.error("Fatal failure: %s", formatTestCase(t))
+        logger.error("Fatal failures on some machine/device combinations")
+        return 1
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/scripts/qemu.py b/scripts/qemu.py
index 6d1b6230b7..880e3e8219 100644
--- a/scripts/qemu.py
+++ b/scripts/qemu.py
@@ -85,8 +85,16 @@ class QEMUMachine(object):
                 return
             raise
 
+    def is_running(self):
+        return self._popen and (self._popen.returncode is None)
+
+    def exitcode(self):
+        if self._popen is None:
+            return None
+        return self._popen.returncode
+
     def get_pid(self):
-        if not self._popen:
+        if not self.is_running():
             return None
         return self._popen.pid
 
@@ -128,16 +136,16 @@ class QEMUMachine(object):
                                            stderr=subprocess.STDOUT, shell=False)
             self._post_launch()
         except:
-            if self._popen:
+            if self.is_running():
                 self._popen.kill()
+                self._popen.wait()
             self._load_io_log()
             self._post_shutdown()
-            self._popen = None
             raise
 
     def shutdown(self):
         '''Terminate the VM and clean up'''
-        if not self._popen is None:
+        if self.is_running():
             try:
                 self._qmp.cmd('quit')
                 self._qmp.close()
@@ -149,7 +157,6 @@ class QEMUMachine(object):
                 sys.stderr.write('qemu received signal %i: %s\n' % (-exitcode, ' '.join(self._args)))
             self._load_io_log()
             self._post_shutdown()
-            self._popen = None
 
     underscore_to_dash = string.maketrans('_', '-')
     def qmp(self, cmd, conv_keys=True, **args):
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index df5d695344..7c45ae360c 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -89,6 +89,9 @@ typedef enum {
        updated the PC for the next instruction to be executed.  */
     EXIT_PC_STALE,
 
+    /* We are exiting the TB due to page crossing or space constraints.  */
+    EXIT_FALLTHRU,
+
     /* We are ending the TB with a noreturn function call, e.g. longjmp.
        No following code will be executed.  */
     EXIT_NORETURN,
@@ -1157,6 +1160,7 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
 #ifndef CONFIG_USER_ONLY
     /* Privileged PAL code */
     if (palcode < 0x40 && (ctx->tb->flags & TB_FLAGS_USER_MODE) == 0) {
+        TCGv tmp;
         switch (palcode) {
         case 0x01:
             /* CFLUSH */
@@ -1182,10 +1186,8 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
                            offsetof(CPUAlphaState, sysval));
             break;
 
-        case 0x35: {
+        case 0x35:
             /* SWPIPL */
-            TCGv tmp;
-
             /* Note that we already know we're in kernel mode, so we know
                that PS only contains the 3 IPL bits.  */
             tcg_gen_ld8u_i64(ctx->ir[IR_V0], cpu_env,
@@ -1197,7 +1199,6 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
             tcg_gen_st8_i64(tmp, cpu_env, offsetof(CPUAlphaState, ps));
             tcg_temp_free(tmp);
             break;
-        }
 
         case 0x36:
             /* RDPS */
@@ -1220,6 +1221,14 @@ static ExitStatus gen_call_pal(DisasContext *ctx, int palcode)
                 -offsetof(AlphaCPU, env) + offsetof(CPUState, cpu_index));
             break;
 
+        case 0x3E:
+            /* WTINT */
+            tmp = tcg_const_i64(1);
+            tcg_gen_st32_i64(tmp, cpu_env, -offsetof(AlphaCPU, env) +
+                                           offsetof(CPUState, halted));
+            tcg_gen_movi_i64(ctx->ir[IR_V0], 0);
+            return gen_excp(ctx, EXCP_HALTED, 0);
+
         default:
             palcode &= 0x3f;
             goto do_call_pal;
@@ -1369,7 +1378,7 @@ static ExitStatus gen_mtpr(DisasContext *ctx, TCGv vb, int regno)
         tmp = tcg_const_i64(1);
         tcg_gen_st32_i64(tmp, cpu_env, -offsetof(AlphaCPU, env) +
                                        offsetof(CPUState, halted));
-        return gen_excp(ctx, EXCP_HLT, 0);
+        return gen_excp(ctx, EXCP_HALTED, 0);
 
     case 252:
         /* HALT */
@@ -2978,7 +2987,7 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
                 || num_insns >= max_insns
                 || singlestep
                 || ctx.singlestep_enabled)) {
-            ret = EXIT_PC_STALE;
+            ret = EXIT_FALLTHRU;
         }
     } while (ret == NO_EXIT);
 
@@ -2990,6 +2999,13 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
     case EXIT_GOTO_TB:
     case EXIT_NORETURN:
         break;
+    case EXIT_FALLTHRU:
+        if (use_goto_tb(&ctx, ctx.pc)) {
+            tcg_gen_goto_tb(0);
+            tcg_gen_movi_i64(cpu_pc, ctx.pc);
+            tcg_gen_exit_tb((uintptr_t)ctx.tb);
+        }
+        /* FALLTHRU */
     case EXIT_PC_STALE:
         tcg_gen_movi_i64(cpu_pc, ctx.pc);
         /* FALLTHRU */
@@ -3001,7 +3017,7 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
         }
         break;
     default:
-        abort();
+        g_assert_not_reached();
     }
 
     gen_tb_end(tb, num_insns);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 04a3fea03f..28a9141298 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1589,7 +1589,7 @@ static Property arm_cpu_properties[] = {
     DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
     DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
                         mp_affinity, ARM64_AFFINITY_INVALID),
-    DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID),
+    DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID),
     DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 13da5036bc..16a1e59615 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -704,6 +704,8 @@ struct ARMCPU {
 
     ARMELChangeHook *el_change_hook;
     void *el_change_hook_opaque;
+
+    int32_t node_id; /* NUMA node this CPU belongs to */
 };
 
 static inline ARMCPU *arm_env_get_cpu(CPUARMState *env)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index a82ab49c94..860e279658 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -379,7 +379,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
         } else if (s->singlestep_enabled) {
             gen_exception_internal(EXCP_DEBUG);
         } else {
-            tcg_gen_exit_tb(0);
+            tcg_gen_lookup_and_goto_ptr(cpu_pc);
             s->is_jmp = DISAS_TB_JUMP;
         }
     }
@@ -11367,8 +11367,7 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
             gen_a64_set_pc_im(dc->pc);
             /* fall through */
         case DISAS_JUMP:
-            /* indicate that the hash table must be used to find the next TB */
-            tcg_gen_exit_tb(0);
+            tcg_gen_lookup_and_goto_ptr(cpu_pc);
             break;
         case DISAS_TB_JUMP:
         case DISAS_EXC:
diff --git a/target/arm/translate.c b/target/arm/translate.c
index ae6646c05b..0862f9e4aa 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1182,7 +1182,7 @@ static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
     gen_set_condexec(s);
     gen_set_pc_im(s, s->pc - offset);
     gen_exception_internal(excp);
-    s->is_jmp = DISAS_JUMP;
+    s->is_jmp = DISAS_EXC;
 }
 
 static void gen_exception_insn(DisasContext *s, int offset, int excp,
@@ -1191,14 +1191,14 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp,
     gen_set_condexec(s);
     gen_set_pc_im(s, s->pc - offset);
     gen_exception(excp, syn, target_el);
-    s->is_jmp = DISAS_JUMP;
+    s->is_jmp = DISAS_EXC;
 }
 
 /* Force a TB lookup after an instruction that changes the CPU state.  */
 static inline void gen_lookup_tb(DisasContext *s)
 {
     tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
-    s->is_jmp = DISAS_JUMP;
+    s->is_jmp = DISAS_EXIT;
 }
 
 static inline void gen_hlt(DisasContext *s, int imm)
@@ -4150,7 +4150,15 @@ static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
 #endif
 }
 
-static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
+static void gen_goto_ptr(void)
+{
+    TCGv addr = tcg_temp_new();
+    tcg_gen_extu_i32_tl(addr, cpu_R[15]);
+    tcg_gen_lookup_and_goto_ptr(addr);
+    tcg_temp_free(addr);
+}
+
+static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
 {
     if (use_goto_tb(s, dest)) {
         tcg_gen_goto_tb(n);
@@ -4158,7 +4166,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
         tcg_gen_exit_tb((uintptr_t)s->tb + n);
     } else {
         gen_set_pc_im(s, dest);
-        tcg_gen_exit_tb(0);
+        gen_goto_ptr();
     }
 }
 
@@ -12091,11 +12099,14 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
             gen_set_pc_im(dc, dc->pc);
             /* fall through */
         case DISAS_JUMP:
+            gen_goto_ptr();
+            break;
         default:
             /* indicate that the hash table must be used to find the next TB */
             tcg_gen_exit_tb(0);
             break;
         case DISAS_TB_JUMP:
+        case DISAS_EXC:
             /* nothing more to generate */
             break;
         case DISAS_WFI:
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 6b2cc34c33..15d383d9af 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -139,6 +139,10 @@ static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
  * custom end-of-TB code)
  */
 #define DISAS_BX_EXCRET 11
+/* For instructions which want an immediate exit to the main loop,
+ * as opposed to attempting to use lookup_and_goto_ptr.
+ */
+#define DISAS_EXIT 12
 
 #ifdef TARGET_AARCH64
 void a64_translate_init(void);
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index 9e8c233501..e10abc5e04 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -517,7 +517,7 @@ static void gen_goto_tb(DisasContext *ctx, int which,
         if (ctx->singlestep_enabled) {
             gen_excp_1(EXCP_DEBUG);
         } else {
-            tcg_gen_exit_tb(0);
+            tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f);
         }
     }
 }
@@ -1510,7 +1510,7 @@ static ExitStatus do_ibranch(DisasContext *ctx, TCGv dest,
     } else if (is_n && use_nullify_skip(ctx)) {
         /* The (conditional) branch, B, nullifies the next insn, N,
            and we're allowed to skip execution N (no single-step or
-           tracepoint in effect).  Since the exit_tb that we must use
+           tracepoint in effect).  Since the goto_ptr that we must use
            for the indirect branch consumes no special resources, we
            can (conditionally) skip B and continue execution.  */
         /* The use_nullify_skip test implies we have a known control path.  */
@@ -1527,7 +1527,7 @@ static ExitStatus do_ibranch(DisasContext *ctx, TCGv dest,
         if (link != 0) {
             tcg_gen_movi_tl(cpu_gr[link], ctx->iaoq_n);
         }
-        tcg_gen_exit_tb(0);
+        tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f);
         return nullify_end(ctx, NO_EXIT);
     } else {
         cond_prep(&ctx->null_cond);
@@ -3885,7 +3885,7 @@ void gen_intermediate_code(CPUHPPAState *env, struct TranslationBlock *tb)
         if (ctx.singlestep_enabled) {
             gen_excp_1(EXCP_DEBUG);
         } else {
-            tcg_gen_exit_tb(0);
+            tcg_gen_lookup_and_goto_ptr(cpu_iaoq_f);
         }
         break;
     default:
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index a41d595c23..ffb5267162 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3986,7 +3986,7 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1),
     DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1),
 #endif
-    DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID),
+    DEFINE_PROP_INT32("node-id", X86CPU, node_id, CPU_UNSET_NUMA_NODE_ID),
     DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false),
     { .name  = "hv-spinlocks", .info  = &qdev_prop_spinlocks },
     DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false),
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c4602ca80d..cfe825f0a4 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1275,6 +1275,7 @@ struct X86CPU {
 
     struct kvm_msrs *kvm_msr_buf;
 
+    int32_t node_id; /* NUMA node this CPU belongs to */
     int32_t socket_id;
     int32_t core_id;
     int32_t thread_id;
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 1d1372fb43..674ec96d5a 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -141,6 +141,7 @@ typedef struct DisasContext {
 } DisasContext;
 
 static void gen_eob(DisasContext *s);
+static void gen_jr(DisasContext *s, TCGv dest);
 static void gen_jmp(DisasContext *s, target_ulong eip);
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
@@ -2153,9 +2154,9 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
         gen_jmp_im(eip);
         tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
     } else {
-        /* jump to another page: currently not optimized */
+        /* jump to another page */
         gen_jmp_im(eip);
-        gen_eob(s);
+        gen_jr(s, cpu_tmp0);
     }
 }
 
@@ -2509,7 +2510,8 @@ static void gen_bnd_jmp(DisasContext *s)
    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
    S->TF.  This is used by the syscall/sysret insns.  */
-static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
+static void
+do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, TCGv jr)
 {
     gen_update_cc_op(s);
 
@@ -2530,12 +2532,27 @@ static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
         tcg_gen_exit_tb(0);
     } else if (s->tf) {
         gen_helper_single_step(cpu_env);
+    } else if (!TCGV_IS_UNUSED(jr)) {
+        TCGv vaddr = tcg_temp_new();
+
+        tcg_gen_add_tl(vaddr, jr, cpu_seg_base[R_CS]);
+        tcg_gen_lookup_and_goto_ptr(vaddr);
+        tcg_temp_free(vaddr);
     } else {
         tcg_gen_exit_tb(0);
     }
     s->is_jmp = DISAS_TB_JUMP;
 }
 
+static inline void
+gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
+{
+    TCGv unused;
+
+    TCGV_UNUSED(unused);
+    do_gen_eob_worker(s, inhibit, recheck_tf, unused);
+}
+
 /* End of block.
    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
@@ -2549,6 +2566,12 @@ static void gen_eob(DisasContext *s)
     gen_eob_worker(s, false, false);
 }
 
+/* Jump to register */
+static void gen_jr(DisasContext *s, TCGv dest)
+{
+    do_gen_eob_worker(s, false, false, dest);
+}
+
 /* generate a jump to eip. No segment change must happen before as a
    direct call to the next block may occur */
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
@@ -4973,7 +4996,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_push_v(s, cpu_T1);
             gen_op_jmp_v(cpu_T0);
             gen_bnd_jmp(s);
-            gen_eob(s);
+            gen_jr(s, cpu_T0);
             break;
         case 3: /* lcall Ev */
             gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
@@ -4991,7 +5014,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
-            gen_eob(s);
+            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
+            gen_jr(s, cpu_tmp4);
             break;
         case 4: /* jmp Ev */
             if (dflag == MO_16) {
@@ -4999,7 +5023,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             }
             gen_op_jmp_v(cpu_T0);
             gen_bnd_jmp(s);
-            gen_eob(s);
+            gen_jr(s, cpu_T0);
             break;
         case 5: /* ljmp Ev */
             gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
@@ -5014,7 +5038,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_op_movl_seg_T0_vm(R_CS);
                 gen_op_jmp_v(cpu_T1);
             }
-            gen_eob(s);
+            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
+            gen_jr(s, cpu_tmp4);
             break;
         case 6: /* push Ev */
             gen_push_v(s, cpu_T0);
@@ -6394,7 +6419,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         /* Note that gen_pop_T0 uses a zero-extending load.  */
         gen_op_jmp_v(cpu_T0);
         gen_bnd_jmp(s);
-        gen_eob(s);
+        gen_jr(s, cpu_T0);
         break;
     case 0xc3: /* ret */
         ot = gen_pop_T0(s);
@@ -6402,7 +6427,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         /* Note that gen_pop_T0 uses a zero-extending load.  */
         gen_op_jmp_v(cpu_T0);
         gen_bnd_jmp(s);
-        gen_eob(s);
+        gen_jr(s, cpu_T0);
         break;
     case 0xca: /* lret im */
         val = cpu_ldsw_code(env, s->pc);
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 3022f349cb..559f8fed89 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -4233,7 +4233,7 @@ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
             save_cpu_state(ctx, 0);
             gen_helper_raise_exception_debug(cpu_env);
         }
-        tcg_gen_exit_tb(0);
+        tcg_gen_lookup_and_goto_ptr(cpu_PC);
     }
 }
 
@@ -10725,7 +10725,7 @@ static void gen_branch(DisasContext *ctx, int insn_bytes)
                 save_cpu_state(ctx, 0);
                 gen_helper_raise_exception_debug(cpu_env);
             }
-            tcg_gen_exit_tb(0);
+            tcg_gen_lookup_and_goto_ptr(cpu_PC);
             break;
         default:
             fprintf(stderr, "unknown branch 0x%x\n", proc_hflags);
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index cfec47959d..2f3c2e5dfb 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -164,7 +164,7 @@ static void gen_goto_tb(DisasContext *dc, int n, uint32_t dest)
     if (use_goto_tb(dc, dest)) {
         tcg_gen_goto_tb(n);
         tcg_gen_movi_tl(dc->cpu_R[R_PC], dest);
-        tcg_gen_exit_tb((tcg_target_long)tb + n);
+        tcg_gen_exit_tb((uintptr_t)tb + n);
     } else {
         tcg_gen_movi_tl(dc->cpu_R[R_PC], dest);
         tcg_gen_exit_tb(0);
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 401e10e7da..d10808d9f4 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1205,6 +1205,7 @@ struct PowerPCCPU {
     uint32_t compat_pvr;
     PPCVirtualHypervisor *vhyp;
     Object *intc;
+    int32_t node_id; /* NUMA node this CPU belongs to */
 
     /* Fields related to migration compatibility hacks */
     bool pre_2_8_migration;
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 4c48c593cd..628fb8685d 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -608,11 +608,16 @@ static void gen_op_calc_cc(DisasContext *s)
     set_cc_static(s);
 }
 
-static int use_goto_tb(DisasContext *s, uint64_t dest)
+static bool use_exit_tb(DisasContext *s)
 {
-    if (unlikely(s->singlestep_enabled) ||
-        (s->tb->cflags & CF_LAST_IO) ||
-        (s->tb->flags & FLAG_MASK_PER)) {
+    return (s->singlestep_enabled ||
+            (s->tb->cflags & CF_LAST_IO) ||
+            (s->tb->flags & FLAG_MASK_PER));
+}
+
+static bool use_goto_tb(DisasContext *s, uint64_t dest)
+{
+    if (unlikely(use_exit_tb(s))) {
         return false;
     }
 #ifndef CONFIG_USER_ONLY
@@ -5461,8 +5466,10 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
         /* Exit the TB, either by raising a debug exception or by return.  */
         if (do_debug) {
             gen_exception(EXCP_DEBUG);
-        } else {
+        } else if (use_exit_tb(&dc)) {
             tcg_gen_exit_tb(0);
+        } else {
+            tcg_gen_lookup_and_goto_ptr(psw_addr);
         }
         break;
     default:
diff --git a/tcg-runtime.c b/tcg-runtime.c
index 4c60c96658..7fa90ce508 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -27,6 +27,9 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-hash.h"
+#include "disas/disas.h"
+#include "exec/log.h"
 
 /* 32-bit helpers */
 
@@ -141,6 +144,35 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
     return ctpop64(arg);
 }
 
+void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    TranslationBlock *tb;
+    target_ulong cs_base, pc;
+    uint32_t flags;
+
+    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)]);
+    if (likely(tb)) {
+        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+        if (likely(tb->pc == addr && tb->cs_base == cs_base &&
+                   tb->flags == flags)) {
+            goto found;
+        }
+        tb = tb_htable_lookup(cpu, addr, cs_base, flags);
+        if (likely(tb)) {
+            atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(addr)], tb);
+            goto found;
+        }
+    }
+    return tcg_ctx.code_gen_epilogue;
+ found:
+    qemu_log_mask_and_addr(CPU_LOG_EXEC, addr,
+                           "Chain %p [%d: " TARGET_FMT_lx "] %s\n",
+                           tb->tc_ptr, cpu->cpu_index, addr,
+                           lookup_symbol(addr));
+    return tb->tc_ptr;
+}
+
 void HELPER(exit_atomic)(CPUArchState *env)
 {
     cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
diff --git a/tcg/README b/tcg/README
index a9858c2f74..bf49e8242b 100644
--- a/tcg/README
+++ b/tcg/README
@@ -477,6 +477,14 @@ current TB was linked to this TB. Otherwise execute the next
 instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
 at most once with each slot index per TB.
 
+* lookup_and_goto_ptr tb_addr
+
+Look up a TB address ('tb_addr') and jump to it if valid. If not valid,
+jump to the TCG epilogue to go back to the exec loop.
+
+This operation is optional. If the TCG backend does not implement the
+goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
+
 * qemu_ld_i32/i64 t0, t1, flags, memidx
 * qemu_st_i32/i64 t0, t1, flags, memidx
 
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 1a5ea23844..55a46ac825 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -77,6 +77,7 @@ typedef enum {
 #define TCG_TARGET_HAS_mulsh_i32        0
 #define TCG_TARGET_HAS_extrl_i64_i32    0
 #define TCG_TARGET_HAS_extrh_i64_i32    0
+#define TCG_TARGET_HAS_goto_ptr         1
 
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          1
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 290de6dae6..5f185458f1 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -1357,8 +1357,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
-        tcg_out_goto(s, tb_ret_addr);
+        /* Reuse the zeroing that exists for goto_ptr.  */
+        if (a0 == 0) {
+            tcg_out_goto(s, s->code_gen_epilogue);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
+            tcg_out_goto(s, tb_ret_addr);
+        }
         break;
 
     case INDEX_op_goto_tb:
@@ -1374,6 +1379,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
         break;
 
+    case INDEX_op_goto_ptr:
+        tcg_out_insn(s, 3207, BR, a0);
+        break;
+
     case INDEX_op_br:
         tcg_out_goto_label(s, arg_label(a0));
         break;
@@ -1735,6 +1744,7 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_br, { } },
+    { INDEX_op_goto_ptr, { "r" } },
 
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
@@ -1942,6 +1952,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
     tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
 
+    /*
+     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+     * and fall through to the rest of the epilogue.
+     */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
+
+    /* TB epilogue */
     tb_ret_addr = s->code_ptr;
 
     /* Remove TCG locals stack space.  */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 75ea247bc4..5ef1086710 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -123,6 +123,7 @@ extern bool use_idiv_instructions;
 #define TCG_TARGET_HAS_mulsh_i32        0
 #define TCG_TARGET_HAS_div_i32          use_idiv_instructions
 #define TCG_TARGET_HAS_rem_i32          0
+#define TCG_TARGET_HAS_goto_ptr         1
 
 enum {
     TCG_AREG0 = TCG_REG_R6,
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index e75a6d4943..9f5cb66718 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -329,11 +329,6 @@ static const uint8_t tcg_cond_to_arm_cond[] = {
     [TCG_COND_GTU] = COND_HI,
 };
 
-static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
-{
-    tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
-}
-
 static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
 {
     tcg_out32(s, (cond << 28) | 0x0a000000 |
@@ -402,6 +397,18 @@ static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
     }
 }
 
+static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn)
+{
+    /* Unless the C portion of QEMU is compiled as thumb, we don't
+       actually need true BX semantics; merely a branch to an address
+       held in a register.  */
+    if (use_armv5t_instructions) {
+        tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
+    } else {
+        tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
+    }
+}
+
 static inline void tcg_out_dat_imm(TCGContext *s,
                 int cond, int opc, int rd, int rn, int im)
 {
@@ -977,7 +984,7 @@ static inline void tcg_out_st8(TCGContext *s, int cond,
  * with the code buffer limited to 16MB we wouldn't need the long case.
  * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
  */
-static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
+static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
 {
     intptr_t addri = (intptr_t)addr;
     ptrdiff_t disp = tcg_pcrel_diff(s, addr);
@@ -987,15 +994,9 @@ static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
         return;
     }
 
+    assert(use_armv5t_instructions || (addri & 1) == 0);
     tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
-    if (use_armv5t_instructions) {
-        tcg_out_bx(s, cond, TCG_REG_TMP);
-    } else {
-        if (addri & 1) {
-            tcg_abort();
-        }
-        tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
-    }
+    tcg_out_bx(s, cond, TCG_REG_TMP);
 }
 
 /* The call case is mostly used for helpers - so it's not unreasonable
@@ -1654,8 +1655,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
-        tcg_out_goto(s, COND_AL, tb_ret_addr);
+        /* Reuse the zeroing that exists for goto_ptr.  */
+        a0 = args[0];
+        if (a0 == 0) {
+            tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
+        } else {
+            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
+            tcg_out_goto(s, COND_AL, tb_ret_addr);
+        }
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_insn_offset) {
@@ -1670,6 +1677,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
         break;
+    case INDEX_op_goto_ptr:
+        tcg_out_bx(s, COND_AL, args[0]);
+        break;
     case INDEX_op_br:
         tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
         break;
@@ -1960,6 +1970,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_br, { } },
+    { INDEX_op_goto_ptr, { "r" } },
 
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
@@ -2135,9 +2146,16 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
 
     tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
-    tb_ret_addr = s->code_ptr;
 
-    /* Epilogue.  We branch here via tb_ret_addr.  */
+    /*
+     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+     * and fall through to the rest of the epilogue.
+     */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
+
+    /* TB epilogue */
+    tb_ret_addr = s->code_ptr;
     tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
                    TCG_REG_CALL_STACK, stack_addend, 1);
 
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 4275787db9..73a15f7e80 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -107,6 +107,7 @@ extern bool have_popcnt;
 #define TCG_TARGET_HAS_muls2_i32        1
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
+#define TCG_TARGET_HAS_goto_ptr         1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_extrl_i64_i32    0
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 5918008296..01e3b4e95c 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -1882,8 +1882,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
-        tcg_out_jmp(s, tb_ret_addr);
+        /* Reuse the zeroing that exists for goto_ptr.  */
+        if (a0 == 0) {
+            tcg_out_jmp(s, s->code_gen_epilogue);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
+            tcg_out_jmp(s, tb_ret_addr);
+        }
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_insn_offset) {
@@ -1906,6 +1911,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
         break;
+    case INDEX_op_goto_ptr:
+        /* jmp to the given host address (could be epilogue) */
+        tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
+        break;
     case INDEX_op_br:
         tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
         break;
@@ -2277,6 +2286,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
 {
+    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
     static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
     static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
     static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
@@ -2299,6 +2309,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
         = { .args_ct_str = { "L", "L", "L", "L" } };
 
     switch (op) {
+    case INDEX_op_goto_ptr:
+        return &r;
+
     case INDEX_op_ld8u_i32:
     case INDEX_op_ld8u_i64:
     case INDEX_op_ld8s_i32:
@@ -2567,6 +2580,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
 #endif
 
+    /*
+     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+     * and fall through to the rest of the epilogue.
+     */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);
+
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;
 
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 42aea03a8b..901bb7575d 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -173,6 +173,7 @@ typedef enum {
 #define TCG_TARGET_HAS_mulsh_i64        0
 #define TCG_TARGET_HAS_extrl_i64_i32    0
 #define TCG_TARGET_HAS_extrh_i64_i32    0
+#define TCG_TARGET_HAS_goto_ptr         0
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
 #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index f46d64a3a7..d75cb63ed3 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -130,6 +130,7 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_muluh_i32        1
 #define TCG_TARGET_HAS_mulsh_i32        1
 #define TCG_TARGET_HAS_bswap32_i32      1
+#define TCG_TARGET_HAS_goto_ptr         1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_add2_i32         0
diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index 2a7e1c7f5b..8cff9a6bf9 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -1747,6 +1747,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_nop(s);
         s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
         break;
+    case INDEX_op_goto_ptr:
+        /* jmp to the given host address (could be epilogue) */
+        tcg_out_opc_reg(s, OPC_JR, 0, a0, 0);
+        tcg_out_nop(s);
+        break;
     case INDEX_op_br:
         tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO,
                        arg_label(a0));
@@ -2160,6 +2165,7 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_br, { } },
+    { INDEX_op_goto_ptr, { "r" } },
 
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
@@ -2451,6 +2457,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     /* delay slot */
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
 
+    /*
+     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+     * and fall through to the rest of the epilogue.
+     */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO);
+
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;
     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index abd8b3d6cd..5f4a40a5b4 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -82,6 +82,7 @@ extern bool have_isa_3_00;
 #define TCG_TARGET_HAS_muls2_i32        0
 #define TCG_TARGET_HAS_muluh_i32        1
 #define TCG_TARGET_HAS_mulsh_i32        1
+#define TCG_TARGET_HAS_goto_ptr         1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_add2_i32         0
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 64f67d2c77..8d50f18328 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -1932,6 +1932,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
     /* Epilogue */
     tcg_debug_assert(tb_ret_addr == s->code_ptr);
+    s->code_gen_epilogue = tb_ret_addr;
 
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
@@ -1986,6 +1987,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 #endif
         s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
         break;
+    case INDEX_op_goto_ptr:
+        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0);
+        tcg_out32(s, BCCTR | BO_ALWAYS);
+        break;
     case INDEX_op_br:
         {
             TCGLabel *l = arg_label(args[0]);
@@ -2555,6 +2561,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_br, { } },
+    { INDEX_op_goto_ptr, { "r" } },
 
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index cbdd2a6275..957f0c0afe 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -92,6 +92,7 @@ extern uint64_t s390_facilities;
 #define TCG_TARGET_HAS_mulsh_i32      0
 #define TCG_TARGET_HAS_extrl_i64_i32  0
 #define TCG_TARGET_HAS_extrh_i64_i32  0
+#define TCG_TARGET_HAS_goto_ptr       1
 
 #define TCG_TARGET_HAS_div2_i64       1
 #define TCG_TARGET_HAS_rot_i64        1
diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
index a679280b92..5d7083e90c 100644
--- a/tcg/s390/tcg-target.inc.c
+++ b/tcg/s390/tcg-target.inc.c
@@ -1741,9 +1741,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        /* return value */
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]);
-        tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
+        /* Reuse the zeroing that exists for goto_ptr.  */
+        a0 = args[0];
+        if (a0 == 0) {
+            tgen_gotoi(s, S390_CC_ALWAYS, s->code_gen_epilogue);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
+            tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
+        }
         break;
 
     case INDEX_op_goto_tb:
@@ -1767,6 +1772,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
         break;
 
+    case INDEX_op_goto_ptr:
+        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, args[0]);
+        break;
+
     OP_32_64(ld8u):
         /* ??? LLC (RXY format) is only present with the extended-immediate
            facility, whereas LLGC is always present.  */
@@ -2241,6 +2250,7 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_br, { } },
+    { INDEX_op_goto_ptr, { "r" } },
 
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
@@ -2439,6 +2449,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     /* br %r3 (go to TB) */
     tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
 
+    /*
+     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+     * and fall through to the rest of the epilogue.
+     */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
+
+    /* TB epilogue */
     tb_ret_addr = s->code_ptr;
 
     /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index b8b74f96ff..854a0afd70 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -123,6 +123,7 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_muls2_i32        1
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
+#define TCG_TARGET_HAS_goto_ptr         1
 
 #define TCG_TARGET_HAS_extrl_i64_i32    1
 #define TCG_TARGET_HAS_extrh_i64_i32    1
diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c
index 3785d77f62..18afce2f87 100644
--- a/tcg/sparc/tcg-target.inc.c
+++ b/tcg/sparc/tcg-target.inc.c
@@ -1003,7 +1003,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     /* delay slot */
     tcg_out_nop(s);
 
-    /* No epilogue required.  We issue ret + restore directly in the TB.  */
+    /* Epilogue for goto_ptr.  */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
+    /* delay slot */
+    tcg_out_movi_imm13(s, TCG_REG_O0, 0);
 
 #ifdef CONFIG_SOFTMMU
     build_trampolines(s);
@@ -1288,6 +1292,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_nop(s);
         s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
         break;
+    case INDEX_op_goto_ptr:
+        tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL);
+        tcg_out_nop(s);
+        break;
     case INDEX_op_br:
         tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0));
         tcg_out_nop(s);
@@ -1513,6 +1521,7 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_br, { } },
+    { INDEX_op_goto_ptr, { "r" } },
 
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 6b1f41500c..87f673ef49 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2587,6 +2587,18 @@ void tcg_gen_goto_tb(unsigned idx)
     tcg_gen_op1i(INDEX_op_goto_tb, idx);
 }
 
+void tcg_gen_lookup_and_goto_ptr(TCGv addr)
+{
+    if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+        TCGv_ptr ptr = tcg_temp_new_ptr();
+        gen_helper_lookup_tb_ptr(ptr, tcg_ctx.tcg_env, addr);
+        tcg_gen_op1i(INDEX_op_goto_ptr, GET_TCGV_PTR(ptr));
+        tcg_temp_free_ptr(ptr);
+    } else {
+        tcg_gen_exit_tb(0);
+    }
+}
+
 static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
 {
     /* Trigger the asserts within as early as possible.  */
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index c68e300a68..5d3278f243 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -796,6 +796,17 @@ static inline void tcg_gen_exit_tb(uintptr_t val)
  */
 void tcg_gen_goto_tb(unsigned idx);
 
+/**
+ * tcg_gen_lookup_and_goto_ptr() - look up a TB and jump to it if valid
+ * @addr: Guest address of the target TB
+ *
+ * If the TB is not valid, jump to the epilogue.
+ *
+ * This operation is optional. If the TCG backend does not implement goto_ptr,
+ * this op is equivalent to calling tcg_gen_exit_tb() with 0 as the argument.
+ */
+void tcg_gen_lookup_and_goto_ptr(TCGv addr);
+
 #if TARGET_LONG_BITS == 32
 #define tcg_temp_new() tcg_temp_new_i32()
 #define tcg_global_reg_new tcg_global_reg_new_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index f06f89405e..956fb1e9f3 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -193,6 +193,7 @@ DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
     TCG_OPF_NOT_PRESENT)
 DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
 DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
+DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr))
 
 DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
index 114ea6fecf..c41d38a557 100644
--- a/tcg/tcg-runtime.h
+++ b/tcg/tcg-runtime.h
@@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
 DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
 
+DEF_HELPER_FLAGS_2(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env, tl)
+
 DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
 
 #ifdef CONFIG_SOFTMMU
diff --git a/tcg/tcg.c b/tcg/tcg.c
index cb898f1636..564292f54d 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -424,6 +424,11 @@ void tcg_prologue_init(TCGContext *s)
         qemu_log_unlock();
     }
 #endif
+
+    /* Assert that goto_ptr is implemented completely.  */
+    if (TCG_TARGET_HAS_goto_ptr) {
+        tcg_debug_assert(s->code_gen_epilogue != NULL);
+    }
 }
 
 void tcg_func_start(TCGContext *s)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6c216bb73f..5ec48d1787 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -699,6 +699,7 @@ struct TCGContext {
        extension that allows arithmetic on void*.  */
     int code_gen_max_blocks;
     void *code_gen_prologue;
+    void *code_gen_epilogue;
     void *code_gen_buffer;
     size_t code_gen_buffer_size;
     void *code_gen_ptr;
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 838bf3a858..06963288dc 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -85,6 +85,7 @@
 #define TCG_TARGET_HAS_muls2_i32        0
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
+#define TCG_TARGET_HAS_goto_ptr         0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_extrl_i64_i32    0
diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c
index 6c71e46391..4166ce54b7 100644
--- a/tests/test-x86-cpuid-compat.c
+++ b/tests/test-x86-cpuid-compat.c
@@ -313,6 +313,44 @@ int main(int argc, char **argv)
     add_cpuid_test("x86/cpuid/auto-xlevel2/pc-2.7",
                    "-machine pc-i440fx-2.7 -cpu 486,+xstore",
                    "xlevel2", 0);
+    /*
+     * QEMU 1.4.0 had auto-level enabled for CPUID[7], already,
+     * and the compat code that sets default level shouldn't
+     * disable the auto-level=7 code:
+     */
+    add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-1.4/off",
+                   "-machine pc-i440fx-1.4 -cpu Nehalem",
+                   "level", 2);
+    add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-1.5/on",
+                   "-machine pc-i440fx-1.4 -cpu Nehalem,+smap",
+                   "level", 7);
+    add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-2.3/off",
+                   "-machine pc-i440fx-2.3 -cpu Penryn",
+                   "level", 4);
+    add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-2.3/on",
+                   "-machine pc-i440fx-2.3 -cpu Penryn,+erms",
+                   "level", 7);
+    add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-2.9/off",
+                   "-machine pc-i440fx-2.9 -cpu Conroe",
+                   "level", 10);
+    add_cpuid_test("x86/cpuid/auto-level7/pc-i440fx-2.9/on",
+                   "-machine pc-i440fx-2.9 -cpu Conroe,+erms",
+                   "level", 10);
+
+    /*
+     * xlevel doesn't have any feature that triggers auto-level
+     * code on old machine-types.  Just check that the compat code
+     * is working correctly:
+     */
+    add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.3",
+                   "-machine pc-i440fx-2.3 -cpu SandyBridge",
+                   "xlevel", 0x8000000a);
+    add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-off",
+                   "-machine pc-i440fx-2.4 -cpu SandyBridge,",
+                   "xlevel", 0x80000008);
+    add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on",
+                   "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt",
+                   "xlevel", 0x80000008);
 
     /* Test feature parsing */
     add_feature_test("x86/cpuid/features/plus",