summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS2
-rwxr-xr-xconfigure2
-rw-r--r--default-configs/i386-softmmu.mak1
-rw-r--r--default-configs/x86_64-softmmu.mak1
-rw-r--r--gdb-xml/s390-virt.xml18
-rw-r--r--hw/cpu/Makefile.objs1
-rw-r--r--hw/cpu/icc_bus.c118
-rw-r--r--hw/i386/pc.c52
-rw-r--r--hw/i386/pc_piix.c17
-rw-r--r--hw/i386/pc_q35.c13
-rw-r--r--hw/intc/apic_common.c11
-rw-r--r--hw/mem/pc-dimm.c5
-rw-r--r--hw/s390x/event-facility.c20
-rw-r--r--hw/s390x/s390-virtio-ccw.c2
-rw-r--r--include/exec/exec-all.h23
-rw-r--r--include/hw/cpu/icc_bus.h82
-rw-r--r--include/hw/i386/apic_internal.h7
-rw-r--r--include/hw/i386/pc.h24
-rw-r--r--include/hw/i386/topology.h33
-rw-r--r--include/qom/cpu.h16
-rw-r--r--linux-user/main.c39
-rw-r--r--linux-user/signal.c159
-rw-r--r--linux-user/syscall_defs.h11
-rw-r--r--linux-user/tilegx/syscall.h3
-rw-r--r--pc-bios/s390-ccw/Makefile3
-rw-r--r--target-alpha/cpu.h1
-rw-r--r--target-alpha/translate.c70
-rw-r--r--target-arm/cpu.h2
-rw-r--r--target-arm/translate-a64.c48
-rw-r--r--target-arm/translate.c83
-rw-r--r--target-arm/translate.h8
-rw-r--r--target-cris/cpu.h1
-rw-r--r--target-cris/translate.c93
-rw-r--r--target-cris/translate_v10.c3
-rw-r--r--target-i386/Makefile.objs2
-rw-r--r--target-i386/bpt_helper.c182
-rw-r--r--target-i386/cpu.c119
-rw-r--r--target-i386/cpu.h16
-rw-r--r--target-i386/helper.c128
-rw-r--r--target-i386/kvm.c14
-rw-r--r--target-i386/misc_helper.c34
-rw-r--r--target-i386/translate.c106
-rw-r--r--target-lm32/cpu.h1
-rw-r--r--target-lm32/translate.c83
-rw-r--r--target-m68k/cpu.h1
-rw-r--r--target-m68k/translate.c82
-rw-r--r--target-microblaze/cpu.h1
-rw-r--r--target-microblaze/translate.c83
-rw-r--r--target-mips/cpu.h2
-rw-r--r--target-mips/translate.c98
-rw-r--r--target-moxie/cpu.h1
-rw-r--r--target-moxie/translate.c82
-rw-r--r--target-openrisc/cpu.h1
-rw-r--r--target-openrisc/translate.c78
-rw-r--r--target-ppc/cpu.h1
-rw-r--r--target-ppc/translate.c72
-rw-r--r--target-s390x/cpu.h4
-rw-r--r--target-s390x/gdbstub.c82
-rw-r--r--target-s390x/misc_helper.c4
-rw-r--r--target-s390x/translate.c80
-rw-r--r--target-sh4/cpu.h2
-rw-r--r--target-sh4/translate.c91
-rw-r--r--target-sparc/cpu.h2
-rw-r--r--target-sparc/translate.c185
-rw-r--r--target-tilegx/cpu.c7
-rw-r--r--target-tilegx/cpu.h9
-rw-r--r--target-tilegx/helper.c81
-rw-r--r--target-tilegx/helper.h16
-rw-r--r--target-tilegx/simd_helper.c118
-rw-r--r--target-tilegx/translate.c496
-rw-r--r--target-tricore/translate.c59
-rw-r--r--target-unicore32/translate.c83
-rw-r--r--target-xtensa/cpu.h1
-rw-r--r--target-xtensa/translate.c79
-rw-r--r--tcg/tcg-op.h52
-rw-r--r--tcg/tcg-opc.h4
-rw-r--r--tcg/tcg.c168
-rw-r--r--tcg/tcg.h20
-rw-r--r--tci.c9
-rw-r--r--tests/Makefile2
-rw-r--r--tests/ide-test.c4
-rw-r--r--tests/libqos/ahci.c2
-rw-r--r--translate-all.c520
-rw-r--r--vl.c8
84 files changed, 2255 insertions, 2092 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index e3e34fb4b1..7603ea2d44 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -667,6 +667,7 @@ F: hw/block/cdrom.c
 F: hw/block/hd-geometry.c
 F: tests/ide-test.c
 F: tests/ahci-test.c
+F: tests/libqos/ahci*
 T: git git://github.com/jnsnow/qemu.git ide
 
 Floppy
@@ -675,6 +676,7 @@ L: qemu-block@nongnu.org
 S: Supported
 F: hw/block/fdc.c
 F: include/hw/block/fdc.h
+F: tests/fdc-test.c
 T: git git://github.com/jnsnow/qemu.git ide
 
 OMAP
diff --git a/configure b/configure
index f14454e691..2d2a498ac4 100755
--- a/configure
+++ b/configure
@@ -5457,7 +5457,7 @@ case "$target_name" in
     echo "TARGET_ABI32=y" >> $config_target_mak
   ;;
   s390x)
-    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml"
+    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml s390-virt.xml"
   ;;
   tilegx)
   ;;
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 9393cf0ac9..43c96d1c91 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -44,7 +44,6 @@ CONFIG_LPC_ICH9=y
 CONFIG_PCI_Q35=y
 CONFIG_APIC=y
 CONFIG_IOAPIC=y
-CONFIG_ICC_BUS=y
 CONFIG_PVPANIC=y
 CONFIG_MEM_HOTPLUG=y
 CONFIG_XIO3130=y
diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
index 28e2099187..dfb80954d4 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -44,7 +44,6 @@ CONFIG_LPC_ICH9=y
 CONFIG_PCI_Q35=y
 CONFIG_APIC=y
 CONFIG_IOAPIC=y
-CONFIG_ICC_BUS=y
 CONFIG_PVPANIC=y
 CONFIG_MEM_HOTPLUG=y
 CONFIG_XIO3130=y
diff --git a/gdb-xml/s390-virt.xml b/gdb-xml/s390-virt.xml
new file mode 100644
index 0000000000..e2e9a7ad3c
--- /dev/null
+++ b/gdb-xml/s390-virt.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Copyright 2015 IBM Corp.
+
+     This work is licensed under the terms of the GNU GPL, version 2 or
+     (at your option) any later version. See the COPYING file in the
+     top-level directory. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.virt">
+  <reg name="ckc" bitsize="64" type="uint64" group="system"/>
+  <reg name="cputm" bitsize="64" type="uint64" group="system"/>
+  <reg name="last_break" bitsize="64" type="code_ptr" group="system"/>
+  <reg name="prefix" bitsize="64" type="data_ptr" group="system"/>
+  <reg name="pp" bitsize="64" type="uint64" group="system"/>
+  <reg name="pfault_token" bitsize="64" type="uint64" group="system"/>
+  <reg name="pfault_select" bitsize="64" type="uint64" group="system"/>
+  <reg name="pfault_compare" bitsize="64" type="uint64" group="system"/>
+</feature>
diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs
index 6381238cc5..0954a1872f 100644
--- a/hw/cpu/Makefile.objs
+++ b/hw/cpu/Makefile.objs
@@ -2,5 +2,4 @@ obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o
 obj-$(CONFIG_REALVIEW) += realview_mpcore.o
 obj-$(CONFIG_A9MPCORE) += a9mpcore.o
 obj-$(CONFIG_A15MPCORE) += a15mpcore.o
-obj-$(CONFIG_ICC_BUS) += icc_bus.o
 
diff --git a/hw/cpu/icc_bus.c b/hw/cpu/icc_bus.c
deleted file mode 100644
index 6646ea2b34..0000000000
--- a/hw/cpu/icc_bus.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/* icc_bus.c
- * emulate x86 ICC (Interrupt Controller Communications) bus
- *
- * Copyright (c) 2013 Red Hat, Inc
- *
- * Authors:
- *     Igor Mammedov <imammedo@redhat.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>
- */
-#include "hw/cpu/icc_bus.h"
-#include "hw/sysbus.h"
-
-/* icc-bridge implementation */
-
-static const TypeInfo icc_bus_info = {
-    .name = TYPE_ICC_BUS,
-    .parent = TYPE_BUS,
-    .instance_size = sizeof(ICCBus),
-};
-
-
-/* icc-device implementation */
-
-static void icc_device_realize(DeviceState *dev, Error **errp)
-{
-    ICCDeviceClass *idc = ICC_DEVICE_GET_CLASS(dev);
-
-    /* convert to QOM */
-    if (idc->realize) {
-        idc->realize(dev, errp);
-    }
-
-}
-
-static void icc_device_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    dc->realize = icc_device_realize;
-    dc->bus_type = TYPE_ICC_BUS;
-}
-
-static const TypeInfo icc_device_info = {
-    .name = TYPE_ICC_DEVICE,
-    .parent = TYPE_DEVICE,
-    .abstract = true,
-    .instance_size = sizeof(ICCDevice),
-    .class_size = sizeof(ICCDeviceClass),
-    .class_init = icc_device_class_init,
-};
-
-
-/*  icc-bridge implementation */
-
-typedef struct ICCBridgeState {
-    /*< private >*/
-    SysBusDevice parent_obj;
-    /*< public >*/
-
-    ICCBus icc_bus;
-    MemoryRegion apic_container;
-} ICCBridgeState;
-
-#define ICC_BRIDGE(obj) OBJECT_CHECK(ICCBridgeState, (obj), TYPE_ICC_BRIDGE)
-
-static void icc_bridge_init(Object *obj)
-{
-    ICCBridgeState *s = ICC_BRIDGE(obj);
-    SysBusDevice *sb = SYS_BUS_DEVICE(obj);
-
-    qbus_create_inplace(&s->icc_bus, sizeof(s->icc_bus), TYPE_ICC_BUS,
-                        DEVICE(s), "icc");
-
-    /* Do not change order of registering regions,
-     * APIC must be first registered region, board maps it by 0 index
-     */
-    memory_region_init(&s->apic_container, obj, "icc-apic-container",
-                       APIC_SPACE_SIZE);
-    sysbus_init_mmio(sb, &s->apic_container);
-    s->icc_bus.apic_address_space = &s->apic_container;
-}
-
-static void icc_bridge_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-
-    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
-}
-
-static const TypeInfo icc_bridge_info = {
-    .name  = TYPE_ICC_BRIDGE,
-    .parent = TYPE_SYS_BUS_DEVICE,
-    .instance_init  = icc_bridge_init,
-    .instance_size  = sizeof(ICCBridgeState),
-    .class_init = icc_bridge_class_init,
-};
-
-
-static void icc_bus_register_types(void)
-{
-    type_register_static(&icc_bus_info);
-    type_register_static(&icc_device_info);
-    type_register_static(&icc_bridge_info);
-}
-
-type_init(icc_bus_register_types)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index efbd41a1f1..9275297adc 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -59,7 +59,6 @@
 #include "qemu/error-report.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/cpu_hotplug.h"
-#include "hw/cpu/icc_bus.h"
 #include "hw/boards.h"
 #include "hw/pci/pci_host.h"
 #include "acpi-build.h"
@@ -1052,23 +1051,16 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
 }
 
 static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
-                          DeviceState *icc_bridge, Error **errp)
+                          Error **errp)
 {
     X86CPU *cpu = NULL;
     Error *local_err = NULL;
 
-    if (icc_bridge == NULL) {
-        error_setg(&local_err, "Invalid icc-bridge value");
-        goto out;
-    }
-
     cpu = cpu_x86_create(cpu_model, &local_err);
     if (local_err != NULL) {
         goto out;
     }
 
-    qdev_set_parent_bus(DEVICE(cpu), qdev_get_child_bus(icc_bridge, "icc"));
-
     object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err);
     object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
 
@@ -1085,7 +1077,6 @@ static const char *current_cpu_model;
 
 void pc_hot_add_cpu(const int64_t id, Error **errp)
 {
-    DeviceState *icc_bridge;
     X86CPU *cpu;
     int64_t apic_id = x86_cpu_apic_id_from_index(id);
     Error *local_err = NULL;
@@ -1114,9 +1105,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
         return;
     }
 
-    icc_bridge = DEVICE(object_resolve_path_type("icc-bridge",
-                                                 TYPE_ICC_BRIDGE, NULL));
-    cpu = pc_new_cpu(current_cpu_model, apic_id, icc_bridge, &local_err);
+    cpu = pc_new_cpu(current_cpu_model, apic_id, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -1124,7 +1113,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
     object_unref(OBJECT(cpu));
 }
 
-void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
+void pc_cpus_init(const char *cpu_model)
 {
     int i;
     X86CPU *cpu = NULL;
@@ -1150,7 +1139,7 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
 
     for (i = 0; i < smp_cpus; i++) {
         cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i),
-                         icc_bridge, &error);
+                         &error);
         if (error) {
             error_report_err(error);
             exit(1);
@@ -1158,13 +1147,6 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
         object_unref(OBJECT(cpu));
     }
 
-    /* map APIC MMIO area if CPU has APIC */
-    if (cpu && cpu->apic_state) {
-        /* XXX: what if the base changes? */
-        sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0,
-                                APIC_DEFAULT_ADDRESS, 0x1000);
-    }
-
     /* tell smbios about cpuid version and features */
     smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
 }
@@ -1933,12 +1915,31 @@ static void pc_machine_initfn(Object *obj)
                              NULL, &error_abort);
 }
 
+static void pc_machine_reset(void)
+{
+    CPUState *cs;
+    X86CPU *cpu;
+
+    qemu_devices_reset();
+
+    /* Reset APIC after devices have been reset to cancel
+     * any changes that qemu_devices_reset() might have done.
+     */
+    CPU_FOREACH(cs) {
+        cpu = X86_CPU(cs);
+
+        if (cpu->apic_state) {
+            device_reset(cpu->apic_state);
+        }
+    }
+}
+
 static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index)
 {
-    unsigned pkg_id, core_id, smt_id;
+    X86CPUTopoInfo topo;
     x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index,
-                          &pkg_id, &core_id, &smt_id);
-    return pkg_id;
+                          &topo);
+    return topo.pkg_id;
 }
 
 static void pc_machine_class_init(ObjectClass *oc, void *data)
@@ -1954,6 +1955,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     mc->default_boot_order = "cad";
     mc->hot_add_cpu = pc_hot_add_cpu;
     mc->max_cpus = 255;
+    mc->reset = pc_machine_reset;
     hc->plug = pc_machine_device_plug_cb;
     hc->unplug_request = pc_machine_device_unplug_request_cb;
     hc->unplug = pc_machine_device_unplug_cb;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4514cd1462..ae7bbebd0f 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -39,7 +39,6 @@
 #include "hw/kvm/clock.h"
 #include "sysemu/sysemu.h"
 #include "hw/sysbus.h"
-#include "hw/cpu/icc_bus.h"
 #include "sysemu/arch_init.h"
 #include "sysemu/block-backend.h"
 #include "hw/i2c/smbus.h"
@@ -98,7 +97,6 @@ static void pc_init1(MachineState *machine,
     MemoryRegion *ram_memory;
     MemoryRegion *pci_memory;
     MemoryRegion *rom_memory;
-    DeviceState *icc_bridge;
     PcGuestInfo *guest_info;
     ram_addr_t lowmem;
 
@@ -141,11 +139,7 @@ static void pc_init1(MachineState *machine,
         exit(1);
     }
 
-    icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
-    object_property_add_child(qdev_get_machine(), "icc-bridge",
-                              OBJECT(icc_bridge), NULL);
-
-    pc_cpus_init(machine->cpu_model, icc_bridge);
+    pc_cpus_init(machine->cpu_model);
 
     if (kvm_enabled() && kvmclock_enabled) {
         kvmclock_create();
@@ -226,7 +220,6 @@ static void pc_init1(MachineState *machine,
     if (pci_enabled) {
         ioapic_init_gsi(gsi_state, "i440fx");
     }
-    qdev_init_nofail(icc_bridge);
 
     pc_register_ferr_irq(gsi[13]);
 
@@ -332,7 +325,7 @@ static void pc_compat_2_1(MachineState *machine)
 
     pc_compat_2_2(machine);
     smbios_uuid_encoded = false;
-    x86_cpu_compat_kvm_no_autodisable(FEAT_8000_0001_ECX, CPUID_EXT3_SVM);
+    x86_cpu_change_kvm_default("svm", NULL);
     pcms->enforce_aligned_dimm = false;
 }
 
@@ -368,7 +361,7 @@ static void pc_compat_1_7(MachineState *machine)
     gigabyte_align = false;
     option_rom_has_mr = true;
     legacy_acpi_table_size = 6414;
-    x86_cpu_compat_kvm_no_autoenable(FEAT_1_ECX, CPUID_EXT_X2APIC);
+    x86_cpu_change_kvm_default("x2apic", NULL);
 }
 
 static void pc_compat_1_6(MachineState *machine)
@@ -398,7 +391,7 @@ static void pc_compat_1_3(MachineState *machine)
 static void pc_compat_1_2(MachineState *machine)
 {
     pc_compat_1_3(machine);
-    x86_cpu_compat_kvm_no_autoenable(FEAT_KVM, 1 << KVM_FEATURE_PV_EOI);
+    x86_cpu_change_kvm_default("kvm-pv-eoi", NULL);
 }
 
 /* PC compat function for pc-0.10 to pc-0.13 */
@@ -421,7 +414,7 @@ static void pc_init_isa(MachineState *machine)
     if (!machine->cpu_model) {
         machine->cpu_model = "486";
     }
-    x86_cpu_compat_kvm_no_autoenable(FEAT_KVM, 1 << KVM_FEATURE_PV_EOI);
+    x86_cpu_change_kvm_default("kvm-pv-eoi", NULL);
     enable_compat_apic_id_mode();
     pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, TYPE_I440FX_PCI_DEVICE);
 }
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 1f100b1a69..19e66702e0 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -43,7 +43,6 @@
 #include "hw/ide/pci.h"
 #include "hw/ide/ahci.h"
 #include "hw/usb.h"
-#include "hw/cpu/icc_bus.h"
 #include "qemu/error-report.h"
 #include "migration/migration.h"
 
@@ -83,7 +82,6 @@ static void pc_q35_init(MachineState *machine)
     int i;
     ICH9LPCState *ich9_lpc;
     PCIDevice *ahci;
-    DeviceState *icc_bridge;
     PcGuestInfo *guest_info;
     ram_addr_t lowmem;
     DriveInfo *hd[MAX_SATA_PORTS];
@@ -130,11 +128,7 @@ static void pc_q35_init(MachineState *machine)
         exit(1);
     }
 
-    icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
-    object_property_add_child(qdev_get_machine(), "icc-bridge",
-                              OBJECT(icc_bridge), NULL);
-
-    pc_cpus_init(machine->cpu_model, icc_bridge);
+    pc_cpus_init(machine->cpu_model);
     pc_acpi_init("q35-acpi-dsdt.aml");
 
     kvmclock_create();
@@ -236,7 +230,6 @@ static void pc_q35_init(MachineState *machine)
     if (pci_enabled) {
         ioapic_init_gsi(gsi_state, "q35");
     }
-    qdev_init_nofail(icc_bridge);
 
     pc_register_ferr_irq(gsi[13]);
 
@@ -316,7 +309,7 @@ static void pc_compat_2_1(MachineState *machine)
     pc_compat_2_2(machine);
     pcms->enforce_aligned_dimm = false;
     smbios_uuid_encoded = false;
-    x86_cpu_compat_kvm_no_autodisable(FEAT_8000_0001_ECX, CPUID_EXT3_SVM);
+    x86_cpu_change_kvm_default("svm", NULL);
 }
 
 static void pc_compat_2_0(MachineState *machine)
@@ -333,7 +326,7 @@ static void pc_compat_1_7(MachineState *machine)
     smbios_defaults = false;
     gigabyte_align = false;
     option_rom_has_mr = true;
-    x86_cpu_compat_kvm_no_autoenable(FEAT_1_ECX, CPUID_EXT_X2APIC);
+    x86_cpu_change_kvm_default("x2apic", NULL);
 }
 
 static void pc_compat_1_6(MachineState *machine)
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 0032b97c5f..ad959c4e77 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -296,7 +296,6 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
     APICCommonClass *info;
     static DeviceState *vapic;
     static int apic_no;
-    static bool mmio_registered;
 
     if (apic_no >= MAX_APICS) {
         error_setg(errp, "%s initialization failed.",
@@ -307,11 +306,6 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
 
     info = APIC_COMMON_GET_CLASS(s);
     info->realize(dev, errp);
-    if (!mmio_registered) {
-        ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev));
-        memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory);
-        mmio_registered = true;
-    }
 
     /* Note: We need at least 1M to map the VAPIC option ROM */
     if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK &&
@@ -425,13 +419,12 @@ static Property apic_properties_common[] = {
 
 static void apic_common_class_init(ObjectClass *klass, void *data)
 {
-    ICCDeviceClass *idc = ICC_DEVICE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->vmsd = &vmstate_apic_common;
     dc->reset = apic_reset_common;
     dc->props = apic_properties_common;
-    idc->realize = apic_common_realize;
+    dc->realize = apic_common_realize;
     /*
      * Reason: APIC and CPU need to be wired up by
      * x86_cpu_apic_create()
@@ -441,7 +434,7 @@ static void apic_common_class_init(ObjectClass *klass, void *data)
 
 static const TypeInfo apic_common_type = {
     .name = TYPE_APIC_COMMON,
-    .parent = TYPE_ICC_DEVICE,
+    .parent = TYPE_DEVICE,
     .instance_size = sizeof(APICCommonState),
     .class_size = sizeof(APICCommonClass),
     .class_init = apic_common_class_init,
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 6cc6ac30e7..506fe0d2a8 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -417,10 +417,11 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp)
         error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
         return;
     }
-    if ((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) {
+    if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) ||
+        (!nb_numa_nodes && dimm->node)) {
         error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
                    PRIu32 "' which exceeds the number of numa nodes: %d",
-                   dimm->node, nb_numa_nodes);
+                   dimm->node, nb_numa_nodes ? nb_numa_nodes : 1);
         return;
     }
 }
diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
index ef2a05160a..907b48560c 100644
--- a/hw/s390x/event-facility.c
+++ b/hw/s390x/event-facility.c
@@ -27,8 +27,6 @@ typedef struct SCLPEventsBus {
 struct SCLPEventFacility {
     SysBusDevice parent_obj;
     SCLPEventsBus sbus;
-    SCLPEvent quiesce_event;
-    SCLPEvent cpu_hotplug_event;
     /* guest' receive mask */
     unsigned int receive_mask;
 };
@@ -347,19 +345,21 @@ static void init_event_facility(Object *obj)
 {
     SCLPEventFacility *event_facility = EVENT_FACILITY(obj);
     DeviceState *sdev = DEVICE(obj);
+    Object *new;
 
     /* Spawn a new bus for SCLP events */
     qbus_create_inplace(&event_facility->sbus, sizeof(event_facility->sbus),
                         TYPE_SCLP_EVENTS_BUS, sdev, NULL);
 
-    object_initialize(&event_facility->quiesce_event, sizeof(SCLPEvent),
-                      TYPE_SCLP_QUIESCE);
-    qdev_set_parent_bus(DEVICE(&event_facility->quiesce_event),
-                        &event_facility->sbus.qbus);
-    object_initialize(&event_facility->cpu_hotplug_event, sizeof(SCLPEvent),
-                      TYPE_SCLP_CPU_HOTPLUG);
-    qdev_set_parent_bus(DEVICE(&event_facility->cpu_hotplug_event),
-                        &event_facility->sbus.qbus);
+    new = object_new(TYPE_SCLP_QUIESCE);
+    object_property_add_child(obj, TYPE_SCLP_QUIESCE, new, NULL);
+    object_unref(new);
+    qdev_set_parent_bus(DEVICE(new), &event_facility->sbus.qbus);
+
+    new = object_new(TYPE_SCLP_CPU_HOTPLUG);
+    object_property_add_child(obj, TYPE_SCLP_CPU_HOTPLUG, new, NULL);
+    object_unref(new);
+    qdev_set_parent_bus(DEVICE(new), &event_facility->sbus.qbus);
     /* the facility will automatically realize the devices via the bus */
 }
 
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index c53ebc1ae1..6195f132fc 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -35,7 +35,7 @@ typedef struct S390CcwMachineState {
     bool dea_key_wrap;
 } S390CcwMachineState;
 
-void io_subsystem_reset(void)
+void subsystem_reset(void)
 {
     DeviceState *css, *sclp, *flic, *diag288;
 
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index a3719b7f0d..a63fd6015e 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -62,24 +62,15 @@ typedef struct TranslationBlock TranslationBlock;
 #define OPC_BUF_SIZE 640
 #define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)
 
-/* Maximum size a TCG op can expand to.  This is complicated because a
-   single op may require several host instructions and register reloads.
-   For now take a wild guess at 192 bytes, which should allow at least
-   a couple of fixup instructions per argument.  */
-#define TCG_MAX_OP_SIZE 192
-
 #define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM)
 
 #include "qemu/log.h"
 
 void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb);
-void gen_intermediate_code_pc(CPUArchState *env, struct TranslationBlock *tb);
 void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
-                          int pc_pos);
+                          target_ulong *data);
 
 void cpu_gen_init(void);
-int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb,
-                 int *gen_code_size_ptr);
 bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc);
 void page_size_init(void);
 
@@ -170,13 +161,14 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...)
 #define CODE_GEN_PHYS_HASH_BITS     15
 #define CODE_GEN_PHYS_HASH_SIZE     (1 << CODE_GEN_PHYS_HASH_BITS)
 
-/* estimated block size for TB allocation */
-/* XXX: use a per code average code fragment size and modulate it
-   according to the host CPU */
+/* Estimated block size for TB allocation.  */
+/* ??? The following is based on a 2015 survey of x86_64 host output.
+   Better would seem to be some sort of dynamically sized TB array,
+   adapting to the block sizes actually being produced.  */
 #if defined(CONFIG_SOFTMMU)
-#define CODE_GEN_AVG_BLOCK_SIZE 128
+#define CODE_GEN_AVG_BLOCK_SIZE 400
 #else
-#define CODE_GEN_AVG_BLOCK_SIZE 64
+#define CODE_GEN_AVG_BLOCK_SIZE 150
 #endif
 
 #if defined(__arm__) || defined(_ARCH_PPC) \
@@ -201,6 +193,7 @@ struct TranslationBlock {
 #define CF_USE_ICOUNT  0x20000
 
     void *tc_ptr;    /* pointer to the translated code */
+    uint8_t *tc_search;  /* pointer to search data */
     /* next matching tb for physical address. */
     struct TranslationBlock *phys_hash_next;
     /* original tb when cflags has CF_NOCACHE */
diff --git a/include/hw/cpu/icc_bus.h b/include/hw/cpu/icc_bus.h
deleted file mode 100644
index 98a979fa1c..0000000000
--- a/include/hw/cpu/icc_bus.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* icc_bus.h
- * emulate x86 ICC (Interrupt Controller Communications) bus
- *
- * Copyright (c) 2013 Red Hat, Inc
- *
- * Authors:
- *     Igor Mammedov <imammedo@redhat.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>
- */
-#ifndef ICC_BUS_H
-#define ICC_BUS_H
-
-#include "exec/memory.h"
-#include "hw/qdev-core.h"
-
-#define TYPE_ICC_BUS "icc-bus"
-
-#ifndef CONFIG_USER_ONLY
-
-/**
- * ICCBus:
- *
- * ICC bus
- */
-typedef struct ICCBus {
-    /*< private >*/
-    BusState parent_obj;
-    /*< public >*/
-
-    MemoryRegion *apic_address_space;
-} ICCBus;
-
-#define ICC_BUS(obj) OBJECT_CHECK(ICCBus, (obj), TYPE_ICC_BUS)
-
-/**
- * ICCDevice:
- *
- * ICC device
- */
-typedef struct ICCDevice {
-    /*< private >*/
-    DeviceState qdev;
-    /*< public >*/
-} ICCDevice;
-
-/**
- * ICCDeviceClass:
- * @init: Initialization callback for derived classes.
- *
- * ICC device class
- */
-typedef struct ICCDeviceClass {
-    /*< private >*/
-    DeviceClass parent_class;
-    /*< public >*/
-
-    DeviceRealize realize;
-} ICCDeviceClass;
-
-#define TYPE_ICC_DEVICE "icc-device"
-#define ICC_DEVICE(obj) OBJECT_CHECK(ICCDevice, (obj), TYPE_ICC_DEVICE)
-#define ICC_DEVICE_CLASS(klass) \
-     OBJECT_CLASS_CHECK(ICCDeviceClass, (klass), TYPE_ICC_DEVICE)
-#define ICC_DEVICE_GET_CLASS(obj) \
-     OBJECT_GET_CLASS(ICCDeviceClass, (obj), TYPE_ICC_DEVICE)
-
-#define TYPE_ICC_BRIDGE "icc-bridge"
-
-#endif /* CONFIG_USER_ONLY */
-#endif
diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h
index 7813396e49..74fe935e8e 100644
--- a/include/hw/i386/apic_internal.h
+++ b/include/hw/i386/apic_internal.h
@@ -22,7 +22,6 @@
 
 #include "cpu.h"
 #include "exec/memory.h"
-#include "hw/cpu/icc_bus.h"
 #include "qemu/timer.h"
 
 /* APIC Local Vector Table */
@@ -135,7 +134,7 @@ typedef struct APICCommonState APICCommonState;
 
 typedef struct APICCommonClass
 {
-    ICCDeviceClass parent_class;
+    DeviceClass parent_class;
 
     DeviceRealize realize;
     void (*set_base)(APICCommonState *s, uint64_t val);
@@ -150,7 +149,9 @@ typedef struct APICCommonClass
 } APICCommonClass;
 
 struct APICCommonState {
-    ICCDevice busdev;
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
 
     MemoryRegion io_memory;
     X86CPU *cpu;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index c13e91ddde..0503485cd0 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -168,7 +168,7 @@ bool pc_machine_is_smm_enabled(PCMachineState *pcms);
 void pc_register_ferr_irq(qemu_irq irq);
 void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
 
-void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge);
+void pc_cpus_init(const char *cpu_model);
 void pc_hot_add_cpu(const int64_t id, Error **errp);
 void pc_acpi_init(const char *default_dsdt);
 
@@ -298,7 +298,27 @@ int e820_get_num_entries(void);
 bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
 #define PC_COMPAT_2_4 \
-        HW_COMPAT_2_4
+        HW_COMPAT_2_4 \
+        {\
+            .driver   = "Haswell-" TYPE_X86_CPU,\
+            .property = "abm",\
+            .value    = "off",\
+        },\
+        {\
+            .driver   = "Haswell-noTSX-" TYPE_X86_CPU,\
+            .property = "abm",\
+            .value    = "off",\
+        },\
+        {\
+            .driver   = "Broadwell-" TYPE_X86_CPU,\
+            .property = "abm",\
+            .value    = "off",\
+        },\
+        {\
+            .driver   = "Broadwell-noTSX-" TYPE_X86_CPU,\
+            .property = "abm",\
+            .value    = "off",\
+        },
 
 #define PC_COMPAT_2_3 \
         PC_COMPAT_2_4 \
diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h
index 9c6f3a937a..148cc1bbc8 100644
--- a/include/hw/i386/topology.h
+++ b/include/hw/i386/topology.h
@@ -47,6 +47,12 @@
  */
 typedef uint32_t apic_id_t;
 
+typedef struct X86CPUTopoInfo {
+    unsigned pkg_id;
+    unsigned core_id;
+    unsigned smt_id;
+} X86CPUTopoInfo;
+
 /* Return the bit width needed for 'count' IDs
  */
 static unsigned apicid_bitwidth_for_count(unsigned count)
@@ -92,13 +98,11 @@ static inline unsigned apicid_pkg_offset(unsigned nr_cores, unsigned nr_threads)
  */
 static inline apic_id_t apicid_from_topo_ids(unsigned nr_cores,
                                              unsigned nr_threads,
-                                             unsigned pkg_id,
-                                             unsigned core_id,
-                                             unsigned smt_id)
+                                             const X86CPUTopoInfo *topo)
 {
-    return (pkg_id  << apicid_pkg_offset(nr_cores, nr_threads)) |
-           (core_id << apicid_core_offset(nr_cores, nr_threads)) |
-           smt_id;
+    return (topo->pkg_id  << apicid_pkg_offset(nr_cores, nr_threads)) |
+           (topo->core_id << apicid_core_offset(nr_cores, nr_threads)) |
+           topo->smt_id;
 }
 
 /* Calculate thread/core/package IDs for a specific topology,
@@ -107,14 +111,12 @@ static inline apic_id_t apicid_from_topo_ids(unsigned nr_cores,
 static inline void x86_topo_ids_from_idx(unsigned nr_cores,
                                          unsigned nr_threads,
                                          unsigned cpu_index,
-                                         unsigned *pkg_id,
-                                         unsigned *core_id,
-                                         unsigned *smt_id)
+                                         X86CPUTopoInfo *topo)
 {
     unsigned core_index = cpu_index / nr_threads;
-    *smt_id = cpu_index % nr_threads;
-    *core_id = core_index % nr_cores;
-    *pkg_id = core_index / nr_cores;
+    topo->smt_id = cpu_index % nr_threads;
+    topo->core_id = core_index % nr_cores;
+    topo->pkg_id = core_index / nr_cores;
 }
 
 /* Make APIC ID for the CPU 'cpu_index'
@@ -125,10 +127,9 @@ static inline apic_id_t x86_apicid_from_cpu_idx(unsigned nr_cores,
                                                 unsigned nr_threads,
                                                 unsigned cpu_index)
 {
-    unsigned pkg_id, core_id, smt_id;
-    x86_topo_ids_from_idx(nr_cores, nr_threads, cpu_index,
-                          &pkg_id, &core_id, &smt_id);
-    return apicid_from_topo_ids(nr_cores, nr_threads, pkg_id, core_id, smt_id);
+    X86CPUTopoInfo topo;
+    x86_topo_ids_from_idx(nr_cores, nr_threads, cpu_index, &topo);
+    return apicid_from_topo_ids(nr_cores, nr_threads, &topo);
 }
 
 #endif /* HW_I386_TOPOLOGY_H */
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 9405554a2b..b613ff0329 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -721,6 +721,7 @@ void cpu_single_step(CPUState *cpu, int enabled);
 /* 0x08 currently unused */
 #define BP_GDB                0x10
 #define BP_CPU                0x20
+#define BP_ANY                (BP_GDB | BP_CPU)  /* matches either flag */
 #define BP_WATCHPOINT_HIT_READ 0x40
 #define BP_WATCHPOINT_HIT_WRITE 0x80
 #define BP_WATCHPOINT_HIT (BP_WATCHPOINT_HIT_READ | BP_WATCHPOINT_HIT_WRITE)
@@ -731,6 +732,21 @@ int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags);
 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint);
 void cpu_breakpoint_remove_all(CPUState *cpu, int mask);
 
+/* Return true if a breakpoint at PC has any of the flags in MASK set.  */
+static inline bool cpu_breakpoint_test(CPUState *cpu, vaddr pc, int mask)
+{
+    CPUBreakpoint *bp;
+
+    if (unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) {  /* usually empty */
+        QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
+            if (bp->pc == pc && (bp->flags & mask)) {
+                return true;  /* first match is enough */
+            }
+        }
+    }
+    return false;
+}
+
 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                           int flags, CPUWatchpoint **watchpoint);
 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr,
diff --git a/linux-user/main.c b/linux-user/main.c
index 6599a41404..1f60ff2a1f 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3414,28 +3414,47 @@ void cpu_loop(CPUS390XState *env)
 
 #ifdef TARGET_TILEGX
 
-static void gen_sigsegv_maperr(CPUTLGState *env, target_ulong addr)
+static void gen_sigill_reg(CPUTLGState *env)
 {
     target_siginfo_t info;
 
-    info.si_signo = TARGET_SIGSEGV;
+    info.si_signo = TARGET_SIGILL;
     info.si_errno = 0;
-    info.si_code = TARGET_SEGV_MAPERR;
-    info._sifields._sigfault._addr = addr;
+    info.si_code = TARGET_ILL_PRVREG;  /* privileged register */
+    info._sifields._sigfault._addr = env->pc;  /* reported address: the pc */
     queue_signal(env, info.si_signo, &info);
 }
 
-static void gen_sigill_reg(CPUTLGState *env)
+static void do_signal(CPUTLGState *env, int signo, int sigcode)
 {
     target_siginfo_t info;
 
-    info.si_signo = TARGET_SIGILL;
+    info.si_signo = signo;
     info.si_errno = 0;
-    info.si_code = TARGET_ILL_PRVREG;
     info._sifields._sigfault._addr = env->pc;
+
+    if (signo == TARGET_SIGSEGV) {
+        /* The passed in sigcode is a dummy; report env->excaddr and pass
+           MAPERR if page_check_range() fails for it, else ACCERR.  */
+        target_ulong addr = env->excaddr;
+        info._sifields._sigfault._addr = addr;  /* replaces env->pc */
+        if (page_check_range(addr, 1, PAGE_VALID) < 0) {
+            sigcode = TARGET_SEGV_MAPERR;
+        } else {
+            sigcode = TARGET_SEGV_ACCERR;
+        }
+    }
+    info.si_code = sigcode;  /* caller's value unless SIGSEGV */
+
     queue_signal(env, info.si_signo, &info);
 }
 
+static void gen_sigsegv_maperr(CPUTLGState *env, target_ulong addr)
+{   /* Queue SIGSEGV for ADDR; despite the name, ACCERR is possible too. */
+    env->excaddr = addr;  /* do_signal() reads the address from here */
+    do_signal(env, TARGET_SIGSEGV, 0);  /* sigcode 0 is a dummy */
+}
+
 static void set_regval(CPUTLGState *env, uint8_t reg, uint64_t val)
 {
     if (unlikely(reg >= TILEGX_R_COUNT)) {
@@ -3622,13 +3641,13 @@ void cpu_loop(CPUTLGState *env)
         case TILEGX_EXCP_OPCODE_FETCHOR4:
             do_fetch(env, trapnr, false);
             break;
+        case TILEGX_EXCP_SIGNAL:
+            do_signal(env, env->signo, env->sigcode);
+            break;
         case TILEGX_EXCP_REG_IDN_ACCESS:
         case TILEGX_EXCP_REG_UDN_ACCESS:
             gen_sigill_reg(env);
             break;
-        case TILEGX_EXCP_SEGV:
-            gen_sigsegv_maperr(env, env->excaddr);
-            break;
         default:
             fprintf(stderr, "trapnr is %d[0x%x].\n", trapnr, trapnr);
             g_assert_not_reached();
diff --git a/linux-user/signal.c b/linux-user/signal.c
index ac82baa0f0..9d62e027e3 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -5537,6 +5537,163 @@ long do_rt_sigreturn(CPUAlphaState *env)
     force_sig(TARGET_SIGSEGV);
 }
 
+#elif defined(TARGET_TILEGX)
+
+struct target_sigcontext {
+    union {
+        /* General-purpose registers.  */
+        abi_ulong gregs[56];  /* shares storage with the struct below */
+        struct {
+            abi_ulong __gregs[53];  /* overlays gregs[0..52] */
+            abi_ulong tp;        /* Aliases gregs[TREG_TP].  */
+            abi_ulong sp;        /* Aliases gregs[TREG_SP].  */
+            abi_ulong lr;        /* Aliases gregs[TREG_LR].  */
+        };
+    };
+    abi_ulong pc;        /* Program counter.  */
+    abi_ulong ics;       /* In Interrupt Critical Section?  */
+    abi_ulong faultnum;  /* Fault number; setup_sigcontext stores signo.  */
+    abi_ulong pad[5];    /* not written by setup_sigcontext */
+};
+
+struct target_ucontext {
+    abi_ulong tuc_flags;  /* setup_rt_frame stores 0 */
+    abi_ulong tuc_link;   /* setup_rt_frame stores 0 */
+    target_stack_t tuc_stack;  /* sigaltstack sp/flags/size at delivery */
+    struct target_sigcontext tuc_mcontext;  /* saved registers and pc */
+    target_sigset_t tuc_sigmask;   /* mask last for extensibility */
+};
+
+struct target_rt_sigframe {  /* frame written at get_sigframe()'s address */
+    unsigned char save_area[16]; /* caller save area */
+    struct target_siginfo info;  /* full siginfo only with SA_SIGINFO */
+    struct target_ucontext uc;   /* read back by do_rt_sigreturn */
+};
+
+static void setup_sigcontext(struct target_sigcontext *sc,
+                             CPUArchState *env, int signo)
+{   /* Store all registers and pc into *sc; ics = 0, faultnum = signo. */
+    int i;
+
+    for (i = 0; i < TILEGX_R_COUNT; ++i) {  /* assumes count <= 56: confirm */
+        __put_user(env->regs[i], &sc->gregs[i]);
+    }
+
+    __put_user(env->pc, &sc->pc);
+    __put_user(0, &sc->ics);
+    __put_user(signo, &sc->faultnum);
+}
+
+static void restore_sigcontext(CPUTLGState *env, struct target_sigcontext *sc)
+{   /* Reload all TILEGX_R_COUNT registers and the pc from *sc. */
+    int i;
+
+    for (i = 0; i < TILEGX_R_COUNT; ++i) {
+        __get_user(env->regs[i], &sc->gregs[i]);
+    }
+
+    __get_user(env->pc, &sc->pc);  /* ics and faultnum are not read back */
+}
+
+static abi_ulong get_sigframe(struct target_sigaction *ka, CPUArchState *env,
+                              size_t frame_size)
+{   /* Return the address for a frame_size-byte signal frame, or -1UL. */
+    unsigned long sp = env->regs[TILEGX_R_SP];
+
+    if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) {
+        return -1UL;  /* presumably: frame would leave the signal stack */
+    }
+
+    if ((ka->sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) {
+        sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
+    }  /* NOTE(review): SA_ONSTACK here, TARGET_SA_* elsewhere; confirm */
+
+    sp -= frame_size;
+    sp &= -16UL;  /* round down to a 16-byte boundary */
+    return sp;
+}
+
+static void setup_rt_frame(int sig, struct target_sigaction *ka,
+                           target_siginfo_t *info,
+                           target_sigset_t *set, CPUArchState *env)
+{   /* Build the rt signal frame and redirect the CPU to the handler. */
+    abi_ulong frame_addr;
+    struct target_rt_sigframe *frame;
+    unsigned long restorer;
+
+    frame_addr = get_sigframe(ka, env, sizeof(*frame));  /* may be -1UL */
+    if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+        goto give_sigsegv;
+    }
+
+    /* Always write at least the signal number for the stack backtracer. */
+    if (ka->sa_flags & TARGET_SA_SIGINFO) {
+        /* At sigreturn time, restore the callee-save registers too. */
+        tswap_siginfo(&frame->info, info);
+        /* regs->flags |= PT_FLAGS_RESTORE_REGS; FIXME: we can skip it? */
+    } else {
+        __put_user(info->si_signo, &frame->info.si_signo);
+    }
+
+    /* Create the ucontext.  NOTE(review): tuc_sigmask is never written.  */
+    __put_user(0, &frame->uc.tuc_flags);
+    __put_user(0, &frame->uc.tuc_link);
+    __put_user(target_sigaltstack_used.ss_sp, &frame->uc.tuc_stack.ss_sp);
+    __put_user(sas_ss_flags(env->regs[TILEGX_R_SP]),
+               &frame->uc.tuc_stack.ss_flags);
+    __put_user(target_sigaltstack_used.ss_size, &frame->uc.tuc_stack.ss_size);
+    setup_sigcontext(&frame->uc.tuc_mcontext, env, info->si_signo);
+
+    restorer = (unsigned long) do_rt_sigreturn;  /* NOTE(review): host address? */
+    if (ka->sa_flags & TARGET_SA_RESTORER) {
+            restorer = (unsigned long) ka->sa_restorer;
+    }
+    env->pc = (unsigned long) ka->_sa_handler;  /* resume in the handler */
+    env->regs[TILEGX_R_SP] = (unsigned long) frame;  /* NOTE(review): not frame_addr? */
+    env->regs[TILEGX_R_LR] = restorer;
+    env->regs[0] = (unsigned long) sig;  /* r0-r2: presumably handler args */
+    env->regs[1] = (unsigned long) &frame->info;  /* same doubt as SP */
+    env->regs[2] = (unsigned long) &frame->uc;  /* same doubt as SP */
+    /* regs->flags |= PT_FLAGS_CALLER_SAVES; FIXME: we can skip it? */
+
+    unlock_user_struct(frame, frame_addr, 1);
+    return;
+
+give_sigsegv:
+    if (sig == TARGET_SIGSEGV) {
+        ka->_sa_handler = TARGET_SIG_DFL;  /* reset handler to default */
+    }
+    force_sig(TARGET_SIGSEGV /* , current */);
+}
+
+long do_rt_sigreturn(CPUTLGState *env)
+{   /* Restore signal mask, registers and altstack from the frame at SP. */
+    abi_ulong frame_addr = env->regs[TILEGX_R_SP];
+    struct target_rt_sigframe *frame;
+    sigset_t set;
+
+    if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
+        goto badframe;
+    }
+    target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
+    do_sigprocmask(SIG_SETMASK, &set, NULL);
+
+    restore_sigcontext(env, &frame->uc.tuc_mcontext);
+    if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe,
+                                             uc.tuc_stack),
+                       0, env->regs[TILEGX_R_SP]) == -EFAULT) {
+        goto badframe;
+    }
+
+    unlock_user_struct(frame, frame_addr, 0);
+    return env->regs[TILEGX_R_RE];  /* value just restored from the frame */
+
+
+ badframe:
+    unlock_user_struct(frame, frame_addr, 0);
+    force_sig(TARGET_SIGSEGV);  /* NOTE(review): no return value after this */
+}
+
 #else
 
 static void setup_frame(int sig, struct target_sigaction *ka,
@@ -5657,7 +5814,7 @@ void process_pending_signals(CPUArchState *cpu_env)
 #endif
         /* prepare the stack frame of the virtual CPU */
 #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \
-    || defined(TARGET_OPENRISC)
+    || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX)
         /* These targets do not have traditional signals.  */
         setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env);
 #else
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 7ca33a6f62..f996acf945 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -748,6 +748,10 @@ typedef struct target_siginfo {
 #define TARGET_ILL_PRVREG	(6)	/* privileged register */
 #define TARGET_ILL_COPROC	(7)	/* coprocessor error */
 #define TARGET_ILL_BADSTK	(8)	/* internal stack error */
+#ifdef TARGET_TILEGX
+#define TARGET_ILL_DBLFLT       (9)     /* double fault */
+#define TARGET_ILL_HARDWALL     (10)    /* user networks hardwall violation */
+#endif
 
 /*
  * SIGFPE si_codes
@@ -767,6 +771,7 @@ typedef struct target_siginfo {
  */
 #define TARGET_SEGV_MAPERR     (1)  /* address not mapped to object */
 #define TARGET_SEGV_ACCERR     (2)  /* invalid permissions for mapped object */
+#define TARGET_SEGV_BNDERR     (3)  /* failed address bound checks */
 
 /*
  * SIGBUS si_codes
@@ -774,12 +779,18 @@ typedef struct target_siginfo {
 #define TARGET_BUS_ADRALN       (1)	/* invalid address alignment */
 #define TARGET_BUS_ADRERR       (2)	/* non-existent physical address */
 #define TARGET_BUS_OBJERR       (3)	/* object specific hardware error */
+/* hardware memory error consumed on a machine check: action required */
+#define TARGET_BUS_MCEERR_AR    (4)
+/* hardware memory error detected in process but not consumed: action optional*/
+#define TARGET_BUS_MCEERR_AO    (5)
 
 /*
  * SIGTRAP si_codes
  */
 #define TARGET_TRAP_BRKPT	(1)	/* process breakpoint */
 #define TARGET_TRAP_TRACE	(2)	/* process trace trap */
+#define TARGET_TRAP_BRANCH      (3)     /* process taken branch trap */
+#define TARGET_TRAP_HWBKPT      (4)     /* hardware breakpoint/watchpoint */
 
 #endif /* defined(TARGET_I386) || defined(TARGET_ARM) */
 
diff --git a/linux-user/tilegx/syscall.h b/linux-user/tilegx/syscall.h
index 653ece13d8..a938d4e90c 100644
--- a/linux-user/tilegx/syscall.h
+++ b/linux-user/tilegx/syscall.h
@@ -37,4 +37,7 @@ struct target_pt_regs {
 #define TARGET_MLOCKALL_MCL_CURRENT 1
 #define TARGET_MLOCKALL_MCL_FUTURE  2
 
+/* For faultnum */
+#define TARGET_INT_SWINT_1            14
+
 #endif
diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile
index 746603a315..15e423274f 100644
--- a/pc-bios/s390-ccw/Makefile
+++ b/pc-bios/s390-ccw/Makefile
@@ -10,7 +10,8 @@ $(call set-vpath, $(SRC_PATH)/pc-bios/s390-ccw)
 .PHONY : all clean build-all
 
 OBJECTS = start.o main.o bootmap.o sclp-ascii.o virtio.o
-CFLAGS += -fPIE -fno-stack-protector -ffreestanding -fno-delete-null-pointer-checks
+CFLAGS += -fPIE -fno-stack-protector -ffreestanding
+CFLAGS += -fno-delete-null-pointer-checks -msoft-float
 LDFLAGS += -Wl,-pie -nostdlib
 
 build-all: s390-ccw.img
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 097637eb4e..bcd8076abb 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -287,7 +287,6 @@ struct CPUAlphaState {
 
 #define cpu_list alpha_cpu_list
 #define cpu_exec cpu_alpha_exec
-#define cpu_gen_code cpu_alpha_gen_code
 #define cpu_signal_handler cpu_alpha_signal_handler
 
 #include "exec/cpu-all.h"
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 2ba5fb80ae..f936d1b5b9 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -2858,18 +2858,14 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
     return ret;
 }
 
-static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb)
 {
+    AlphaCPU *cpu = alpha_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUAlphaState *env = &cpu->env;
     DisasContext ctx, *ctxp = &ctx;
     target_ulong pc_start;
     target_ulong pc_mask;
     uint32_t insn;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
     ExitStatus ret;
     int num_insns;
     int max_insns;
@@ -2904,6 +2900,9 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     if (in_superpage(&ctx, pc_start)) {
         pc_mask = (1ULL << 41) - 1;
@@ -2913,35 +2912,17 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
 
     gen_tb_start(tb);
     do {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == ctx.pc) {
-                    gen_excp(&ctx, EXCP_DEBUG, 0);
-                    break;
-                }
-            }
-        }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = ctx.pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        tcg_gen_insn_start(ctx.pc);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
+            gen_excp(&ctx, EXCP_DEBUG, 0);
+            break;
         }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
         insn = cpu_ldl_code(env, ctx.pc);
-        num_insns++;
-
-	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(ctx.pc);
-        }
 
         TCGV_UNUSED_I64(ctx.zero);
         TCGV_UNUSED_I64(ctx.sink);
@@ -2997,16 +2978,8 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -3017,17 +2990,8 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
 #endif
 }
 
-void gen_intermediate_code (CPUAlphaState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(alpha_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUAlphaState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(alpha_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];  /* presumably the pc from tcg_gen_insn_start() */
 }
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index cc1578c9e8..493f9d02a9 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -95,6 +95,7 @@
 struct arm_boot_info;
 
 #define NB_MMU_MODES 7
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 /* We currently assume float and double are IEEE single and double
    precision respectively.
@@ -1600,7 +1601,6 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
 #define cpu_init(cpu_model) CPU(cpu_arm_init(cpu_model))
 
 #define cpu_exec cpu_arm_exec
-#define cpu_gen_code cpu_arm_gen_code
 #define cpu_signal_handler cpu_arm_signal_handler
 #define cpu_list arm_cpu_list
 
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index ec0936cf97..e65e309535 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -11000,15 +11000,11 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
     free_tmp_a64(s);
 }
 
-void gen_intermediate_code_internal_a64(ARMCPU *cpu,
-                                        TranslationBlock *tb,
-                                        bool search_pc)
+void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
 {
     CPUState *cs = CPU(cpu);
     CPUARMState *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj;
     target_ulong pc_start;
     target_ulong next_page_start;
     int num_insns;
@@ -11067,19 +11063,25 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
     init_tmp_a64_array(dc);
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
 
     tcg_clear_temp_count();
 
     do {
+        tcg_gen_insn_start(dc->pc, 0);
+        num_insns++;
+
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
+            CPUBreakpoint *bp;
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
                     gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
@@ -11091,27 +11093,10 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
             }
         }
 
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
-
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc->pc);
-        }
-
         if (dc->ss_active && !dc->pstate_ss) {
             /* Singlestep state is Active-pending.
              * If we're in this state at the start of a TB then either
@@ -11123,7 +11108,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
              * "did not step an insn" case, and so the syndrome ISV and EX
              * bits should be zero.
              */
-            assert(num_insns == 0);
+            assert(num_insns == 1);
             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                           default_exception_el(dc));
             dc->is_jmp = DISAS_EXC;
@@ -11142,7 +11127,6 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.
          */
-        num_insns++;
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
@@ -11221,14 +11205,6 @@ done_generating:
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 84a21ace54..22c35877e5 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -52,7 +52,6 @@
 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
 
 #include "translate.h"
-static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];
 
 #if defined(CONFIG_USER_ONLY)
 #define IS_USER(s) 1
@@ -11168,17 +11167,12 @@ undef:
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
-   basic block 'tb'. If search_pc is TRUE, also generate PC
-   information for each intermediate instruction. */
-static inline void gen_intermediate_code_internal(ARMCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+   basic block 'tb'.  */
+void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
 {
+    ARMCPU *cpu = arm_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUARMState *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj;
     target_ulong pc_start;
     target_ulong next_page_start;
     int num_insns;
@@ -11190,7 +11184,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
      * the A32/T32 complexity to do with conditional execution/IT blocks/etc.
      */
     if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
-        gen_intermediate_code_internal_a64(cpu, tb, search_pc);
+        gen_intermediate_code_a64(cpu, tb);
         return;
     }
 
@@ -11256,11 +11250,14 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
     cpu_M0 = tcg_temp_new_i64();
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
 
@@ -11286,10 +11283,9 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
      * then the CPUARMState will be wrong and we need to reset it.
      * This is handled in the same way as restoration of the
-     * PC in these situations: we will be called again with search_pc=1
-     * and generate a mapping of the condexec bits for each PC in
-     * gen_opc_condexec_bits[]. restore_state_to_opc() then uses
-     * this to restore the condexec bits.
+     * PC in these situations; we save the value of the condexec bits
+     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
+     * then uses this to restore them after an exception.
      *
      * Note that there are no instructions which can read the condexec
      * bits, and none which can write non-static values to them, so
@@ -11306,6 +11302,10 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
         store_cpu_field(tmp, condexec_bits);
       }
     do {
+        tcg_gen_insn_start(dc->pc,
+                           (dc->condexec_cond << 4) | (dc->condexec_mask >> 1));
+        num_insns++;
+
 #ifdef CONFIG_USER_ONLY
         /* Intercept jump to the magic kernel page.  */
         if (dc->pc >= 0xffff0000) {
@@ -11326,6 +11326,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
 #endif
 
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
+            CPUBreakpoint *bp;
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
                     gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
@@ -11336,24 +11337,9 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
                 }
             }
         }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
-
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc->pc);
         }
 
         if (dc->ss_active && !dc->pstate_ss) {
@@ -11367,7 +11353,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
              * "did not step an insn" case, and so the syndrome ISV and EX
              * bits should be zero.
              */
-            assert(num_insns == 0);
+            assert(num_insns == 1);
             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                           default_exception_el(dc));
             goto done_generating;
@@ -11403,7 +11389,6 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
-        num_insns ++;
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
@@ -11533,25 +11518,8 @@ done_generating:
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
-}
-
-void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(arm_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUARMState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(arm_env_get_cpu(env), tb, true);
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 }
 
 static const char *cpu_mode_names[16] = {
@@ -11608,13 +11576,14 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
     }
 }
 
-void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
     if (is_a64(env)) {
-        env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+        env->pc = data[0];  /* pc passed to tcg_gen_insn_start() */
         env->condexec_bits = 0;
     } else {
-        env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
-        env->condexec_bits = gen_opc_condexec_bits[pc_pos];
+        env->regs[15] = data[0];  /* pc passed to tcg_gen_insn_start() */
+        env->condexec_bits = data[1];  /* 2nd tcg_gen_insn_start() argument */
     }
 }
diff --git a/target-arm/translate.h b/target-arm/translate.h
index b8fe37a0a7..53ef971058 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -122,9 +122,7 @@ static inline int default_exception_el(DisasContext *s)
 
 #ifdef TARGET_AARCH64
 void a64_translate_init(void);
-void gen_intermediate_code_internal_a64(ARMCPU *cpu,
-                                        TranslationBlock *tb,
-                                        bool search_pc);
+void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb);
 void gen_a64_set_pc_im(uint64_t val);
 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                             fprintf_function cpu_fprintf, int flags);
@@ -133,9 +131,7 @@ static inline void a64_translate_init(void)
 {
 }
 
-static inline void gen_intermediate_code_internal_a64(ARMCPU *cpu,
-                                                      TranslationBlock *tb,
-                                                      bool search_pc)
+static inline void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
 {
 }
 
diff --git a/target-cris/cpu.h b/target-cris/cpu.h
index d47fad466b..32204607ca 100644
--- a/target-cris/cpu.h
+++ b/target-cris/cpu.h
@@ -223,7 +223,6 @@ enum {
 #define cpu_init(cpu_model) CPU(cpu_cris_init(cpu_model))
 
 #define cpu_exec cpu_cris_exec
-#define cpu_gen_code cpu_cris_gen_code
 #define cpu_signal_handler cpu_cris_signal_handler
 
 /* MMU modes definitions */
diff --git a/target-cris/translate.c b/target-cris/translate.c
index d5b54e1ad4..964845c461 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -2994,10 +2994,6 @@ static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc)
     int insn_len = 2;
     int i;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-        }
-
     /* Load a halfword onto the instruction register.  */
         dc->ir = cris_fetch(env, dc, dc->pc, 2, 0);
 
@@ -3034,23 +3030,6 @@ static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc)
     return insn_len;
 }
 
-static void check_breakpoint(CPUCRISState *env, DisasContext *dc)
-{
-    CPUState *cs = CPU(cris_env_get_cpu(env));
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                cris_evaluate_flags(dc);
-                tcg_gen_movi_tl(env_pc, dc->pc);
-                t_gen_raise_exception(EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-            }
-        }
-    }
-}
-
 #include "translate_v10.c"
 
 /*
@@ -3088,15 +3067,12 @@ static void check_breakpoint(CPUCRISState *env, DisasContext *dc)
  */
 
 /* generate intermediate code for basic block 'tb'.  */
-static inline void
-gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUCRISState *env, struct TranslationBlock *tb)
 {
+    CRISCPU *cpu = cris_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUCRISState *env = &cpu->env;
     uint32_t pc_start;
     unsigned int insn_len;
-    int j, lj;
     struct DisasContext ctx;
     struct DisasContext *dc = &ctx;
     uint32_t next_page_start;
@@ -3148,13 +3124,13 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
 
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log(
-                "srch=%d pc=%x %x flg=%" PRIx64 " bt=%x ds=%u ccs=%x\n"
+                "pc=%x %x flg=%" PRIx64 " bt=%x ds=%u ccs=%x\n"
                 "pid=%x usp=%x\n"
                 "%x.%x.%x.%x\n"
                 "%x.%x.%x.%x\n"
                 "%x.%x.%x.%x\n"
                 "%x.%x.%x.%x\n",
-                search_pc, dc->pc, dc->ppc,
+                dc->pc, dc->ppc,
                 (uint64_t)tb->flags,
                 env->btarget, (unsigned)tb->flags & 7,
                 env->pregs[PR_CCS],
@@ -3170,38 +3146,33 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
     }
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do {
-        check_breakpoint(env, dc);
-
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            if (dc->delayed_branch == 1) {
-                tcg_ctx.gen_opc_pc[lj] = dc->ppc | 1;
-            } else {
-                tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            }
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        tcg_gen_insn_start(dc->delayed_branch == 1
+                           ? dc->ppc | 1 : dc->pc);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            cris_evaluate_flags(dc);
+            tcg_gen_movi_tl(env_pc, dc->pc);
+            t_gen_raise_exception(EXCP_DEBUG);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
         }
 
         /* Pretty disas.  */
         LOG_DIS("%8.8x:\t", dc->pc);
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
         dc->clear_x = 1;
@@ -3213,7 +3184,6 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
             cris_clear_x_flag(dc);
         }
 
-        num_insns++;
         /* Check for delayed branches here. If we do it before
            actually generating any host code, the simulator will just
            loop doing nothing for on this program location.  */
@@ -3318,16 +3288,8 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
     }
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
 #if !DISAS_CRIS
@@ -3341,16 +3303,6 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
 #endif
 }
 
-void gen_intermediate_code (CPUCRISState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(cris_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUCRISState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(cris_env_get_cpu(env), tb, true);
-}
-
 void cris_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                          int flags)
 {
@@ -3443,7 +3395,8 @@ void cris_initialize_tcg(void)
     }
 }
 
-void restore_state_to_opc(CPUCRISState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUCRISState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/target-cris/translate_v10.c b/target-cris/translate_v10.c
index da0b2caf85..3ab1c398e0 100644
--- a/target-cris/translate_v10.c
+++ b/target-cris/translate_v10.c
@@ -1199,9 +1199,6 @@ static unsigned int crisv10_decoder(CPUCRISState *env, DisasContext *dc)
 {
     unsigned int insn_len = 2;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
-        tcg_gen_debug_insn_start(dc->pc);
-
     /* Load a halfword onto the instruction register.  */
     dc->ir = cpu_lduw_code(env, dc->pc);
 
diff --git a/target-i386/Makefile.objs b/target-i386/Makefile.objs
index 3da413e8bd..437d9975b9 100644
--- a/target-i386/Makefile.objs
+++ b/target-i386/Makefile.objs
@@ -1,4 +1,4 @@
-obj-y += translate.o helper.o cpu.o
+obj-y += translate.o helper.o cpu.o bpt_helper.o
 obj-y += excp_helper.o fpu_helper.o cc_helper.o int_helper.o svm_helper.o
 obj-y += smm_helper.o misc_helper.o mem_helper.o seg_helper.o
 obj-y += gdbstub.o
diff --git a/target-i386/bpt_helper.c b/target-i386/bpt_helper.c
new file mode 100644
index 0000000000..c071c24782
--- /dev/null
+++ b/target-i386/bpt_helper.c
@@ -0,0 +1,182 @@
+/*
+ *  i386 breakpoint helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "exec/helper-proto.h"
+
+
+void hw_breakpoint_insert(CPUX86State *env, int index)
+{
+    CPUState *cs = CPU(x86_env_get_cpu(env));
+    int type = 0, err = 0;
+
+    switch (hw_breakpoint_type(env->dr[7], index)) {
+    case DR7_TYPE_BP_INST:
+        if (hw_breakpoint_enabled(env->dr[7], index)) {
+            err = cpu_breakpoint_insert(cs, env->dr[index], BP_CPU,
+                                        &env->cpu_breakpoint[index]);
+        }
+        break;
+    case DR7_TYPE_DATA_WR:
+        type = BP_CPU | BP_MEM_WRITE;
+        break;
+    case DR7_TYPE_IO_RW:
+        /* No support for I/O watchpoints yet */
+        break;
+    case DR7_TYPE_DATA_RW:
+        type = BP_CPU | BP_MEM_ACCESS;
+        break;
+    }
+
+    if (type != 0) {
+        err = cpu_watchpoint_insert(cs, env->dr[index],
+                                    hw_breakpoint_len(env->dr[7], index),
+                                    type, &env->cpu_watchpoint[index]);
+    }
+
+    if (err) {
+        env->cpu_breakpoint[index] = NULL;
+    }
+}
+
+void hw_breakpoint_remove(CPUX86State *env, int index)
+{
+    CPUState *cs;
+
+    if (!env->cpu_breakpoint[index]) {
+        return;
+    }
+    cs = CPU(x86_env_get_cpu(env));
+    switch (hw_breakpoint_type(env->dr[7], index)) {
+    case DR7_TYPE_BP_INST:
+        if (hw_breakpoint_enabled(env->dr[7], index)) {
+            cpu_breakpoint_remove_by_ref(cs, env->cpu_breakpoint[index]);
+        }
+        break;
+    case DR7_TYPE_DATA_WR:
+    case DR7_TYPE_DATA_RW:
+        cpu_watchpoint_remove_by_ref(cs, env->cpu_watchpoint[index]);
+        break;
+    case DR7_TYPE_IO_RW:
+        /* No support for I/O watchpoints yet */
+        break;
+    }
+}
+
+static bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update)
+{
+    target_ulong dr6;
+    int reg;
+    bool hit_enabled = false;
+
+    dr6 = env->dr[6] & ~0xf;
+    for (reg = 0; reg < DR7_MAX_BP; reg++) {
+        bool bp_match = false;
+        bool wp_match = false;
+
+        switch (hw_breakpoint_type(env->dr[7], reg)) {
+        case DR7_TYPE_BP_INST:
+            if (env->dr[reg] == env->eip) {
+                bp_match = true;
+            }
+            break;
+        case DR7_TYPE_DATA_WR:
+        case DR7_TYPE_DATA_RW:
+            if (env->cpu_watchpoint[reg] &&
+                env->cpu_watchpoint[reg]->flags & BP_WATCHPOINT_HIT) {
+                wp_match = true;
+            }
+            break;
+        case DR7_TYPE_IO_RW:
+            break;
+        }
+        if (bp_match || wp_match) {
+            dr6 |= 1 << reg;
+            if (hw_breakpoint_enabled(env->dr[7], reg)) {
+                hit_enabled = true;
+            }
+        }
+    }
+
+    if (hit_enabled || force_dr6_update) {
+        env->dr[6] = dr6;
+    }
+
+    return hit_enabled;
+}
+
+void breakpoint_handler(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+    CPUBreakpoint *bp;
+
+    if (cs->watchpoint_hit) {
+        if (cs->watchpoint_hit->flags & BP_CPU) {
+            cs->watchpoint_hit = NULL;
+            if (check_hw_breakpoints(env, false)) {
+                raise_exception(env, EXCP01_DB);
+            } else {
+                cpu_resume_from_signal(cs, NULL);
+            }
+        }
+    } else {
+        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
+            if (bp->pc == env->eip) {
+                if (bp->flags & BP_CPU) {
+                    check_hw_breakpoints(env, true);
+                    raise_exception(env, EXCP01_DB);
+                }
+                break;
+            }
+        }
+    }
+}
+
+void helper_single_step(CPUX86State *env)
+{
+#ifndef CONFIG_USER_ONLY
+    check_hw_breakpoints(env, true);
+    env->dr[6] |= DR6_BS;
+#endif
+    raise_exception(env, EXCP01_DB);
+}
+
+void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0)
+{
+#ifndef CONFIG_USER_ONLY
+    int i;
+
+    if (reg < 4) {
+        hw_breakpoint_remove(env, reg);
+        env->dr[reg] = t0;
+        hw_breakpoint_insert(env, reg);
+    } else if (reg == 7) {
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            hw_breakpoint_remove(env, i);
+        }
+        env->dr[7] = t0;
+        for (i = 0; i < DR7_MAX_BP; i++) {
+            hw_breakpoint_insert(env, i);
+        }
+    } else {
+        env->dr[reg] = t0;
+    }
+#endif
+}
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index bd411b9d8d..c793812cc2 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -43,7 +43,6 @@
 
 #include "sysemu/sysemu.h"
 #include "hw/qdev-properties.h"
-#include "hw/cpu/icc_bus.h"
 #ifndef CONFIG_USER_ONLY
 #include "exec/address-spaces.h"
 #include "hw/xen/xen.h"
@@ -478,38 +477,6 @@ const char *get_register_name_32(unsigned int reg)
     return x86_reg_info_32[reg].name;
 }
 
-/* KVM-specific features that are automatically added to all CPU models
- * when KVM is enabled.
- */
-static uint32_t kvm_default_features[FEATURE_WORDS] = {
-    [FEAT_KVM] = (1 << KVM_FEATURE_CLOCKSOURCE) |
-        (1 << KVM_FEATURE_NOP_IO_DELAY) |
-        (1 << KVM_FEATURE_CLOCKSOURCE2) |
-        (1 << KVM_FEATURE_ASYNC_PF) |
-        (1 << KVM_FEATURE_STEAL_TIME) |
-        (1 << KVM_FEATURE_PV_EOI) |
-        (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT),
-    [FEAT_1_ECX] = CPUID_EXT_X2APIC,
-};
-
-/* Features that are not added by default to any CPU model when KVM is enabled.
- */
-static uint32_t kvm_default_unset_features[FEATURE_WORDS] = {
-    [FEAT_1_EDX] = CPUID_ACPI,
-    [FEAT_1_ECX] = CPUID_EXT_MONITOR,
-    [FEAT_8000_0001_ECX] = CPUID_EXT3_SVM,
-};
-
-void x86_cpu_compat_kvm_no_autoenable(FeatureWord w, uint32_t features)
-{
-    kvm_default_features[w] &= ~features;
-}
-
-void x86_cpu_compat_kvm_no_autodisable(FeatureWord w, uint32_t features)
-{
-    kvm_default_unset_features[w] &= ~features;
-}
-
 /*
  * Returns the set of feature flags that are supported and migratable by
  * QEMU, for a given FeatureWord.
@@ -1113,7 +1080,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX |
             CPUID_EXT2_SYSCALL,
         .features[FEAT_8000_0001_ECX] =
-            CPUID_EXT3_LAHF_LM,
+            CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM,
         .features[FEAT_7_0_EBX] =
             CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 |
             CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP |
@@ -1148,7 +1115,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX |
             CPUID_EXT2_SYSCALL,
         .features[FEAT_8000_0001_ECX] =
-            CPUID_EXT3_LAHF_LM,
+            CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM,
         .features[FEAT_7_0_EBX] =
             CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 |
             CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP |
@@ -1185,7 +1152,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX |
             CPUID_EXT2_SYSCALL,
         .features[FEAT_8000_0001_ECX] =
-            CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH,
+            CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH,
         .features[FEAT_7_0_EBX] =
             CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 |
             CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP |
@@ -1223,7 +1190,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX |
             CPUID_EXT2_SYSCALL,
         .features[FEAT_8000_0001_ECX] =
-            CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH,
+            CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH,
         .features[FEAT_7_0_EBX] =
             CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 |
             CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP |
@@ -1392,6 +1359,43 @@ static X86CPUDefinition builtin_x86_defs[] = {
     },
 };
 
+typedef struct PropValue {
+    const char *prop, *value;
+} PropValue;
+
+/* KVM-specific features that are automatically added/removed
+ * from all CPU models when KVM is enabled.
+ */
+static PropValue kvm_default_props[] = {
+    { "kvmclock", "on" },
+    { "kvm-nopiodelay", "on" },
+    { "kvm-asyncpf", "on" },
+    { "kvm-steal-time", "on" },
+    { "kvm-pv-eoi", "on" },
+    { "kvmclock-stable-bit", "on" },
+    { "x2apic", "on" },
+    { "acpi", "off" },
+    { "monitor", "off" },
+    { "svm", "off" },
+    { NULL, NULL },
+};
+
+void x86_cpu_change_kvm_default(const char *prop, const char *value)
+{
+    PropValue *pv;
+    for (pv = kvm_default_props; pv->prop; pv++) {
+        if (!strcmp(pv->prop, prop)) {
+            pv->value = value;
+            break;
+        }
+    }
+
+    /* It is valid to call this function only for properties that
+     * are already present in the kvm_default_props table.
+     */
+    assert(pv->prop);
+}
+
 static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
                                                    bool migratable_only);
 
@@ -2061,6 +2065,18 @@ static int x86_cpu_filter_features(X86CPU *cpu)
     return rv;
 }
 
+static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props)
+{
+    PropValue *pv;
+    for (pv = props; pv->prop; pv++) {
+        if (!pv->value) {
+            continue;
+        }
+        object_property_parse(OBJECT(cpu), pv->value, pv->prop,
+                              &error_abort);
+    }
+}
+
 /* Load data from X86CPUDefinition
  */
 static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
@@ -2084,11 +2100,7 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
 
     /* Special cases not set in the X86CPUDefinition structs: */
     if (kvm_enabled()) {
-        FeatureWord w;
-        for (w = 0; w < FEATURE_WORDS; w++) {
-            env->features[w] |= kvm_default_features[w];
-            env->features[w] &= ~kvm_default_unset_features[w];
-        }
+        x86_cpu_apply_props(cpu, kvm_default_props);
     }
 
     env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR;
@@ -2723,7 +2735,6 @@ static void mce_init(X86CPU *cpu)
 #ifndef CONFIG_USER_ONLY
 static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
 {
-    DeviceState *dev = DEVICE(cpu);
     APICCommonState *apic;
     const char *apic_type = "apic";
 
@@ -2733,11 +2744,7 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
         apic_type = "xen-apic";
     }
 
-    cpu->apic_state = qdev_try_create(qdev_get_parent_bus(dev), apic_type);
-    if (cpu->apic_state == NULL) {
-        error_setg(errp, "APIC device '%s' could not be created", apic_type);
-        return;
-    }
+    cpu->apic_state = DEVICE(object_new(apic_type));
 
     object_property_add_child(OBJECT(cpu), "apic",
                               OBJECT(cpu->apic_state), NULL);
@@ -2745,15 +2752,30 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
     /* TODO: convert to link<> */
     apic = APIC_COMMON(cpu->apic_state);
     apic->cpu = cpu;
+    apic->apicbase = APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE;
 }
 
 static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp)
 {
+    APICCommonState *apic;
+    static bool apic_mmio_map_once;
+
     if (cpu->apic_state == NULL) {
         return;
     }
     object_property_set_bool(OBJECT(cpu->apic_state), true, "realized",
                              errp);
+
+    /* Map APIC MMIO area */
+    apic = APIC_COMMON(cpu->apic_state);
+    if (!apic_mmio_map_once) {
+        memory_region_add_subregion_overlap(get_system_memory(),
+                                            apic->apicbase &
+                                            MSR_IA32_APICBASE_BASE,
+                                            &apic->io_memory,
+                                            0x1000);
+        apic_mmio_map_once = true;
+     }
 }
 
 static void x86_cpu_machine_done(Notifier *n, void *unused)
@@ -3133,7 +3155,6 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
 
     xcc->parent_realize = dc->realize;
     dc->realize = x86_cpu_realizefn;
-    dc->bus_type = TYPE_ICC_BUS;
     dc->props = x86_cpu_properties;
 
     xcc->parent_reset = cc->reset;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 034fab6f39..54d9d50140 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -794,6 +794,7 @@ typedef struct {
 #define MAX_GP_COUNTERS    (MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0)
 
 #define NB_MMU_MODES 3
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 #define NB_OPMASK_REGS 8
 
@@ -833,6 +834,7 @@ typedef struct CPUX86State {
     BNDReg bnd_regs[4];
     BNDCSReg bndcs_regs;
     uint64_t msr_bndcfgs;
+    uint64_t efer;
 
     /* Beginning of state preserved by INIT (dummy marker).  */
     struct {} start_init_save;
@@ -865,7 +867,6 @@ typedef struct CPUX86State {
     uint32_t sysenter_cs;
     target_ulong sysenter_esp;
     target_ulong sysenter_eip;
-    uint64_t efer;
     uint64_t star;
 
     uint64_t vm_hsave;
@@ -1154,7 +1155,6 @@ static inline int hw_breakpoint_len(unsigned long dr7, int index)
 
 void hw_breakpoint_insert(CPUX86State *env, int index);
 void hw_breakpoint_remove(CPUX86State *env, int index);
-bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update);
 void breakpoint_handler(CPUState *cs);
 
 /* will be suppressed */
@@ -1189,7 +1189,6 @@ uint64_t cpu_get_tsc(CPUX86State *env);
 #define cpu_init(cpu_model) CPU(cpu_x86_init(cpu_model))
 
 #define cpu_exec cpu_x86_exec
-#define cpu_gen_code cpu_x86_gen_code
 #define cpu_signal_handler cpu_x86_signal_handler
 #define cpu_list x86_cpu_list
 #define cpudef_setup x86_cpudef_setup
@@ -1341,8 +1340,15 @@ void cpu_smm_update(X86CPU *cpu);
 
 void cpu_report_tpr_access(CPUX86State *env, TPRAccess access);
 
-void x86_cpu_compat_kvm_no_autoenable(FeatureWord w, uint32_t features);
-void x86_cpu_compat_kvm_no_autodisable(FeatureWord w, uint32_t features);
+/* Change the value of a KVM-specific default
+ *
+ * If value is NULL, no default will be set and the original
+ * value from the CPU model table will be kept.
+ *
+ * It is valid to call this function only for properties that
+ * are already present in the kvm_default_props table.
+ */
+void x86_cpu_change_kvm_default(const char *prop, const char *value);
 
 
 /* Return name of 32-bit register, from a R_* constant */
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 9364d96f96..d18be95c3f 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1096,134 +1096,6 @@ out:
     return pte | page_offset;
 }
 
-void hw_breakpoint_insert(CPUX86State *env, int index)
-{
-    CPUState *cs = CPU(x86_env_get_cpu(env));
-    int type = 0, err = 0;
-
-    switch (hw_breakpoint_type(env->dr[7], index)) {
-    case DR7_TYPE_BP_INST:
-        if (hw_breakpoint_enabled(env->dr[7], index)) {
-            err = cpu_breakpoint_insert(cs, env->dr[index], BP_CPU,
-                                        &env->cpu_breakpoint[index]);
-        }
-        break;
-    case DR7_TYPE_DATA_WR:
-        type = BP_CPU | BP_MEM_WRITE;
-        break;
-    case DR7_TYPE_IO_RW:
-        /* No support for I/O watchpoints yet */
-        break;
-    case DR7_TYPE_DATA_RW:
-        type = BP_CPU | BP_MEM_ACCESS;
-        break;
-    }
-
-    if (type != 0) {
-        err = cpu_watchpoint_insert(cs, env->dr[index],
-                                    hw_breakpoint_len(env->dr[7], index),
-                                    type, &env->cpu_watchpoint[index]);
-    }
-
-    if (err) {
-        env->cpu_breakpoint[index] = NULL;
-    }
-}
-
-void hw_breakpoint_remove(CPUX86State *env, int index)
-{
-    CPUState *cs;
-
-    if (!env->cpu_breakpoint[index]) {
-        return;
-    }
-    cs = CPU(x86_env_get_cpu(env));
-    switch (hw_breakpoint_type(env->dr[7], index)) {
-    case DR7_TYPE_BP_INST:
-        if (hw_breakpoint_enabled(env->dr[7], index)) {
-            cpu_breakpoint_remove_by_ref(cs, env->cpu_breakpoint[index]);
-        }
-        break;
-    case DR7_TYPE_DATA_WR:
-    case DR7_TYPE_DATA_RW:
-        cpu_watchpoint_remove_by_ref(cs, env->cpu_watchpoint[index]);
-        break;
-    case DR7_TYPE_IO_RW:
-        /* No support for I/O watchpoints yet */
-        break;
-    }
-}
-
-bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update)
-{
-    target_ulong dr6;
-    int reg;
-    bool hit_enabled = false;
-
-    dr6 = env->dr[6] & ~0xf;
-    for (reg = 0; reg < DR7_MAX_BP; reg++) {
-        bool bp_match = false;
-        bool wp_match = false;
-
-        switch (hw_breakpoint_type(env->dr[7], reg)) {
-        case DR7_TYPE_BP_INST:
-            if (env->dr[reg] == env->eip) {
-                bp_match = true;
-            }
-            break;
-        case DR7_TYPE_DATA_WR:
-        case DR7_TYPE_DATA_RW:
-            if (env->cpu_watchpoint[reg] &&
-                env->cpu_watchpoint[reg]->flags & BP_WATCHPOINT_HIT) {
-                wp_match = true;
-            }
-            break;
-        case DR7_TYPE_IO_RW:
-            break;
-        }
-        if (bp_match || wp_match) {
-            dr6 |= 1 << reg;
-            if (hw_breakpoint_enabled(env->dr[7], reg)) {
-                hit_enabled = true;
-            }
-        }
-    }
-
-    if (hit_enabled || force_dr6_update) {
-        env->dr[6] = dr6;
-    }
-
-    return hit_enabled;
-}
-
-void breakpoint_handler(CPUState *cs)
-{
-    X86CPU *cpu = X86_CPU(cs);
-    CPUX86State *env = &cpu->env;
-    CPUBreakpoint *bp;
-
-    if (cs->watchpoint_hit) {
-        if (cs->watchpoint_hit->flags & BP_CPU) {
-            cs->watchpoint_hit = NULL;
-            if (check_hw_breakpoints(env, false)) {
-                raise_exception(env, EXCP01_DB);
-            } else {
-                cpu_resume_from_signal(cs, NULL);
-            }
-        }
-    } else {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == env->eip) {
-                if (bp->flags & BP_CPU) {
-                    check_hw_breakpoints(env, true);
-                    raise_exception(env, EXCP01_DB);
-                }
-                break;
-            }
-        }
-    }
-}
-
 typedef struct MCEInjectionParams {
     Monitor *mon;
     X86CPU *cpu;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 7b0ba179cc..80d1a7e01e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -67,6 +67,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
 
 static bool has_msr_star;
 static bool has_msr_hsave_pa;
+static bool has_msr_tsc_aux;
 static bool has_msr_tsc_adjust;
 static bool has_msr_tsc_deadline;
 static bool has_msr_feature_control;
@@ -825,6 +826,10 @@ static int kvm_get_supported_msrs(KVMState *s)
                     has_msr_hsave_pa = true;
                     continue;
                 }
+                if (kvm_msr_list->indices[i] == MSR_TSC_AUX) {
+                    has_msr_tsc_aux = true;
+                    continue;
+                }
                 if (kvm_msr_list->indices[i] == MSR_TSC_ADJUST) {
                     has_msr_tsc_adjust = true;
                     continue;
@@ -1299,6 +1304,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
     if (has_msr_hsave_pa) {
         kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
     }
+    if (has_msr_tsc_aux) {
+        kvm_msr_entry_set(&msrs[n++], MSR_TSC_AUX, env->tsc_aux);
+    }
     if (has_msr_tsc_adjust) {
         kvm_msr_entry_set(&msrs[n++], MSR_TSC_ADJUST, env->tsc_adjust);
     }
@@ -1671,6 +1679,9 @@ static int kvm_get_msrs(X86CPU *cpu)
     if (has_msr_hsave_pa) {
         msrs[n++].index = MSR_VM_HSAVE_PA;
     }
+    if (has_msr_tsc_aux) {
+        msrs[n++].index = MSR_TSC_AUX;
+    }
     if (has_msr_tsc_adjust) {
         msrs[n++].index = MSR_TSC_ADJUST;
     }
@@ -1820,6 +1831,9 @@ static int kvm_get_msrs(X86CPU *cpu)
         case MSR_IA32_TSC:
             env->tsc = msrs[i].data;
             break;
+        case MSR_TSC_AUX:
+            env->tsc_aux = msrs[i].data;
+            break;
         case MSR_TSC_ADJUST:
             env->tsc_adjust = msrs[i].data;
             break;
diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c
index 6bfc7dd24e..13bd4f5eec 100644
--- a/target-i386/misc_helper.c
+++ b/target-i386/misc_helper.c
@@ -95,15 +95,6 @@ void helper_into(CPUX86State *env, int next_eip_addend)
     }
 }
 
-void helper_single_step(CPUX86State *env)
-{
-#ifndef CONFIG_USER_ONLY
-    check_hw_breakpoints(env, true);
-    env->dr[6] |= DR6_BS;
-#endif
-    raise_exception(env, EXCP01_DB);
-}
-
 void helper_cpuid(CPUX86State *env)
 {
     uint32_t eax, ebx, ecx, edx;
@@ -127,10 +118,6 @@ target_ulong helper_read_crN(CPUX86State *env, int reg)
 void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
 {
 }
-
-void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0)
-{
-}
 #else
 target_ulong helper_read_crN(CPUX86State *env, int reg)
 {
@@ -176,27 +163,6 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
         break;
     }
 }
-
-void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0)
-{
-    int i;
-
-    if (reg < 4) {
-        hw_breakpoint_remove(env, reg);
-        env->dr[reg] = t0;
-        hw_breakpoint_insert(env, reg);
-    } else if (reg == 7) {
-        for (i = 0; i < DR7_MAX_BP; i++) {
-            hw_breakpoint_remove(env, i);
-        }
-        env->dr[7] = t0;
-        for (i = 0; i < DR7_MAX_BP; i++) {
-            hw_breakpoint_insert(env, i);
-        }
-    } else {
-        env->dr[reg] = t0;
-    }
-}
 #endif
 
 void helper_lmsw(CPUX86State *env, target_ulong t0)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 8b35de1a1a..ef10e685cc 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -75,8 +75,6 @@ static TCGv_ptr cpu_ptr0, cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
-static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
-
 #include "exec/gen-icount.h"
 
 #ifdef TARGET_X86_64
@@ -4401,9 +4399,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(pc_start);
-    }
     s->pc = pc_start;
     prefixes = 0;
     s->override = -1;
@@ -7842,18 +7837,13 @@ void optimize_flags_init(void)
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
-   basic block 'tb'. If search_pc is TRUE, also generate PC
-   information for each intermediate instruction. */
-static inline void gen_intermediate_code_internal(X86CPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+   basic block 'tb'.  */
+void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
 {
+    X86CPU *cpu = x86_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUX86State *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
     target_ulong pc_ptr;
-    CPUBreakpoint *bp;
-    int j, lj;
     uint64_t flags;
     target_ulong pc_start;
     target_ulong cs_base;
@@ -7933,40 +7923,32 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
 
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     for(;;) {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == pc_ptr &&
-                    !((bp->flags & BP_CPU) && (tb->flags & HF_RF_MASK))) {
-                    gen_debug(dc, pc_ptr - dc->cs_base);
-                    goto done_generating;
-                }
-            }
-        }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = pc_ptr;
-            gen_opc_cc_op[lj] = dc->cc_op;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        tcg_gen_insn_start(pc_ptr, dc->cc_op);
+        num_insns++;
+
+        /* If RF is set, suppress an internally generated breakpoint.  */
+        if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
+                                         tb->flags & HF_RF_MASK
+                                         ? BP_GDB : BP_ANY))) {
+            gen_debug(dc, pc_ptr - dc->cs_base);
+            goto done_generating;
         }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
 
         pc_ptr = disas_insn(env, dc, pc_ptr);
-        num_insns++;
         /* stop translation if indicated */
         if (dc->is_jmp)
             break;
@@ -8014,14 +7996,6 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
 done_generating:
     gen_tb_end(tb, num_insns);
 
-    /* we don't forget to fill the last values */
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    }
-
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         int disas_flags;
@@ -8038,42 +8012,16 @@ done_generating:
     }
 #endif
 
-    if (!search_pc) {
-        tb->size = pc_ptr - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = pc_ptr - pc_start;
+    tb->icount = num_insns;
 }
 
-void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
+void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    gen_intermediate_code_internal(x86_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUX86State *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(x86_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int pc_pos)
-{
-    int cc_op;
-#ifdef DEBUG_DISAS
-    if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
-        int i;
-        qemu_log("RESTORE:\n");
-        for(i = 0;i <= pc_pos; i++) {
-            if (tcg_ctx.gen_opc_instr_start[i]) {
-                qemu_log("0x%04x: " TARGET_FMT_lx "\n", i,
-                        tcg_ctx.gen_opc_pc[i]);
-            }
-        }
-        qemu_log("pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
-                pc_pos, tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base,
-                (uint32_t)tb->cs_base);
-    }
-#endif
-    env->eip = tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base;
-    cc_op = gen_opc_cc_op[pc_pos];
-    if (cc_op != CC_OP_DYNAMIC)
+    int cc_op = data[1];
+    env->eip = data[0] - tb->cs_base;
+    if (cc_op != CC_OP_DYNAMIC) {
         env->cc_op = cc_op;
+    }
 }
diff --git a/target-lm32/cpu.h b/target-lm32/cpu.h
index 3f874d5111..2b7620c7da 100644
--- a/target-lm32/cpu.h
+++ b/target-lm32/cpu.h
@@ -219,7 +219,6 @@ bool lm32_cpu_do_semihosting(CPUState *cs);
 
 #define cpu_list lm32_cpu_list
 #define cpu_exec cpu_lm32_exec
-#define cpu_gen_code cpu_lm32_gen_code
 #define cpu_signal_handler cpu_lm32_signal_handler
 
 int lm32_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw,
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index cf7042e3e0..c61ad0f9ab 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -1005,10 +1005,6 @@ static const DecoderInfo decinfo[] = {
 
 static inline void decode(DisasContext *dc, uint32_t ir)
 {
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-    }
-
     dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
 
@@ -1036,32 +1032,13 @@ static inline void decode(DisasContext *dc, uint32_t ir)
     decinfo[dc->opcode](dc);
 }
 
-static void check_breakpoint(CPULM32State *env, DisasContext *dc)
-{
-    CPUState *cs = CPU(lm32_env_get_cpu(env));
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                tcg_gen_movi_tl(cpu_pc, dc->pc);
-                t_gen_raise_exception(dc, EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-             }
-        }
-    }
-}
-
 /* generate intermediate code for basic block 'tb'.  */
-static inline
-void gen_intermediate_code_internal(LM32CPU *cpu,
-                                    TranslationBlock *tb, bool search_pc)
+void gen_intermediate_code(CPULM32State *env, struct TranslationBlock *tb)
 {
+    LM32CPU *cpu = lm32_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPULM32State *env = &cpu->env;
     struct DisasContext ctx, *dc = &ctx;
     uint32_t pc_start;
-    int j, lj;
     uint32_t next_page_start;
     int num_insns;
     int max_insns;
@@ -1083,41 +1060,36 @@ void gen_intermediate_code_internal(LM32CPU *cpu,
     }
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do {
-        check_breakpoint(env, dc);
-
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            tcg_gen_movi_tl(cpu_pc, dc->pc);
+            t_gen_raise_exception(dc, EXCP_DEBUG);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
         }
 
         /* Pretty disas.  */
         LOG_DIS("%8.8x:\t", dc->pc);
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
 
         decode(dc, cpu_ldl_code(env, dc->pc));
         dc->pc += 4;
-        num_insns++;
-
     } while (!dc->is_jmp
          && !tcg_op_buf_full()
          && !cs->singlestep_enabled
@@ -1154,16 +1126,8 @@ void gen_intermediate_code_internal(LM32CPU *cpu,
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -1175,16 +1139,6 @@ void gen_intermediate_code_internal(LM32CPU *cpu,
 #endif
 }
 
-void gen_intermediate_code(CPULM32State *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(lm32_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPULM32State *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(lm32_env_get_cpu(env), tb, true);
-}
-
 void lm32_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                          int flags)
 {
@@ -1219,9 +1173,10 @@ void lm32_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
     cpu_fprintf(f, "\n\n");
 }
 
-void restore_state_to_opc(CPULM32State *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPULM32State *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
 
 void lm32_translate_init(void)
diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
index d1957063b4..224c16967c 100644
--- a/target-m68k/cpu.h
+++ b/target-m68k/cpu.h
@@ -213,7 +213,6 @@ void register_m68k_insns (CPUM68KState *env);
 #define cpu_init(cpu_model) CPU(cpu_m68k_init(cpu_model))
 
 #define cpu_exec cpu_m68k_exec
-#define cpu_gen_code cpu_m68k_gen_code
 #define cpu_signal_handler cpu_m68k_signal_handler
 #define cpu_list m68k_cpu_list
 
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 3cdf6652aa..5995ccea92 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -2955,10 +2955,6 @@ static void disas_m68k_insn(CPUM68KState * env, DisasContext *s)
 {
     uint16_t insn;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(s->pc);
-    }
-
     insn = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
@@ -2966,15 +2962,11 @@ static void disas_m68k_insn(CPUM68KState * env, DisasContext *s)
 }
 
 /* generate intermediate code for basic block 'tb'.  */
-static inline void
-gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb)
 {
+    M68kCPU *cpu = m68k_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUM68KState *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj;
     target_ulong pc_start;
     int pc_offset;
     int num_insns;
@@ -2993,43 +2985,34 @@ gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
     dc->fpcr = env->fpcr;
     dc->user = (env->sr & SR_S) == 0;
     dc->done_mac = 0;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do {
         pc_offset = dc->pc - pc_start;
         gen_throws_exception = NULL;
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == dc->pc) {
-                    gen_exception(dc, dc->pc, EXCP_DEBUG);
-                    dc->is_jmp = DISAS_JUMP;
-                    break;
-                }
-            }
-            if (dc->is_jmp)
-                break;
-        }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            gen_exception(dc, dc->pc, EXCP_DEBUG);
+            dc->is_jmp = DISAS_JUMP;
+            break;
         }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
+
         dc->insn_pc = dc->pc;
 	disas_m68k_insn(env, dc);
-        num_insns++;
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
@@ -3073,28 +3056,8 @@ gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
-
-    //optimize_flags();
-    //expand_target_qops();
-}
-
-void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(m68k_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUM68KState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(m68k_env_get_cpu(env), tb, true);
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 }
 
 void m68k_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
@@ -3120,7 +3083,8 @@ void m68k_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
     cpu_fprintf (f, "FPRESULT = %12g\n", *(double *)&env->fp_result);
 }
 
-void restore_state_to_opc(CPUM68KState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUM68KState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h
index b707c71367..6b212ab7f0 100644
--- a/target-microblaze/cpu.h
+++ b/target-microblaze/cpu.h
@@ -295,7 +295,6 @@ int cpu_mb_signal_handler(int host_signum, void *pinfo,
 #define cpu_init(cpu_model) CPU(cpu_mb_init(cpu_model))
 
 #define cpu_exec cpu_mb_exec
-#define cpu_gen_code cpu_mb_gen_code
 #define cpu_signal_handler cpu_mb_signal_handler
 
 /* MMU modes definitions */
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 3de89440a6..a9c501099c 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -1588,10 +1588,6 @@ static inline void decode(DisasContext *dc, uint32_t ir)
 {
     int i;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-    }
-
     dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
 
@@ -1630,30 +1626,12 @@ static inline void decode(DisasContext *dc, uint32_t ir)
     }
 }
 
-static void check_breakpoint(CPUMBState *env, DisasContext *dc)
-{
-    CPUState *cs = CPU(mb_env_get_cpu(env));
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                t_gen_raise_exception(dc, EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-             }
-        }
-    }
-}
-
 /* generate intermediate code for basic block 'tb'.  */
-static inline void
-gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUMBState *env, struct TranslationBlock *tb)
 {
+    MicroBlazeCPU *cpu = mb_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUMBState *env = &cpu->env;
     uint32_t pc_start;
-    int j, lj;
     struct DisasContext ctx;
     struct DisasContext *dc = &ctx;
     uint32_t next_page_start, org_flags;
@@ -1690,47 +1668,46 @@ gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
     }
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do
     {
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
 #if SIM_COMPAT
         if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
             tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
             gen_helper_debug();
         }
 #endif
-        check_breakpoint(env, dc);
-
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-                        tcg_ctx.gen_opc_icount[lj] = num_insns;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            t_gen_raise_exception(dc, EXCP_DEBUG);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
         }
 
         /* Pretty disas.  */
         LOG_DIS("%8.8x:\t", dc->pc);
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
 
         dc->clear_imm = 1;
         decode(dc, cpu_ldl_code(env, dc->pc));
         if (dc->clear_imm)
             dc->tb_flags &= ~IMM_FLAG;
         dc->pc += 4;
-        num_insns++;
 
         if (dc->delayed_branch) {
             dc->delayed_branch--;
@@ -1821,15 +1798,8 @@ gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
     }
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = dc->pc - pc_start;
-                tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
 #if !SIM_COMPAT
@@ -1846,16 +1816,6 @@ gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
     assert(!dc->abort_at_next_insn);
 }
 
-void gen_intermediate_code (CPUMBState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(mb_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUMBState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(mb_env_get_cpu(env), tb, true);
-}
-
 void mb_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                        int flags)
 {
@@ -1936,7 +1896,8 @@ void mb_tcg_init(void)
     }
 }
 
-void restore_state_to_opc(CPUMBState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUMBState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->sregs[SR_PC] = tcg_ctx.gen_opc_pc[pc_pos];
+    env->sregs[SR_PC] = data[0];
 }
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index ec5f991dfb..f32a0fd737 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -130,6 +130,7 @@ struct CPUMIPSFPUContext {
 };
 
 #define NB_MMU_MODES 3
+#define TARGET_INSN_START_EXTRA_WORDS 2
 
 typedef struct CPUMIPSMVPContext CPUMIPSMVPContext;
 struct CPUMIPSMVPContext {
@@ -619,7 +620,6 @@ void mips_cpu_unassigned_access(CPUState *cpu, hwaddr addr,
 void mips_cpu_list (FILE *f, fprintf_function cpu_fprintf);
 
 #define cpu_exec cpu_mips_exec
-#define cpu_gen_code cpu_mips_gen_code
 #define cpu_signal_handler cpu_mips_signal_handler
 #define cpu_list mips_cpu_list
 
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 87d495975a..897839ced9 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1359,9 +1359,6 @@ static TCGv_i32 fpu_fcr0, fpu_fcr31;
 static TCGv_i64 fpu_f64[32];
 static TCGv_i64 msa_wr_d[64];
 
-static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
-static target_ulong gen_opc_btarget[OPC_BUF_SIZE];
-
 #include "exec/gen-icount.h"
 
 #define gen_helper_0e0i(name, arg) do {                           \
@@ -18904,10 +18901,6 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
         gen_set_label(l1);
     }
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(ctx->pc);
-    }
-
     op = MASK_OP_MAJOR(ctx->opcode);
     rs = (ctx->opcode >> 21) & 0x1f;
     rt = (ctx->opcode >> 16) & 0x1f;
@@ -19539,25 +19532,18 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
     }
 }
 
-static inline void
-gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUMIPSState *env, struct TranslationBlock *tb)
 {
+    MIPSCPU *cpu = mips_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUMIPSState *env = &cpu->env;
     DisasContext ctx;
     target_ulong pc_start;
     target_ulong next_page_start;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
     int num_insns;
     int max_insns;
     int insn_bytes;
     int is_slot;
 
-    if (search_pc)
-        qemu_log("search pc %d\n", search_pc);
-
     pc_start = tb->pc;
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
     ctx.pc = pc_start;
@@ -19567,6 +19553,7 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
     ctx.CP0_Config1 = env->CP0_Config1;
     ctx.tb = tb;
     ctx.bstate = BS_NONE;
+    ctx.btarget = 0;
     ctx.kscrexist = (env->CP0_Config4 >> CP0C4_KScrExist) & 0xff;
     ctx.rxi = (env->CP0_Config3 >> CP0C3_RXI) & 1;
     ctx.ie = (env->CP0_Config4 >> CP0C4_IE) & 3;
@@ -19590,40 +19577,32 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
                                  MO_UNALN : MO_ALIGN;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
+
     LOG_DISAS("\ntb %p idx %d hflags %04x\n", tb, ctx.mem_idx, ctx.hflags);
     gen_tb_start(tb);
     while (ctx.bstate == BS_NONE) {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == ctx.pc) {
-                    save_cpu_state(&ctx, 1);
-                    ctx.bstate = BS_BRANCH;
-                    gen_helper_raise_exception_debug(cpu_env);
-                    /* Include the breakpoint location or the tb won't
-                     * be flushed when it must be.  */
-                    ctx.pc += 4;
-                    goto done_generating;
-                }
-            }
-        }
+        tcg_gen_insn_start(ctx.pc, ctx.hflags & MIPS_HFLAG_BMASK, ctx.btarget);
+        num_insns++;
 
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = ctx.pc;
-            gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
-            gen_opc_btarget[lj] = ctx.btarget;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
+            save_cpu_state(&ctx, 1);
+            ctx.bstate = BS_BRANCH;
+            gen_helper_raise_exception_debug(cpu_env);
+            /* Include the breakpoint location or the tb won't
+             * be flushed when it must be.  */
+            ctx.pc += 4;
+            goto done_generating;
         }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
 
         is_slot = ctx.hflags & MIPS_HFLAG_BMASK;
         if (!(ctx.hflags & MIPS_HFLAG_M16)) {
@@ -19660,8 +19639,6 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
         }
         ctx.pc += insn_bytes;
 
-        num_insns++;
-
         /* Execute a branch and its delay slot as a single instruction.
            This is what GDB expects and is consistent with what the
            hardware does (e.g. if a delay slot instruction faults, the
@@ -19710,15 +19687,9 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
 done_generating:
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
+
 #ifdef DEBUG_DISAS
     LOG_DISAS("\n");
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -19729,16 +19700,6 @@ done_generating:
 #endif
 }
 
-void gen_intermediate_code (CPUMIPSState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(mips_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUMIPSState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(mips_env_get_cpu(env), tb, true);
-}
-
 static void fpu_dump_state(CPUMIPSState *env, FILE *f, fprintf_function fpu_fprintf,
                            int flags)
 {
@@ -20062,18 +20023,19 @@ void cpu_state_reset(CPUMIPSState *env)
     }
 }
 
-void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->active_tc.PC = tcg_ctx.gen_opc_pc[pc_pos];
+    env->active_tc.PC = data[0];
     env->hflags &= ~MIPS_HFLAG_BMASK;
-    env->hflags |= gen_opc_hflags[pc_pos];
+    env->hflags |= data[1];
     switch (env->hflags & MIPS_HFLAG_BMASK_BASE) {
     case MIPS_HFLAG_BR:
         break;
     case MIPS_HFLAG_BC:
     case MIPS_HFLAG_BL:
     case MIPS_HFLAG_B:
-        env->btarget = gen_opc_btarget[pc_pos];
+        env->btarget = data[2];
         break;
     }
 }
diff --git a/target-moxie/cpu.h b/target-moxie/cpu.h
index 2bac15bf60..a612744011 100644
--- a/target-moxie/cpu.h
+++ b/target-moxie/cpu.h
@@ -122,7 +122,6 @@ int cpu_moxie_signal_handler(int host_signum, void *pinfo,
 #define cpu_init(cpu_model) CPU(cpu_moxie_init(cpu_model))
 
 #define cpu_exec cpu_moxie_exec
-#define cpu_gen_code cpu_moxie_gen_code
 #define cpu_signal_handler cpu_moxie_signal_handler
 
 static inline int cpu_mmu_index(CPUMoxieState *env, bool ifetch)
diff --git a/target-moxie/translate.c b/target-moxie/translate.c
index cc77366ee7..f84841efe2 100644
--- a/target-moxie/translate.c
+++ b/target-moxie/translate.c
@@ -153,10 +153,6 @@ static int decode_opc(MoxieCPU *cpu, DisasContext *ctx)
     /* Set the default instruction length.  */
     int length = 2;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(ctx->pc);
-    }
-
     /* Examine the 16-bit opcode.  */
     opcode = ctx->opcode;
 
@@ -819,17 +815,13 @@ static int decode_opc(MoxieCPU *cpu, DisasContext *ctx)
 }
 
 /* generate intermediate code for basic block 'tb'.  */
-static inline void
-gen_intermediate_code_internal(MoxieCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUMoxieState *env, struct TranslationBlock *tb)
 {
+    MoxieCPU *cpu = moxie_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
     DisasContext ctx;
     target_ulong pc_start;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
-    CPUMoxieState *env = &cpu->env;
-    int num_insns;
+    int num_insns, max_insns;
 
     pc_start = tb->pc;
     ctx.pc = pc_start;
@@ -839,40 +831,35 @@ gen_intermediate_code_internal(MoxieCPU *cpu, TranslationBlock *tb,
     ctx.singlestep_enabled = 0;
     ctx.bstate = BS_NONE;
     num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0) {
+        max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     do {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (ctx.pc == bp->pc) {
-                    tcg_gen_movi_i32(cpu_pc, ctx.pc);
-                    gen_helper_debug(cpu_env);
-                    ctx.bstate = BS_EXCP;
-                    goto done_generating;
-                }
-            }
-        }
+        tcg_gen_insn_start(ctx.pc);
+        num_insns++;
 
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = ctx.pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
+            tcg_gen_movi_i32(cpu_pc, ctx.pc);
+            gen_helper_debug(cpu_env);
+            ctx.bstate = BS_EXCP;
+            goto done_generating;
         }
+
         ctx.opcode = cpu_lduw_code(env, ctx.pc);
         ctx.pc += decode_opc(cpu, &ctx);
-        num_insns++;
 
+        if (num_insns >= max_insns) {
+            break;
+        }
         if (cs->singlestep_enabled) {
             break;
         }
-
         if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0) {
             break;
         }
@@ -898,29 +885,12 @@ gen_intermediate_code_internal(MoxieCPU *cpu, TranslationBlock *tb,
  done_generating:
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
-}
-
-void gen_intermediate_code(CPUMoxieState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(moxie_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUMoxieState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(moxie_env_get_cpu(env), tb, true);
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
 }
 
-void restore_state_to_opc(CPUMoxieState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUMoxieState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h
index 1ff1c9ec2a..eb71607395 100644
--- a/target-openrisc/cpu.h
+++ b/target-openrisc/cpu.h
@@ -360,7 +360,6 @@ int cpu_openrisc_signal_handler(int host_signum, void *pinfo, void *puc);
 
 #define cpu_list cpu_openrisc_list
 #define cpu_exec cpu_openrisc_exec
-#define cpu_gen_code cpu_openrisc_gen_code
 #define cpu_signal_handler cpu_openrisc_signal_handler
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 473556e14a..b66fde18fe 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1618,30 +1618,12 @@ static void disas_openrisc_insn(DisasContext *dc, OpenRISCCPU *cpu)
     }
 }
 
-static void check_breakpoint(OpenRISCCPU *cpu, DisasContext *dc)
-{
-    CPUState *cs = CPU(cpu);
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                tcg_gen_movi_tl(cpu_pc, dc->pc);
-                gen_exception(dc, EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-            }
-        }
-    }
-}
-
-static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  int search_pc)
+void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
 {
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
     struct DisasContext ctx, *dc = &ctx;
     uint32_t pc_start;
-    int j, k;
     uint32_t next_page_start;
     int num_insns;
     int max_insns;
@@ -1663,36 +1645,30 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     }
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    k = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
 
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
 
     do {
-        check_breakpoint(cpu, dc);
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (k < j) {
-                k++;
-                while (k < j) {
-                    tcg_ctx.gen_opc_instr_start[k++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[k] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[k] = 1;
-            tcg_ctx.gen_opc_icount[k] = num_insns;
-        }
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc->pc);
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            tcg_gen_movi_tl(cpu_pc, dc->pc);
+            gen_exception(dc, EXCP_DEBUG);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
         }
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
         dc->ppc = dc->pc - 4;
@@ -1701,7 +1677,6 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
         tcg_gen_movi_tl(cpu_npc, dc->npc);
         disas_openrisc_insn(dc, cpu);
         dc->pc = dc->npc;
-        num_insns++;
         /* delay slot */
         if (dc->delayed_branch) {
             dc->delayed_branch--;
@@ -1756,16 +1731,8 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        k++;
-        while (k <= j) {
-            tcg_ctx.gen_opc_instr_start[k++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -1777,17 +1744,6 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
 #endif
 }
 
-void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(openrisc_env_get_cpu(env), tb, 0);
-}
-
-void gen_intermediate_code_pc(CPUOpenRISCState *env,
-                              struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(openrisc_env_get_cpu(env), tb, 1);
-}
-
 void openrisc_cpu_dump_state(CPUState *cs, FILE *f,
                              fprintf_function cpu_fprintf,
                              int flags)
@@ -1804,7 +1760,7 @@ void openrisc_cpu_dump_state(CPUState *cs, FILE *f,
 }
 
 void restore_state_to_opc(CPUOpenRISCState *env, TranslationBlock *tb,
-                          int pc_pos)
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index c6dbb38fea..98ce5a7ab0 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1241,7 +1241,6 @@ int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val);
 #define cpu_init(cpu_model) CPU(cpu_ppc_init(cpu_model))
 
 #define cpu_exec cpu_ppc_exec
-#define cpu_gen_code cpu_ppc_gen_code
 #define cpu_signal_handler cpu_ppc_signal_handler
 #define cpu_list ppc_cpu_list
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c0eed13f98..c2bc1a7ec6 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -11409,17 +11409,13 @@ void ppc_cpu_dump_statistics(CPUState *cs, FILE*f,
 }
 
 /*****************************************************************************/
-static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUPPCState *env = &cpu->env;
     DisasContext ctx, *ctxp = &ctx;
     opc_handler_t **table, *handler;
     target_ulong pc_start;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
     int num_insns;
     int max_insns;
 
@@ -11476,36 +11472,29 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
 #endif
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     tcg_clear_temp_count();
     /* Set env in case of segfault during code fetch */
     while (ctx.exception == POWERPC_EXCP_NONE && !tcg_op_buf_full()) {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == ctx.nip) {
-                    gen_debug_exception(ctxp);
-                    break;
-                }
-            }
-        }
-        if (unlikely(search_pc)) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = ctx.nip;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        tcg_gen_insn_start(ctx.nip);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, ctx.nip, BP_ANY))) {
+            gen_debug_exception(ctxp);
+            break;
         }
+
         LOG_DISAS("----------------\n");
         LOG_DISAS("nip=" TARGET_FMT_lx " super=%d ir=%d\n",
                   ctx.nip, ctx.mem_idx, (int)msr_ir);
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO))
             gen_io_start();
         if (unlikely(need_byteswap(&ctx))) {
             ctx.opcode = bswap32(cpu_ldl_code(env, ctx.nip));
@@ -11515,12 +11504,8 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
         LOG_DISAS("translate opcode %08x (%02x %02x %02x) (%s)\n",
                     ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
                     opc3(ctx.opcode), ctx.le_mode ? "little" : "big");
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(ctx.nip);
-        }
         ctx.nip += 4;
         table = env->opcodes;
-        num_insns++;
         handler = table[opc1(ctx.opcode)];
         if (is_indirect_opcode(handler)) {
             table = ind_table(handler);
@@ -11599,15 +11584,9 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
     }
     gen_tb_end(tb, num_insns);
 
-    if (unlikely(search_pc)) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    } else {
-        tb->size = ctx.nip - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.nip - pc_start;
+    tb->icount = num_insns;
+
 #if defined(DEBUG_DISAS)
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         int flags;
@@ -11620,17 +11599,8 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
 #endif
 }
 
-void gen_intermediate_code (CPUPPCState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(ppc_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUPPCState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(ppc_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->nip = tcg_ctx.gen_opc_pc[pc_pos];
+    env->nip = data[0];
 }
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 5acd54c6ca..e4de8632f3 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -42,6 +42,7 @@
 #include "fpu/softfloat.h"
 
 #define NB_MMU_MODES 3
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 #define MMU_MODE0_SUFFIX _primary
 #define MMU_MODE1_SUFFIX _secondary
@@ -568,7 +569,7 @@ void cpu_unlock(void);
 typedef struct SubchDev SubchDev;
 
 #ifndef CONFIG_USER_ONLY
-extern void io_subsystem_reset(void);
+extern void subsystem_reset(void);
 SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
                          uint16_t schid);
 bool css_subch_visible(SubchDev *sch);
@@ -597,7 +598,6 @@ bool css_present(uint8_t cssid);
 
 #define cpu_init(model) CPU(cpu_s390x_init(model))
 #define cpu_exec cpu_s390x_exec
-#define cpu_gen_code cpu_s390x_gen_code
 #define cpu_signal_handler cpu_s390x_signal_handler
 
 void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf);
diff --git a/target-s390x/gdbstub.c b/target-s390x/gdbstub.c
index 0c39a3c69f..a05d1cd7ab 100644
--- a/target-s390x/gdbstub.c
+++ b/target-s390x/gdbstub.c
@@ -205,6 +205,82 @@ static int cpu_write_c_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
         return 0;
     }
 }
+
+/* the values represent the positions in s390-virt.xml */
+#define S390_VIRT_CKC_REGNUM    0
+#define S390_VIRT_CPUTM_REGNUM  1
+#define S390_VIRT_BEA_REGNUM    2
+#define S390_VIRT_PREFIX_REGNUM 3
+#define S390_VIRT_PP_REGNUM     4
+#define S390_VIRT_PFT_REGNUM    5
+#define S390_VIRT_PFS_REGNUM    6
+#define S390_VIRT_PFC_REGNUM    7
+/* total number of registers in s390-virt.xml */
+#define S390_NUM_VIRT_REGS 8
+
+static int cpu_read_virt_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
+{
+    switch (n) {
+    case S390_VIRT_CKC_REGNUM:
+        return gdb_get_regl(mem_buf, env->ckc);
+    case S390_VIRT_CPUTM_REGNUM:
+        return gdb_get_regl(mem_buf, env->cputm);
+    case S390_VIRT_BEA_REGNUM:
+        return gdb_get_regl(mem_buf, env->gbea);
+    case S390_VIRT_PREFIX_REGNUM:
+        return gdb_get_regl(mem_buf, env->psa);
+    case S390_VIRT_PP_REGNUM:
+        return gdb_get_regl(mem_buf, env->pp);
+    case S390_VIRT_PFT_REGNUM:
+        return gdb_get_regl(mem_buf, env->pfault_token);
+    case S390_VIRT_PFS_REGNUM:
+        return gdb_get_regl(mem_buf, env->pfault_select);
+    case S390_VIRT_PFC_REGNUM:
+        return gdb_get_regl(mem_buf, env->pfault_compare);
+    default:
+        return 0;
+    }
+}
+
+static int cpu_write_virt_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
+{
+    switch (n) {
+    case S390_VIRT_CKC_REGNUM:
+        env->ckc = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_CPUTM_REGNUM:
+        env->cputm = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_BEA_REGNUM:
+        env->gbea = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PREFIX_REGNUM:
+        env->psa = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PP_REGNUM:
+        env->pp = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PFT_REGNUM:
+        env->pfault_token = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PFS_REGNUM:
+        env->pfault_select = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    case S390_VIRT_PFC_REGNUM:
+        env->pfault_compare = ldtul_p(mem_buf);
+        cpu_synchronize_post_init(ENV_GET_CPU(env));
+        return 8;
+    default:
+        return 0;
+    }
+}
 #endif
 
 void s390_cpu_gdb_init(CPUState *cs)
@@ -225,5 +301,11 @@ void s390_cpu_gdb_init(CPUState *cs)
     gdb_register_coprocessor(cs, cpu_read_c_reg,
                              cpu_write_c_reg,
                              S390_NUM_C_REGS, "s390-cr.xml", 0);
+
+    if (kvm_enabled()) {
+        gdb_register_coprocessor(cs, cpu_read_virt_reg,
+                                 cpu_write_virt_reg,
+                                 S390_NUM_VIRT_REGS, "s390-virt.xml", 0);
+    }
 #endif
 }
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index 8eac0e12b9..3a19e321c8 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -128,7 +128,7 @@ static int modified_clear_reset(S390CPU *cpu)
         run_on_cpu(t, s390_do_cpu_full_reset, t);
     }
     cmma_reset(cpu);
-    io_subsystem_reset();
+    subsystem_reset();
     scc->load_normal(CPU(cpu));
     cpu_synchronize_all_post_reset();
     resume_all_vcpus();
@@ -146,7 +146,7 @@ static int load_normal_reset(S390CPU *cpu)
         run_on_cpu(t, s390_do_cpu_reset, t);
     }
     cmma_reset(cpu);
-    io_subsystem_reset();
+    subsystem_reset();
     scc->initial_cpu_reset(CPU(cpu));
     scc->load_normal(CPU(cpu));
     cpu_synchronize_all_post_reset();
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 2bca33acca..05d51fe84a 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -124,7 +124,7 @@ void s390_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
     for (i = 0; i < 32; i++) {
         cpu_fprintf(f, "V%02d=%016" PRIx64 "%016" PRIx64, i,
                     env->vregs[i][0].ll, env->vregs[i][1].ll);
-        cpu_fprintf(f, (i % 2) ? " " : "\n");
+        cpu_fprintf(f, (i % 2) ? "\n" : " ");
     }
 
 #ifndef CONFIG_USER_ONLY
@@ -161,8 +161,6 @@ static char cpu_reg_names[32][4];
 static TCGv_i64 regs[16];
 static TCGv_i64 fregs[16];
 
-static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
-
 void s390x_translate_init(void)
 {
     int i;
@@ -5319,18 +5317,14 @@ static ExitStatus translate_one(CPUS390XState *env, DisasContext *s)
     return ret;
 }
 
-static inline void gen_intermediate_code_internal(S390CPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb)
 {
+    S390CPU *cpu = s390_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUS390XState *env = &cpu->env;
     DisasContext dc;
     target_ulong pc_start;
     uint64_t next_page_start;
-    int j, lj = -1;
     int num_insns, max_insns;
-    CPUBreakpoint *bp;
     ExitStatus status;
     bool do_debug;
 
@@ -5353,41 +5347,27 @@ static inline void gen_intermediate_code_internal(S390CPU *cpu,
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
 
     do {
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc.pc;
-            gen_opc_cc_op[lj] = dc.cc_op;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
-        if (++num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
-            gen_io_start();
+        tcg_gen_insn_start(dc.pc, dc.cc_op);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc.pc, BP_ANY))) {
+            status = EXIT_PC_STALE;
+            do_debug = true;
+            break;
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc.pc);
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
+            gen_io_start();
         }
 
         status = NO_EXIT;
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == dc.pc) {
-                    status = EXIT_PC_STALE;
-                    do_debug = true;
-                    break;
-                }
-            }
-        }
         if (status == NO_EXIT) {
             status = translate_one(env, &dc);
         }
@@ -5432,16 +5412,8 @@ static inline void gen_intermediate_code_internal(S390CPU *cpu,
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc.pc - pc_start;
+    tb->icount = num_insns;
 
 #if defined(S390X_DEBUG_DISAS)
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -5452,21 +5424,11 @@ static inline void gen_intermediate_code_internal(S390CPU *cpu,
 #endif
 }
 
-void gen_intermediate_code (CPUS390XState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(s390_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc (CPUS390XState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(s390_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    int cc_op;
-    env->psw.addr = tcg_ctx.gen_opc_pc[pc_pos];
-    cc_op = gen_opc_cc_op[pc_pos];
+    int cc_op = data[1];
+    env->psw.addr = data[0];
     if ((cc_op != CC_OP_DYNAMIC) && (cc_op != CC_OP_STATIC)) {
         env->cc_op = cc_op;
     }
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index 6fb63215ef..5b022c5973 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -120,6 +120,7 @@ typedef struct tlb_t {
 #define ITLB_SIZE 4
 
 #define NB_MMU_MODES 2
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 enum sh_features {
     SH_FEATURE_SH4A = 1,
@@ -225,7 +226,6 @@ void cpu_load_tlb(CPUSH4State * env);
 #define cpu_init(cpu_model) CPU(cpu_sh4_init(cpu_model))
 
 #define cpu_exec cpu_sh4_exec
-#define cpu_gen_code cpu_sh4_gen_code
 #define cpu_signal_handler cpu_sh4_signal_handler
 #define cpu_list sh4_cpu_list
 
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 724c0e7106..f764bc2539 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -70,8 +70,6 @@ static TCGv cpu_fregs[32];
 /* internal register indexes */
 static TCGv cpu_flags, cpu_delayed_pc;
 
-static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
-
 #include "exec/gen-icount.h"
 
 void sh4_translate_init(void)
@@ -1790,10 +1788,6 @@ static void decode_opc(DisasContext * ctx)
 {
     uint32_t old_flags = ctx->flags;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(ctx->pc);
-    }
-
     _decode_opc(ctx);
 
     if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
@@ -1820,16 +1814,12 @@ static void decode_opc(DisasContext * ctx)
         gen_store_flags(ctx->flags);
 }
 
-static inline void
-gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
-                               bool search_pc)
+void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
 {
+    SuperHCPU *cpu = sh_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUSH4State *env = &cpu->env;
     DisasContext ctx;
     target_ulong pc_start;
-    CPUBreakpoint *bp;
-    int i, ii;
     int num_insns;
     int max_insns;
 
@@ -1846,45 +1836,34 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
     ctx.features = env->features;
     ctx.has_movcal = (ctx.flags & TB_FLAG_PENDING_MOVCA);
 
-    ii = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
+
     gen_tb_start(tb);
     while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (ctx.pc == bp->pc) {
-		    /* We have hit a breakpoint - make sure PC is up-to-date */
-		    tcg_gen_movi_i32(cpu_pc, ctx.pc);
-                    gen_helper_debug(cpu_env);
-                    ctx.bstate = BS_BRANCH;
-		    break;
-		}
-	    }
-	}
-        if (search_pc) {
-            i = tcg_op_buf_count();
-            if (ii < i) {
-                ii++;
-                while (ii < i)
-                    tcg_ctx.gen_opc_instr_start[ii++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[ii] = ctx.pc;
-            gen_opc_hflags[ii] = ctx.flags;
-            tcg_ctx.gen_opc_instr_start[ii] = 1;
-            tcg_ctx.gen_opc_icount[ii] = num_insns;
+        tcg_gen_insn_start(ctx.pc, ctx.flags);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
+            /* We have hit a breakpoint - make sure PC is up-to-date */
+            tcg_gen_movi_i32(cpu_pc, ctx.pc);
+            gen_helper_debug(cpu_env);
+            ctx.bstate = BS_BRANCH;
+            break;
         }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
-#if 0
-	fprintf(stderr, "Loading opcode at address 0x%08x\n", ctx.pc);
-	fflush(stderr);
-#endif
+        }
+
         ctx.opcode = cpu_lduw_code(env, ctx.pc);
 	decode_opc(&ctx);
-        num_insns++;
 	ctx.pc += 2;
 	if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
 	    break;
@@ -1924,15 +1903,8 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
 
     gen_tb_end(tb, num_insns);
 
-    if (search_pc) {
-        i = tcg_op_buf_count();
-        ii++;
-        while (ii <= i)
-            tcg_ctx.gen_opc_instr_start[ii++] = 0;
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -1943,18 +1915,9 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
 #endif
 }
 
-void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(sh_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUSH4State * env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(sh_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
-    env->flags = gen_opc_hflags[pc_pos];
+    env->pc = data[0];
+    env->flags = data[1];
 }
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 053edd5ed1..9fa770b144 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -230,6 +230,7 @@ typedef struct trap_state {
     uint32_t tt;
 } trap_state;
 #endif
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 typedef struct sparc_def_t {
     const char *name;
@@ -592,7 +593,6 @@ int cpu_sparc_signal_handler(int host_signum, void *pinfo, void *puc);
 #endif
 
 #define cpu_exec cpu_sparc_exec
-#define cpu_gen_code cpu_sparc_gen_code
 #define cpu_signal_handler cpu_sparc_signal_handler
 #define cpu_list sparc_cpu_list
 
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 4690b4699a..b59742ad2e 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -64,9 +64,6 @@ static TCGv cpu_wim;
 /* Floating point registers */
 static TCGv_i64 cpu_fpr[TARGET_DPREGS];
 
-static target_ulong gen_opc_npc[OPC_BUF_SIZE];
-static target_ulong gen_opc_jump_pc[2];
-
 #include "exec/gen-icount.h"
 
 typedef struct DisasContext {
@@ -955,17 +952,44 @@ static inline void gen_branch2(DisasContext *dc, target_ulong pc1,
     gen_goto_tb(dc, 1, pc2, pc2 + 4);
 }
 
-static inline void gen_branch_a(DisasContext *dc, target_ulong pc1,
-                                target_ulong pc2, TCGv r_cond)
+static void gen_branch_a(DisasContext *dc, target_ulong pc1)
 {
     TCGLabel *l1 = gen_new_label();
+    target_ulong npc = dc->npc;
 
-    tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cond, 0, l1);
 
-    gen_goto_tb(dc, 0, pc2, pc1);
+    gen_goto_tb(dc, 0, npc, pc1);
 
     gen_set_label(l1);
-    gen_goto_tb(dc, 1, pc2 + 4, pc2 + 8);
+    gen_goto_tb(dc, 1, npc + 4, npc + 8);
+
+    dc->is_br = 1;
+}
+
+static void gen_branch_n(DisasContext *dc, target_ulong pc1)
+{
+    target_ulong npc = dc->npc;
+
+    if (likely(npc != DYNAMIC_PC)) {
+        dc->pc = npc;
+        dc->jump_pc[0] = pc1;
+        dc->jump_pc[1] = npc + 4;
+        dc->npc = JUMP_PC;
+    } else {
+        TCGv t, z;
+
+        tcg_gen_mov_tl(cpu_pc, cpu_npc);
+
+        tcg_gen_addi_tl(cpu_npc, cpu_npc, 4);
+        t = tcg_const_tl(pc1);
+        z = tcg_const_tl(0);
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_npc, cpu_cond, z, t, cpu_npc);
+        tcg_temp_free(t);
+        tcg_temp_free(z);
+
+        dc->pc = DYNAMIC_PC;
+    }
 }
 
 static inline void gen_generic_branch(DisasContext *dc)
@@ -1398,18 +1422,9 @@ static void do_branch(DisasContext *dc, int32_t offset, uint32_t insn, int cc)
         flush_cond(dc);
         gen_cond(cpu_cond, cc, cond, dc);
         if (a) {
-            gen_branch_a(dc, target, dc->npc, cpu_cond);
-            dc->is_br = 1;
+            gen_branch_a(dc, target);
         } else {
-            dc->pc = dc->npc;
-            dc->jump_pc[0] = target;
-            if (unlikely(dc->npc == DYNAMIC_PC)) {
-                dc->jump_pc[1] = DYNAMIC_PC;
-                tcg_gen_addi_tl(cpu_pc, cpu_npc, 4);
-            } else {
-                dc->jump_pc[1] = dc->npc + 4;
-                dc->npc = JUMP_PC;
-            }
+            gen_branch_n(dc, target);
         }
     }
 }
@@ -1447,18 +1462,9 @@ static void do_fbranch(DisasContext *dc, int32_t offset, uint32_t insn, int cc)
         flush_cond(dc);
         gen_fcond(cpu_cond, cc, cond);
         if (a) {
-            gen_branch_a(dc, target, dc->npc, cpu_cond);
-            dc->is_br = 1;
+            gen_branch_a(dc, target);
         } else {
-            dc->pc = dc->npc;
-            dc->jump_pc[0] = target;
-            if (unlikely(dc->npc == DYNAMIC_PC)) {
-                dc->jump_pc[1] = DYNAMIC_PC;
-                tcg_gen_addi_tl(cpu_pc, cpu_npc, 4);
-            } else {
-                dc->jump_pc[1] = dc->npc + 4;
-                dc->npc = JUMP_PC;
-            }
+            gen_branch_n(dc, target);
         }
     }
 }
@@ -1476,18 +1482,9 @@ static void do_branch_reg(DisasContext *dc, int32_t offset, uint32_t insn,
     flush_cond(dc);
     gen_cond_reg(cpu_cond, cond, r_reg);
     if (a) {
-        gen_branch_a(dc, target, dc->npc, cpu_cond);
-        dc->is_br = 1;
+        gen_branch_a(dc, target);
     } else {
-        dc->pc = dc->npc;
-        dc->jump_pc[0] = target;
-        if (unlikely(dc->npc == DYNAMIC_PC)) {
-            dc->jump_pc[1] = DYNAMIC_PC;
-            tcg_gen_addi_tl(cpu_pc, cpu_npc, 4);
-        } else {
-            dc->jump_pc[1] = dc->npc + 4;
-            dc->npc = JUMP_PC;
-        }
+        gen_branch_n(dc, target);
     }
 }
 
@@ -2482,10 +2479,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     TCGv_i64 cpu_src1_64, cpu_src2_64, cpu_dst_64;
     target_long simm;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-    }
-
     opc = GET_FIELD(insn, 0, 1);
     rd = GET_FIELD(insn, 2, 6);
 
@@ -5213,16 +5206,12 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     }
 }
 
-static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool spc)
+void gen_intermediate_code(CPUSPARCState * env, TranslationBlock * tb)
 {
+    SPARCCPU *cpu = sparc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUSPARCState *env = &cpu->env;
     target_ulong pc_start, last_pc;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj = -1;
     int num_insns;
     int max_insns;
     unsigned int insn;
@@ -5242,42 +5231,41 @@ static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
 
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
+
     gen_tb_start(tb);
     do {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == dc->pc) {
-                    if (dc->pc != pc_start)
-                        save_state(dc);
-                    gen_helper_debug(cpu_env);
-                    tcg_gen_exit_tb(0);
-                    dc->is_br = 1;
-                    goto exit_gen_loop;
-                }
-            }
+        if (dc->npc & JUMP_PC) {
+            assert(dc->jump_pc[1] == dc->pc + 4);
+            tcg_gen_insn_start(dc->pc, dc->jump_pc[0] | JUMP_PC);
+        } else {
+            tcg_gen_insn_start(dc->pc, dc->npc);
         }
-        if (spc) {
-            qemu_log("Search PC...\n");
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                tcg_ctx.gen_opc_pc[lj] = dc->pc;
-                gen_opc_npc[lj] = dc->npc;
-                tcg_ctx.gen_opc_instr_start[lj] = 1;
-                tcg_ctx.gen_opc_icount[lj] = num_insns;
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            if (dc->pc != pc_start) {
+                save_state(dc);
             }
+            gen_helper_debug(cpu_env);
+            tcg_gen_exit_tb(0);
+            dc->is_br = 1;
+            goto exit_gen_loop;
         }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
+
         last_pc = dc->pc;
         insn = cpu_ldl_code(env, dc->pc);
 
         disas_sparc_insn(dc, insn);
-        num_insns++;
 
         if (dc->is_br)
             break;
@@ -5316,20 +5304,9 @@ static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
     }
     gen_tb_end(tb, num_insns);
 
-    if (spc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-#if 0
-        log_page_dump();
-#endif
-        gen_opc_jump_pc[0] = dc->jump_pc[0];
-        gen_opc_jump_pc[1] = dc->jump_pc[1];
-    } else {
-        tb->size = last_pc + 4 - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = last_pc + 4 - pc_start;
+    tb->icount = num_insns;
+
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log("--------------\n");
@@ -5340,16 +5317,6 @@ static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
 #endif
 }
 
-void gen_intermediate_code(CPUSPARCState * env, TranslationBlock * tb)
-{
-    gen_intermediate_code_internal(sparc_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUSPARCState * env, TranslationBlock * tb)
-{
-    gen_intermediate_code_internal(sparc_env_get_cpu(env), tb, true);
-}
-
 void gen_intermediate_code_init(CPUSPARCState *env)
 {
     unsigned int i;
@@ -5451,19 +5418,21 @@ void gen_intermediate_code_init(CPUSPARCState *env)
     }
 }
 
-void restore_state_to_opc(CPUSPARCState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUSPARCState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    target_ulong npc;
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
-    npc = gen_opc_npc[pc_pos];
-    if (npc == 1) {
+    target_ulong pc = data[0];
+    target_ulong npc = data[1];
+
+    env->pc = pc;
+    if (npc == DYNAMIC_PC) {
         /* dynamic NPC: already stored */
-    } else if (npc == 2) {
+    } else if (npc & JUMP_PC) {
         /* jump PC: use 'cond' and the jump targets of the translation */
         if (env->cond) {
-            env->npc = gen_opc_jump_pc[0];
+            env->npc = npc & ~3;
         } else {
-            env->npc = gen_opc_jump_pc[1];
+            env->npc = pc + 4;
         }
     } else {
         env->npc = npc;
diff --git a/target-tilegx/cpu.c b/target-tilegx/cpu.c
index 78b73e45c4..3c5481d443 100644
--- a/target-tilegx/cpu.c
+++ b/target-tilegx/cpu.c
@@ -22,6 +22,7 @@
 #include "qemu-common.h"
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
+#include "linux-user/syscall_defs.h"
 
 static void tilegx_cpu_dump_state(CPUState *cs, FILE *f,
                                   fprintf_function cpu_fprintf, int flags)
@@ -121,8 +122,12 @@ static int tilegx_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int rw,
 {
     TileGXCPU *cpu = TILEGX_CPU(cs);
 
-    cs->exception_index = TILEGX_EXCP_SEGV;
+    /* The sigcode field will be filled in by do_signal in main.c.  */
+    cs->exception_index = TILEGX_EXCP_SIGNAL;
     cpu->env.excaddr = address;
+    cpu->env.signo = TARGET_SIGSEGV;
+    cpu->env.sigcode = 0;
+
     return 1;
 }
 
diff --git a/target-tilegx/cpu.h b/target-tilegx/cpu.h
index b9f5082b95..03df107a67 100644
--- a/target-tilegx/cpu.h
+++ b/target-tilegx/cpu.h
@@ -53,6 +53,8 @@ enum {
     TILEGX_SPR_CMPEXCH = 0,
     TILEGX_SPR_CRITICAL_SEC = 1,
     TILEGX_SPR_SIM_CONTROL = 2,
+    TILEGX_SPR_EX_CONTEXT_0_0 = 3,
+    TILEGX_SPR_EX_CONTEXT_0_1 = 4,
     TILEGX_SPR_COUNT
 };
 
@@ -60,7 +62,7 @@ enum {
 typedef enum {
     TILEGX_EXCP_NONE = 0,
     TILEGX_EXCP_SYSCALL = 1,
-    TILEGX_EXCP_SEGV = 2,
+    TILEGX_EXCP_SIGNAL = 2,
     TILEGX_EXCP_OPCODE_UNKNOWN = 0x101,
     TILEGX_EXCP_OPCODE_UNIMPLEMENTED = 0x102,
     TILEGX_EXCP_OPCODE_CMPEXCH = 0x103,
@@ -87,10 +89,12 @@ typedef struct CPUTLGState {
     uint64_t pc;                       /* Current pc */
 
 #if defined(CONFIG_USER_ONLY)
+    uint64_t excaddr;                  /* exception address */
     uint64_t atomic_srca;              /* Arguments to atomic "exceptions" */
     uint64_t atomic_srcb;
     uint32_t atomic_dstr;
-    uint64_t excaddr;                  /* exception address */
+    uint32_t signo;                    /* Signal number */
+    uint32_t sigcode;                  /* Signal code */
 #endif
 
     CPU_COMMON
@@ -163,7 +167,6 @@ TileGXCPU *cpu_tilegx_init(const char *cpu_model);
 #define cpu_init(cpu_model) CPU(cpu_tilegx_init(cpu_model))
 
 #define cpu_exec cpu_tilegx_exec
-#define cpu_gen_code cpu_tilegx_gen_code
 #define cpu_signal_handler cpu_tilegx_signal_handler
 
 static inline void cpu_get_tb_cpu_state(CPUTLGState *env, target_ulong *pc,
diff --git a/target-tilegx/helper.c b/target-tilegx/helper.c
index a01bb8d513..dda821f5cb 100644
--- a/target-tilegx/helper.c
+++ b/target-tilegx/helper.c
@@ -21,6 +21,8 @@
 #include "cpu.h"
 #include "qemu-common.h"
 #include "exec/helper-proto.h"
+#include <zlib.h> /* For crc32 */
+#include "syscall_defs.h"
 
 void helper_exception(CPUTLGState *env, uint32_t excp)
 {
@@ -30,6 +32,27 @@ void helper_exception(CPUTLGState *env, uint32_t excp)
     cpu_loop_exit(cs);
 }
 
+/* IRET support: copy EX_CONTEXT_0_1 into CRITICAL_SEC when it is 0 or 1;
+   any other value raises an exception through helper_exception.  */
+void helper_ext01_ics(CPUTLGState *env)
+{
+    uint64_t ics = env->spregs[TILEGX_SPR_EX_CONTEXT_0_1];
+
+    if (ics <= 1) {
+        env->spregs[TILEGX_SPR_CRITICAL_SEC] = ics;
+        return;
+    }
+
+#if defined(CONFIG_USER_ONLY)
+    /* Record SIGILL/ILL_ILLOPC, then raise TILEGX_EXCP_SIGNAL.  */
+    env->signo = TARGET_SIGILL;
+    env->sigcode = TARGET_ILL_ILLOPC;
+    helper_exception(env, TILEGX_EXCP_SIGNAL);
+#else
+    helper_exception(env, TILEGX_EXCP_OPCODE_UNIMPLEMENTED);
+#endif
+}
+
 uint64_t helper_cntlz(uint64_t arg)
 {
     return clz64(arg);
@@ -78,3 +101,61 @@ uint64_t helper_shufflebytes(uint64_t dest, uint64_t srca, uint64_t srcb)
 
     return vdst;
 }
+
+uint64_t helper_crc32_8(uint64_t accum, uint64_t input)
+{
+    /* zlib's crc32 converts its accumulator and its output to one's
+       complement; XOR with all-ones on both sides to cancel that.  */
+    const uint8_t low_byte = input;
+    return crc32(accum ^ 0xffffffff, &low_byte, 1) ^ 0xffffffff;
+}
+
+uint64_t helper_crc32_32(uint64_t accum, uint64_t input)
+{
+    uint8_t le_bytes[4];
+
+    /* Serialise input into 4 bytes with stl_le_p before handing it to zlib;
+       the XORs undo zlib's one's complement of accumulator and output.  */
+    stl_le_p(le_bytes, input);
+    return crc32(accum ^ 0xffffffff, le_bytes, sizeof(le_bytes)) ^ 0xffffffff;
+}
+
+/* Complex multiply-accumulate.  srca/srcb pack signed 16-bit real (low)
+   and imaginary parts; srcd packs 32-bit real (low) and imaginary sums.  */
+uint64_t helper_cmula(uint64_t srcd, uint64_t srca, uint64_t srcb)
+{
+    uint32_t ar = (int16_t)srca, ai = (int16_t)(srca >> 16);
+    uint32_t br = (int16_t)srcb, bi = (int16_t)(srcb >> 16);
+    uint32_t re = srcd, im = srcd >> 32;
+
+    /* All arithmetic is done in uint32_t and so wraps modulo 2^32.  */
+    re += ar * br - ai * bi;
+    im += ar * bi + ai * br;
+    return deposit64(re, 32, 32, im);
+}
+
+/* Fixed-point complex multiply-accumulate on packed signed 16-bit parts:
+   each product sum is shifted right by 15 before the srcd part is added.  */
+uint64_t helper_cmulaf(uint64_t srcd, uint64_t srca, uint64_t srcb)
+{
+    uint32_t ar = (int16_t)srca, ai = (int16_t)(srca >> 16);
+    uint32_t br = (int16_t)srcb, bi = (int16_t)(srcb >> 16);
+    uint32_t dr = (int16_t)srcd, di = (int16_t)(srcd >> 16);
+    /* Products wrap in uint32_t; the shifts then act on the signed value.  */
+    int32_t re = ar * br - ai * bi;
+    int32_t im = ar * bi + ai * br;
+
+    return deposit32((re >> 15) + dr, 16, 16, (im >> 15) + di);
+}
+
+/* Complex multiply of packed signed 16-bit parts; "round" is added to each
+   product sum, which is then shifted right by "shift" and repacked.  */
+uint64_t helper_cmul2(uint64_t srca, uint64_t srcb, int shift, int round)
+{
+    uint32_t ar = (int16_t)srca, ai = (int16_t)(srca >> 16);
+    uint32_t br = (int16_t)srcb, bi = (int16_t)(srcb >> 16);
+    int32_t re = ar * br - ai * bi + round;
+    int32_t im = ar * bi + ai * br + round;
+
+    return deposit32(re >> shift, 16, 16, im >> shift);
+}
diff --git a/target-tilegx/helper.h b/target-tilegx/helper.h
index 766f5f2f9c..9281d0f428 100644
--- a/target-tilegx/helper.h
+++ b/target-tilegx/helper.h
@@ -1,10 +1,26 @@
 DEF_HELPER_2(exception, noreturn, env, i32)
+DEF_HELPER_1(ext01_ics, void, env)
 DEF_HELPER_FLAGS_1(cntlz, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(cnttz, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(pcnt, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(revbits, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(crc32_8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(crc32_32, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_3(cmula, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_3(cmulaf, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_4(cmul2, TCG_CALL_NO_RWG_SE, i64, i64, i64, int, int)
 
+DEF_HELPER_FLAGS_2(v1int_h, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v1int_l, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2int_h, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2int_l, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(v1multu, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2mults, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(v1shl, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(v1shru, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(v1shrs, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2shl, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2shru, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2shrs, TCG_CALL_NO_RWG_SE, i64, i64, i64)
diff --git a/target-tilegx/simd_helper.c b/target-tilegx/simd_helper.c
index b9319292f3..6d7bb5ce2b 100644
--- a/target-tilegx/simd_helper.c
+++ b/target-tilegx/simd_helper.c
@@ -23,12 +23,54 @@
 #include "exec/helper-proto.h"
 
 
+/* Broadcast the low 8 (V1) or 16 (V2) bits of X across all 64 bits.  */
+#define V1(X)      (((X) & 0xff) * 0x0101010101010101ull)
+#define V2(X)      (((X) & 0xffff) * 0x0001000100010001ull)
+
+
+/* Per-byte multiply; each lane keeps the low 8 bits of its product.  */
+uint64_t helper_v1multu(uint64_t a, uint64_t b)
+{
+    uint64_t result = 0;
+    int lane;
+
+    for (lane = 0; lane < 64; lane += 8) {
+        result = deposit64(result, lane, 8,
+                           extract64(a, lane, 8) * extract64(b, lane, 8));
+    }
+    return result;
+}
+
+/* Per-halfword multiply; each lane keeps the low 16 bits of its product.  */
+uint64_t helper_v2mults(uint64_t a, uint64_t b)
+{
+    uint64_t result = 0;
+    int lane;
+
+    /* The instruction is defined on signed inputs, but the truncated
+       product is the same whichever signedness is assumed.  */
+    for (lane = 0; lane < 64; lane += 16) {
+        result = deposit64(result, lane, 16,
+                           extract64(a, lane, 16) * extract64(b, lane, 16));
+    }
+    return result;
+}
+
 uint64_t helper_v1shl(uint64_t a, uint64_t b)
 {
     uint64_t m;
 
     b &= 7;
-    m = 0x0101010101010101ULL * (0xff >> b);
+    m = V1(0xff >> b);
+    return (a & m) << b;
+}
+
+uint64_t helper_v2shl(uint64_t a, uint64_t b)
+{
+    uint64_t m;
+
+    b &= 15; /* only the low 4 bits of the count are used */
+    m = V2(0xffff >> b); /* per-lane mask of the bits that stay in-lane */
     return (a & m) << b;
 }
 
@@ -37,7 +79,16 @@ uint64_t helper_v1shru(uint64_t a, uint64_t b)
     uint64_t m;
 
     b &= 7;
-    m = 0x0101010101010101ULL * ((0xff << b) & 0xff);
+    m = V1(0xff << b);
+    return (a & m) >> b;
+}
+
+uint64_t helper_v2shru(uint64_t a, uint64_t b)
+{
+    uint64_t m;
+
+    b &= 15; /* only the low 4 bits of the count are used */
+    m = V2(0xffff << b); /* drop low bits that would spill to the lane below */
     return (a & m) >> b;
 }
 
@@ -48,8 +99,67 @@ uint64_t helper_v1shrs(uint64_t a, uint64_t b)
 
     b &= 7;
     for (i = 0; i < 64; i += 8) {
-        int64_t ae = (int8_t)(a >> i);
-        r |= ((ae >> b) & 0xff) << i;
+        r = deposit64(r, i, 8, sextract64(a, i + b, 8 - b));
+    }
+    return r;
+}
+
+/* Sign-extending right shift of each 16-bit lane of a by (b & 15).  */
+uint64_t helper_v2shrs(uint64_t a, uint64_t b)
+{
+    const int sh = b & 15;
+    uint64_t out = 0;
+    int lane;
+    for (lane = 0; lane < 64; lane += 16) {
+        out = deposit64(out, lane, 16, sextract64(a, lane + sh, 16 - sh));
+    }
+    return out;
+}
+
+/* Interleave the four high bytes of a (odd bytes) and b (even bytes).  */
+uint64_t helper_v1int_h(uint64_t a, uint64_t b)
+{
+    uint64_t out = 0;
+    int n;
+    for (n = 0; n < 4; n++) {
+        out = deposit64(out, 16 * n, 8, extract64(b, 32 + 8 * n, 8));
+        out = deposit64(out, 16 * n + 8, 8, extract64(a, 32 + 8 * n, 8));
+    }
+    return out;
+}
+
+/* Interleave the four low bytes of a (odd bytes) and b (even bytes).  */
+uint64_t helper_v1int_l(uint64_t a, uint64_t b)
+{
+    uint64_t out = 0;
+    int n;
+    for (n = 0; n < 4; n++) {
+        out = deposit64(out, 16 * n, 8, extract64(b, 8 * n, 8));
+        out = deposit64(out, 16 * n + 8, 8, extract64(a, 8 * n, 8));
+    }
+    return out;
+}
+
+/* Interleave the two high halfwords of a (odd) and b (even halfwords).  */
+uint64_t helper_v2int_h(uint64_t a, uint64_t b)
+{
+    uint64_t out = 0;
+    int n;
+    for (n = 0; n < 2; n++) {
+        out = deposit64(out, 32 * n, 16, extract64(b, 32 + 16 * n, 16));
+        out = deposit64(out, 32 * n + 16, 16, extract64(a, 32 + 16 * n, 16));
+    }
+    return out;
+}
+
+/* Interleave the two low halfwords of a (odd) and b (even halfwords).  */
+uint64_t helper_v2int_l(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int n;
+    for (n = 0; n < 2; n++) {
+        r = deposit64(r, 32 * n, 16, extract64(b, 16 * n, 16));
+        r = deposit64(r, 32 * n + 16, 16, extract64(a, 16 * n, 16));
     }
     return r;
 }
diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c
index e70c3e5ab7..34d45f87b9 100644
--- a/target-tilegx/translate.c
+++ b/target-tilegx/translate.c
@@ -23,6 +23,8 @@
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "exec/cpu_ldst.h"
+#include "linux-user/syscall_defs.h"
+
 #include "opcode_tilegx.h"
 #include "spr_def_64.h"
 
@@ -96,6 +98,7 @@ typedef struct {
 #define OE_SH(E,XY)    OE(SHIFT_OPCODE_##XY, E##_SHIFT_OPCODE_##XY, XY)
 
 #define V1_IMM(X)      (((X) & 0xff) * 0x0101010101010101ull)
+#define V2_IMM(X)      (((X) & 0xffff) * 0x0001000100010001ull)
 
 
 static void gen_exception(DisasContext *dc, TileExcp num)
@@ -275,6 +278,44 @@ static void gen_mul_half(TCGv tdest, TCGv tsrca, TCGv tsrcb,
     tcg_temp_free(t);
 }
 
+/* Emit a call to helper_cmul2 with constant shift (sh) and rounding (rd).  */
+static void gen_cmul2(TCGv tdest, TCGv tsrca, TCGv tsrcb, int sh, int rd)
+{
+    TCGv_i32 shift = tcg_const_i32(sh), round = tcg_const_i32(rd);
+    gen_helper_cmul2(tdest, tsrca, tsrcb, shift, round);
+    tcg_temp_free_i32(shift);
+    tcg_temp_free_i32(round);
+}
+
+/* Emit a store of srcb to the address in srca; dest must be zero.  */
+static TileExcp gen_st_opcode(DisasContext *dc, unsigned dest, unsigned srca,
+                              unsigned srcb, TCGMemOp memop, const char *name)
+{
+    if (dest) {
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
+    }
+    tcg_gen_qemu_st_tl(load_gr(dc, srcb), load_gr(dc, srca),
+                       dc->mmuidx, memop);
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", name,
+                  reg_names[srca], reg_names[srcb]);
+    return TILEGX_EXCP_NONE;
+}
+
+/* Emit a store of srcb to the address in srca, then add imm to srca.  */
+static TileExcp gen_st_add_opcode(DisasContext *dc, unsigned srca, unsigned srcb,
+                                  int imm, TCGMemOp memop, const char *name)
+{
+    TCGv addr = load_gr(dc, srca);
+    TCGv value = load_gr(dc, srcb);
+
+    tcg_gen_qemu_st_tl(value, addr, dc->mmuidx, memop);
+    tcg_gen_addi_tl(dest_gr(dc, srca), addr, imm);
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %d", name,
+                  reg_names[srca], reg_names[srcb], imm);
+    return TILEGX_EXCP_NONE;
+}
+
 /* Equality comparison with zero can be done quickly and efficiently.  */
 static void gen_v1cmpeq0(TCGv v)
 {
@@ -310,42 +351,152 @@ static void gen_v1cmpne0(TCGv v)
     tcg_temp_free(c);
 }
 
-static TileExcp gen_st_opcode(DisasContext *dc, unsigned dest, unsigned srca,
-                              unsigned srcb, TCGMemOp memop, const char *name)
+/* Vector addition can be performed via arithmetic plus masking.  It is
+   efficient this way only for 4 or more elements.  */
+static void gen_v12add(TCGv tdest, TCGv tsrca, TCGv tsrcb, uint64_t sign)
 {
-    if (dest) {
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
-    }
+    TCGv tmask = tcg_const_tl(~sign);
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
-    tcg_gen_qemu_st_tl(load_gr(dc, srcb), load_gr(dc, srca),
-		       dc->mmuidx, memop);
+    /* ((a & ~sign) + (b & ~sign)) ^ ((a ^ b) & sign).  */
+    tcg_gen_and_tl(t0, tsrca, tmask);
+    tcg_gen_and_tl(t1, tsrcb, tmask);
+    tcg_gen_add_tl(tdest, t0, t1);
+    tcg_gen_xor_tl(t0, tsrca, tsrcb);
+    tcg_gen_andc_tl(t0, t0, tmask);
+    tcg_gen_xor_tl(tdest, tdest, t0);
 
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", name,
-                  reg_names[srca], reg_names[srcb]);
-    return TILEGX_EXCP_NONE;
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(tmask);
 }
 
-static TileExcp gen_st_add_opcode(DisasContext *dc, unsigned srca, unsigned srcb,
-                                  int imm, TCGMemOp memop, const char *name)
+/* Similarly for vector subtraction.  */
+static void gen_v12sub(TCGv tdest, TCGv tsrca, TCGv tsrcb, uint64_t sign)
 {
-    TCGv tsrca = load_gr(dc, srca);
-    TCGv tsrcb = load_gr(dc, srcb);
+    TCGv tsign = tcg_const_tl(sign);
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
-    tcg_gen_qemu_st_tl(tsrcb, tsrca, dc->mmuidx, memop);
-    tcg_gen_addi_tl(dest_gr(dc, srca), tsrca, imm);
+    /* ((a | sign) - (b & ~sign)) ^ ((a ^ ~b) & sign).  */
+    tcg_gen_or_tl(t0, tsrca, tsign);
+    tcg_gen_andc_tl(t1, tsrcb, tsign);
+    tcg_gen_sub_tl(tdest, t0, t1);
+    tcg_gen_eqv_tl(t0, tsrca, tsrcb);
+    tcg_gen_and_tl(t0, t0, tsign);
+    tcg_gen_xor_tl(tdest, tdest, t0);
 
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %d", name,
-                  reg_names[srca], reg_names[srcb], imm);
-    return TILEGX_EXCP_NONE;
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(tsign);
+}
+
+/* Shift both 32-bit halves of a64 by (b64 & 31) and repack into d64.  */
+static void gen_v4sh(TCGv d64, TCGv a64, TCGv b64,
+                     void (*generate)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 lo = tcg_temp_new_i32();
+    TCGv_i32 hi = tcg_temp_new_i32();
+    TCGv_i32 count = tcg_temp_new_i32();
+
+    tcg_gen_extr_i64_i32(lo, hi, a64);
+    tcg_gen_extrl_i64_i32(count, b64);
+    tcg_gen_andi_i32(count, count, 31);
+    generate(lo, lo, count);
+    generate(hi, hi, count);
+    tcg_gen_concat_i32_i64(d64, lo, hi);
+    tcg_temp_free_i32(lo);
+    tcg_temp_free_i32(hi);
+    tcg_temp_free_i32(count);
+}
+
+/* Apply a 32-bit op pairwise to the low and high halves of a64 and b64.  */
+static void gen_v4op(TCGv d64, TCGv a64, TCGv b64,
+                     void (*generate)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 a_lo = tcg_temp_new_i32();
+    TCGv_i32 a_hi = tcg_temp_new_i32();
+    TCGv_i32 b_lo = tcg_temp_new_i32();
+    TCGv_i32 b_hi = tcg_temp_new_i32();
+
+    tcg_gen_extr_i64_i32(a_lo, a_hi, a64);
+    tcg_gen_extr_i64_i32(b_lo, b_hi, b64);
+    generate(a_lo, a_lo, b_lo);
+    generate(a_hi, a_hi, b_hi);
+    tcg_gen_concat_i32_i64(d64, a_lo, a_hi);
+    tcg_temp_free_i32(a_lo);
+    tcg_temp_free_i32(a_hi);
+    tcg_temp_free_i32(b_lo);
+    tcg_temp_free_i32(b_hi);
+}
+
+/* Emit stores of signo and sigcode into CPUTLGState, log the mnemonic,
+   and return TILEGX_EXCP_SIGNAL.  */
+static TileExcp gen_signal(DisasContext *dc, int signo, int sigcode,
+                           const char *mnemonic)
+{
+    TCGv_i32 tsigno = tcg_const_i32(signo);
+    TCGv_i32 tsigcode = tcg_const_i32(sigcode);
+
+    tcg_gen_st_i32(tsigno, cpu_env, offsetof(CPUTLGState, signo));
+    tcg_gen_st_i32(tsigcode, cpu_env, offsetof(CPUTLGState, sigcode));
+    tcg_temp_free_i32(tsigcode);
+    tcg_temp_free_i32(tsigno);
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s", mnemonic);
+    return TILEGX_EXCP_SIGNAL;
+}
+
+/* Decode signo/sigcode from an X0 "addli zero, zero, imm"; false if absent.  */
+static bool parse_from_addli(uint64_t bundle, int *signo, int *sigcode)
+{
+    int imm;
+    if (get_Opcode_X0(bundle) != ADDLI_OPCODE_X0) {
+        return false;
+    }
+    if (get_Dest_X0(bundle) != TILEGX_R_ZERO
+        || get_SrcA_X0(bundle) != TILEGX_R_ZERO) {
+        return false;
+    }
+    /* ??? The linux kernel validates both signo and the sigcode vs the
+       known max for each signal.  Don't bother here.  */
+    imm = get_Imm16_X0(bundle);
+    *signo = imm & 0x3f;
+    *sigcode = (imm >> 6) & 0xf;
+    return true;
+}
+
+/* Translate X1 "ill" and its bpt/raise encodings into a guest signal.  */
+static TileExcp gen_specill(DisasContext *dc, unsigned dest, unsigned srca,
+                            uint64_t bundle)
+{
+    int signo = TARGET_SIGILL;
+    int sigcode = TARGET_ILL_ILLOPC;
+    const char *mnemonic = "ill";
+
+    /* parse_from_addli writes signo/sigcode only when it succeeds, so the
+       SIGILL defaults above survive a failed "raise" decode.  */
+    if (srca == 0x25) {
+        if (dest == 0x1c) {
+            signo = TARGET_SIGTRAP;
+            sigcode = TARGET_TRAP_BRKPT;
+            mnemonic = "bpt";
+        } else if (dest == 0x1d
+                   && parse_from_addli(bundle, &signo, &sigcode)) {
+            mnemonic = "raise";
+        }
+    }
+    return gen_signal(dc, signo, sigcode, mnemonic);
 }
 
 static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
-                              unsigned dest, unsigned srca)
+                              unsigned dest, unsigned srca, uint64_t bundle)
 {
     TCGv tdest, tsrca;
     const char *mnemonic;
     TCGMemOp memop;
     TileExcp ret = TILEGX_EXCP_NONE;
+    bool prefetch_nofault = false;
 
     /* Eliminate instructions with no output before doing anything else.  */
     switch (opext) {
@@ -368,10 +519,9 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         mnemonic = "flushwb";
         goto done0;
     case OE_RR_X1(ILL):
+        return gen_specill(dc, dest, srca, bundle);
     case OE_RR_Y1(ILL):
-        mnemonic = (dest == 0x1c && srca == 0x25 ? "bpt" : "ill");
-        qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s", mnemonic);
-        return TILEGX_EXCP_OPCODE_UNKNOWN;
+        return gen_signal(dc, TARGET_SIGILL, TARGET_ILL_ILLOPC, "ill");
     case OE_RR_X1(MF):
         mnemonic = "mf";
         goto done0;
@@ -379,6 +529,15 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         /* ??? This should yield, especially in system mode.  */
         mnemonic = "nap";
         goto done0;
+    case OE_RR_X1(IRET):
+        gen_helper_ext01_ics(cpu_env);
+        dc->jmp.cond = TCG_COND_ALWAYS;
+        dc->jmp.dest = tcg_temp_new();
+        tcg_gen_ld_tl(dc->jmp.dest, cpu_env,
+                      offsetof(CPUTLGState, spregs[TILEGX_SPR_EX_CONTEXT_0_0]));
+        tcg_gen_andi_tl(dc->jmp.dest, dc->jmp.dest, ~7);
+        mnemonic = "iret";
+        goto done0;
     case OE_RR_X1(SWINT0):
     case OE_RR_X1(SWINT2):
     case OE_RR_X1(SWINT3):
@@ -388,7 +547,7 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         mnemonic = "swint1";
     done0:
         if (srca || dest) {
-            return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+            return TILEGX_EXCP_OPCODE_UNKNOWN;
         }
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s", mnemonic);
         return ret;
@@ -434,7 +593,7 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         tcg_gen_andi_tl(dc->jmp.dest, load_gr(dc, srca), ~7);
     done1:
         if (dest) {
-            return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+            return TILEGX_EXCP_OPCODE_UNKNOWN;
         }
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s", mnemonic, reg_names[srca]);
         return ret;
@@ -456,31 +615,33 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_RR_X0(FSINGLE_PACK1):
     case OE_RR_Y0(FSINGLE_PACK1):
-    case OE_RR_X1(IRET):
         return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RR_X1(LD1S):
         memop = MO_SB;
-        mnemonic = "ld1s";
+        mnemonic = "ld1s"; /* prefetch_l1_fault */
         goto do_load;
     case OE_RR_X1(LD1U):
         memop = MO_UB;
-        mnemonic = "ld1u";
+        mnemonic = "ld1u"; /* prefetch, prefetch_l1 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load;
     case OE_RR_X1(LD2S):
         memop = MO_TESW;
-        mnemonic = "ld2s";
+        mnemonic = "ld2s"; /* prefetch_l2_fault */
         goto do_load;
     case OE_RR_X1(LD2U):
         memop = MO_TEUW;
-        mnemonic = "ld2u";
+        mnemonic = "ld2u"; /* prefetch_l2 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load;
     case OE_RR_X1(LD4S):
         memop = MO_TESL;
-        mnemonic = "ld4s";
+        mnemonic = "ld4s"; /* prefetch_l3_fault */
         goto do_load;
     case OE_RR_X1(LD4U):
         memop = MO_TEUL;
-        mnemonic = "ld4u";
+        mnemonic = "ld4u"; /* prefetch_l3 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load;
     case OE_RR_X1(LDNT1S):
         memop = MO_SB;
@@ -514,7 +675,9 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         memop = MO_TEQ;
         mnemonic = "ld";
     do_load:
-        tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        if (!prefetch_nofault) {
+            tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        }
         break;
     case OE_RR_X1(LDNA):
         tcg_gen_andi_tl(tdest, tsrca, ~7);
@@ -524,7 +687,7 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
     case OE_RR_X1(LNK):
     case OE_RR_Y1(LNK):
         if (srca) {
-            return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+            return TILEGX_EXCP_OPCODE_UNKNOWN;
         }
         tcg_gen_movi_tl(tdest, dc->pc + TILEGX_BUNDLE_SIZE_IN_BYTES);
         mnemonic = "lnk";
@@ -546,14 +709,29 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_RR_X0(TBLIDXB0):
     case OE_RR_Y0(TBLIDXB0):
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tsrca, 2, 8);
+        mnemonic = "tblidxb0";
+        break;
     case OE_RR_X0(TBLIDXB1):
     case OE_RR_Y0(TBLIDXB1):
+        tcg_gen_shri_tl(tdest, tsrca, 8);
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tdest, 2, 8);
+        mnemonic = "tblidxb1";
+        break;
     case OE_RR_X0(TBLIDXB2):
     case OE_RR_Y0(TBLIDXB2):
+        tcg_gen_shri_tl(tdest, tsrca, 16);
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tdest, 2, 8);
+        mnemonic = "tblidxb2";
+        break;
     case OE_RR_X0(TBLIDXB3):
     case OE_RR_Y0(TBLIDXB3):
+        tcg_gen_shri_tl(tdest, tsrca, 24);
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tdest, 2, 8);
+        mnemonic = "tblidxb3";
+        break;
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", mnemonic,
@@ -663,15 +841,41 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
         mnemonic = "cmpne";
         break;
     case OE_RRR(CMULAF, 0, X0):
+        gen_helper_cmulaf(tdest, load_gr(dc, dest), tsrca, tsrcb);
+        mnemonic = "cmulaf";
+        break;
     case OE_RRR(CMULA, 0, X0):
+        gen_helper_cmula(tdest, load_gr(dc, dest), tsrca, tsrcb);
+        mnemonic = "cmula";
+        break;
     case OE_RRR(CMULFR, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 15, 1 << 14);
+        mnemonic = "cmulfr";
+        break;
     case OE_RRR(CMULF, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 15, 0);
+        mnemonic = "cmulf";
+        break;
     case OE_RRR(CMULHR, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 16, 1 << 15);
+        mnemonic = "cmulhr";
+        break;
     case OE_RRR(CMULH, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 16, 0);
+        mnemonic = "cmulh";
+        break;
     case OE_RRR(CMUL, 0, X0):
+        gen_helper_cmula(tdest, load_zero(dc), tsrca, tsrcb);
+        mnemonic = "cmul";
+        break;
     case OE_RRR(CRC32_32, 0, X0):
+        gen_helper_crc32_32(tdest, tsrca, tsrcb);
+        mnemonic = "crc32_32";
+        break;
     case OE_RRR(CRC32_8, 0, X0):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_helper_crc32_8(tdest, tsrca, tsrcb);
+        mnemonic = "crc32_8";
+        break;
     case OE_RRR(DBLALIGN2, 0, X0):
     case OE_RRR(DBLALIGN2, 0, X1):
         gen_dblaligni(tdest, tsrca, tsrcb, 16);
@@ -1024,8 +1228,12 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_RRR(V1ADDUC, 0, X0):
     case OE_RRR(V1ADDUC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1ADD, 0, X0):
     case OE_RRR(V1ADD, 0, X1):
+        gen_v12add(tdest, tsrca, tsrcb, V1_IMM(0x80));
+        mnemonic = "v1add";
+        break;
     case OE_RRR(V1ADIFFU, 0, X0):
     case OE_RRR(V1AVGU, 0, X0):
         return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
@@ -1060,17 +1268,28 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V1DOTPUS, 0, X0):
     case OE_RRR(V1DOTPU, 0, X0):
     case OE_RRR(V1DOTP, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1INT_H, 0, X0):
     case OE_RRR(V1INT_H, 0, X1):
+        gen_helper_v1int_h(tdest, tsrca, tsrcb);
+        mnemonic = "v1int_h";
+        break;
     case OE_RRR(V1INT_L, 0, X0):
     case OE_RRR(V1INT_L, 0, X1):
+        gen_helper_v1int_l(tdest, tsrca, tsrcb);
+        mnemonic = "v1int_l";
+        break;
     case OE_RRR(V1MAXU, 0, X0):
     case OE_RRR(V1MAXU, 0, X1):
     case OE_RRR(V1MINU, 0, X0):
     case OE_RRR(V1MINU, 0, X1):
     case OE_RRR(V1MNZ, 0, X0):
     case OE_RRR(V1MNZ, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1MULTU, 0, X0):
+        gen_helper_v1multu(tdest, tsrca, tsrcb);
+        mnemonic = "v1multu";
+        break;
     case OE_RRR(V1MULUS, 0, X0):
     case OE_RRR(V1MULU, 0, X0):
     case OE_RRR(V1MZ, 0, X0):
@@ -1095,12 +1314,20 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_RRR(V1SUBUC, 0, X0):
     case OE_RRR(V1SUBUC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1SUB, 0, X0):
     case OE_RRR(V1SUB, 0, X1):
+        gen_v12sub(tdest, tsrca, tsrcb, V1_IMM(0x80));
+        mnemonic = "v1sub";
+        break;
     case OE_RRR(V2ADDSC, 0, X0):
     case OE_RRR(V2ADDSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2ADD, 0, X0):
     case OE_RRR(V2ADD, 0, X1):
+        gen_v12add(tdest, tsrca, tsrcb, V2_IMM(0x8000));
+        mnemonic = "v2add";
+        break;
     case OE_RRR(V2ADIFFS, 0, X0):
     case OE_RRR(V2AVGS, 0, X0):
     case OE_RRR(V2CMPEQ, 0, X0):
@@ -1117,10 +1344,17 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V2CMPNE, 0, X1):
     case OE_RRR(V2DOTPA, 0, X0):
     case OE_RRR(V2DOTP, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2INT_H, 0, X0):
     case OE_RRR(V2INT_H, 0, X1):
+        gen_helper_v2int_h(tdest, tsrca, tsrcb);
+        mnemonic = "v2int_h";
+        break;
     case OE_RRR(V2INT_L, 0, X0):
     case OE_RRR(V2INT_L, 0, X1):
+        gen_helper_v2int_l(tdest, tsrca, tsrcb);
+        mnemonic = "v2int_l";
+        break;
     case OE_RRR(V2MAXS, 0, X0):
     case OE_RRR(V2MAXS, 0, X1):
     case OE_RRR(V2MINS, 0, X0):
@@ -1129,7 +1363,11 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V2MNZ, 0, X1):
     case OE_RRR(V2MULFSC, 0, X0):
     case OE_RRR(V2MULS, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2MULTS, 0, X0):
+        gen_helper_v2mults(tdest, tsrca, tsrcb);
+        mnemonic = "v2mults";
+        break;
     case OE_RRR(V2MZ, 0, X0):
     case OE_RRR(V2MZ, 0, X1):
     case OE_RRR(V2PACKH, 0, X0):
@@ -1144,21 +1382,38 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V2SADU, 0, X0):
     case OE_RRR(V2SHLSC, 0, X0):
     case OE_RRR(V2SHLSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2SHL, 0, X0):
     case OE_RRR(V2SHL, 0, X1):
+        gen_helper_v2shl(tdest, tsrca, tsrcb);
+        mnemonic = "v2shl";
+        break;
     case OE_RRR(V2SHRS, 0, X0):
     case OE_RRR(V2SHRS, 0, X1):
+        gen_helper_v2shrs(tdest, tsrca, tsrcb);
+        mnemonic = "v2shrs";
+        break;
     case OE_RRR(V2SHRU, 0, X0):
     case OE_RRR(V2SHRU, 0, X1):
+        gen_helper_v2shru(tdest, tsrca, tsrcb);
+        mnemonic = "v2shru";
+        break;
     case OE_RRR(V2SUBSC, 0, X0):
     case OE_RRR(V2SUBSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2SUB, 0, X0):
     case OE_RRR(V2SUB, 0, X1):
+        gen_v12sub(tdest, tsrca, tsrcb, V2_IMM(0x8000));
+        mnemonic = "v2sub";
+        break;
     case OE_RRR(V4ADDSC, 0, X0):
     case OE_RRR(V4ADDSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V4ADD, 0, X0):
     case OE_RRR(V4ADD, 0, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_v4op(tdest, tsrca, tsrcb, tcg_gen_add_i32);
+        mnemonic = "v4add";
+        break;
     case OE_RRR(V4INT_H, 0, X0):
     case OE_RRR(V4INT_H, 0, X1):
         tcg_gen_shri_tl(tdest, tsrcb, 32);
@@ -1174,17 +1429,30 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V4PACKSC, 0, X1):
     case OE_RRR(V4SHLSC, 0, X0):
     case OE_RRR(V4SHLSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V4SHL, 0, X0):
     case OE_RRR(V4SHL, 0, X1):
+        gen_v4sh(tdest, tsrca, tsrcb, tcg_gen_shl_i32);
+        mnemonic = "v4shl";
+        break;
     case OE_RRR(V4SHRS, 0, X0):
     case OE_RRR(V4SHRS, 0, X1):
+        gen_v4sh(tdest, tsrca, tsrcb, tcg_gen_sar_i32);
+        mnemonic = "v4shrs";
+        break;
     case OE_RRR(V4SHRU, 0, X0):
     case OE_RRR(V4SHRU, 0, X1):
+        gen_v4sh(tdest, tsrca, tsrcb, tcg_gen_shr_i32);
+        mnemonic = "v4shru";
+        break;
     case OE_RRR(V4SUBSC, 0, X0):
     case OE_RRR(V4SUBSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V4SUB, 0, X0):
     case OE_RRR(V4SUB, 0, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_v4op(tdest, tsrca, tsrcb, tcg_gen_sub_i32);
+        mnemonic = "v4sub";
+        break;
     case OE_RRR(XOR, 0, X0):
     case OE_RRR(XOR, 0, X1):
     case OE_RRR(XOR, 5, Y0):
@@ -1193,7 +1461,7 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
         mnemonic = "xor";
         break;
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %s", mnemonic,
@@ -1206,6 +1474,7 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
 {
     TCGv tdest = dest_gr(dc, dest);
     TCGv tsrca = load_gr(dc, srca);
+    bool prefetch_nofault = false;
     const char *mnemonic;
     TCGMemOp memop;
     int i2, i3;
@@ -1255,27 +1524,30 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_IM(LD1S_ADD, X1):
         memop = MO_SB;
-        mnemonic = "ld1s_add";
+        mnemonic = "ld1s_add"; /* prefetch_add_l1_fault */
         goto do_load_add;
     case OE_IM(LD1U_ADD, X1):
         memop = MO_UB;
-        mnemonic = "ld1u_add";
+        mnemonic = "ld1u_add"; /* prefetch_add_l1 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load_add;
     case OE_IM(LD2S_ADD, X1):
         memop = MO_TESW;
-        mnemonic = "ld2s_add";
+        mnemonic = "ld2s_add"; /* prefetch_add_l2_fault */
         goto do_load_add;
     case OE_IM(LD2U_ADD, X1):
         memop = MO_TEUW;
-        mnemonic = "ld2u_add";
+        mnemonic = "ld2u_add"; /* prefetch_add_l2 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load_add;
     case OE_IM(LD4S_ADD, X1):
         memop = MO_TESL;
-        mnemonic = "ld4s_add";
+        mnemonic = "ld4s_add"; /* prefetch_add_l3_fault */
         goto do_load_add;
     case OE_IM(LD4U_ADD, X1):
         memop = MO_TEUL;
-        mnemonic = "ld4u_add";
+        mnemonic = "ld4u_add"; /* prefetch_add_l3 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load_add;
     case OE_IM(LDNT1S_ADD, X1):
         memop = MO_SB;
@@ -1307,9 +1579,11 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         goto do_load_add;
     case OE_IM(LD_ADD, X1):
         memop = MO_TEQ;
-        mnemonic = "ldnt_add";
+        mnemonic = "ld_add";
     do_load_add:
-        tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        if (!prefetch_nofault) {
+            tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        }
         tcg_gen_addi_tl(dest_gr(dc, srca), tsrca, imm);
         break;
     case OE_IM(LDNA_ADD, X1):
@@ -1325,6 +1599,11 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_IM(V1ADDI, X0):
     case OE_IM(V1ADDI, X1):
+        t0 = tcg_const_tl(V1_IMM(imm));
+        gen_v12add(tdest, tsrca, t0, V1_IMM(0x80));
+        tcg_temp_free(t0);
+        mnemonic = "v1addi";
+        break;
     case OE_IM(V1CMPEQI, X0):
     case OE_IM(V1CMPEQI, X1):
         tcg_gen_xori_tl(tdest, tsrca, V1_IMM(imm));
@@ -1339,8 +1618,14 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
     case OE_IM(V1MAXUI, X1):
     case OE_IM(V1MINUI, X0):
     case OE_IM(V1MINUI, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_IM(V2ADDI, X0):
     case OE_IM(V2ADDI, X1):
+        t0 = tcg_const_tl(V2_IMM(imm));
+        gen_v12add(tdest, tsrca, t0, V2_IMM(0x8000));
+        tcg_temp_free(t0);
+        mnemonic = "v2addi";
+        break;
     case OE_IM(V2CMPEQI, X0):
     case OE_IM(V2CMPEQI, X1):
     case OE_IM(V2CMPLTSI, X0):
@@ -1427,11 +1712,27 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_SH(V2SHLI, X0):
     case OE_SH(V2SHLI, X1):
+        i2 = imm & 15;
+        i3 = 0xffff >> i2;
+        tcg_gen_andi_tl(tdest, tsrca, V2_IMM(i3));
+        tcg_gen_shli_tl(tdest, tdest, i2);
+        mnemonic = "v2shli";
+        break;
     case OE_SH(V2SHRSI, X0):
     case OE_SH(V2SHRSI, X1):
+        t0 = tcg_const_tl(imm & 15);
+        gen_helper_v2shrs(tdest, tsrca, t0);
+        tcg_temp_free(t0);
+        mnemonic = "v2shrsi";
+        break;
     case OE_SH(V2SHRUI, X0):
     case OE_SH(V2SHRUI, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        i2 = imm & 15;
+        i3 = (0xffff << i2) & 0xffff;
+        tcg_gen_andi_tl(tdest, tsrca, V2_IMM(i3));
+        tcg_gen_shri_tl(tdest, tdest, i2);
+        mnemonic = "v2shrui";
+        break;
 
     case OE(ADDLI_OPCODE_X0, 0, X0):
     case OE(ADDLI_OPCODE_X1, 0, X1):
@@ -1452,7 +1753,7 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         break;
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %d", mnemonic,
@@ -1546,7 +1847,7 @@ static TileExcp gen_bf_opcode_x0(DisasContext *dc, unsigned ext,
         break;
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %u, %u", mnemonic,
@@ -1602,7 +1903,7 @@ static TileExcp gen_branch_opcode_x1(DisasContext *dc, unsigned ext,
         mnemonic = "blbs";
         break;
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -1654,6 +1955,10 @@ static const TileSPR *find_spr(unsigned spr)
       offsetof(CPUTLGState, spregs[TILEGX_SPR_CRITICAL_SEC]), 0, 0)
     D(SIM_CONTROL,
       offsetof(CPUTLGState, spregs[TILEGX_SPR_SIM_CONTROL]), 0, 0)
+    D(EX_CONTEXT_0_0,
+      offsetof(CPUTLGState, spregs[TILEGX_SPR_EX_CONTEXT_0_0]), 0, 0)
+    D(EX_CONTEXT_0_1,
+      offsetof(CPUTLGState, spregs[TILEGX_SPR_EX_CONTEXT_0_1]), 0, 0)
     }
 
 #undef D
@@ -1669,7 +1974,7 @@ static TileExcp gen_mtspr_x1(DisasContext *dc, unsigned spr, unsigned srca)
 
     if (def == NULL) {
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "mtspr spr[%u], %s", spr, reg_names[srca]);
-        return TILEGX_EXCP_OPCODE_UNKNOWN;
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     }
 
     tsrca = load_gr(dc, srca);
@@ -1689,7 +1994,7 @@ static TileExcp gen_mfspr_x1(DisasContext *dc, unsigned dest, unsigned spr)
 
     if (def == NULL) {
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "mtspr %s, spr[%u]", reg_names[dest], spr);
-        return TILEGX_EXCP_OPCODE_UNKNOWN;
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     }
 
     tdest = dest_gr(dc, dest);
@@ -1715,7 +2020,7 @@ static TileExcp decode_y0(DisasContext *dc, tilegx_bundle_bits bundle)
     case RRR_1_OPCODE_Y0:
         if (ext == UNARY_RRR_1_OPCODE_Y0) {
             ext = get_UnaryOpcodeExtension_Y0(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, Y0), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, Y0), dest, srca, bundle);
         }
         /* fallthru */
     case RRR_0_OPCODE_Y0:
@@ -1744,7 +2049,7 @@ static TileExcp decode_y0(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_rri_opcode(dc, OE(opc, 0, Y0), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1761,7 +2066,7 @@ static TileExcp decode_y1(DisasContext *dc, tilegx_bundle_bits bundle)
     case RRR_1_OPCODE_Y1:
         if (ext == UNARY_RRR_1_OPCODE_Y0) {
             ext = get_UnaryOpcodeExtension_Y1(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, Y1), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, Y1), dest, srca, bundle);
         }
         /* fallthru */
     case RRR_0_OPCODE_Y1:
@@ -1788,7 +2093,7 @@ static TileExcp decode_y1(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_rri_opcode(dc, OE(opc, 0, Y1), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1846,7 +2151,7 @@ static TileExcp decode_y2(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_st_opcode(dc, 0, srca, srcbdest, MO_TEQ, "st");
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1863,7 +2168,7 @@ static TileExcp decode_x0(DisasContext *dc, tilegx_bundle_bits bundle)
         ext = get_RRROpcodeExtension_X0(bundle);
         if (ext == UNARY_RRR_0_OPCODE_X0) {
             ext = get_UnaryOpcodeExtension_X0(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, X0), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, X0), dest, srca, bundle);
         }
         srcb = get_SrcB_X0(bundle);
         return gen_rrr_opcode(dc, OE(opc, ext, X0), dest, srca, srcb);
@@ -1891,7 +2196,7 @@ static TileExcp decode_x0(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_rri_opcode(dc, OE(opc, 0, X0), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1910,7 +2215,7 @@ static TileExcp decode_x1(DisasContext *dc, tilegx_bundle_bits bundle)
         switch (ext) {
         case UNARY_RRR_0_OPCODE_X1:
             ext = get_UnaryOpcodeExtension_X1(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, X1), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, X1), dest, srca, bundle);
         case ST1_RRR_0_OPCODE_X1:
             return gen_st_opcode(dc, dest, srca, srcb, MO_UB, "st1");
         case ST2_RRR_0_OPCODE_X1:
@@ -1981,7 +2286,7 @@ static TileExcp decode_x1(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_rri_opcode(dc, OE(opc, 0, X1), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1992,8 +2297,15 @@ static void notice_excp(DisasContext *dc, uint64_t bundle,
         return;
     }
     gen_exception(dc, excp);
-    if (excp == TILEGX_EXCP_OPCODE_UNIMPLEMENTED) {
+    switch (excp) {
+    case TILEGX_EXCP_OPCODE_UNIMPLEMENTED:
         qemu_log_mask(LOG_UNIMP, "UNIMP %s, [" FMT64X "]\n", type, bundle);
+        break;
+    case TILEGX_EXCP_OPCODE_UNKNOWN:
+        qemu_log_mask(LOG_UNIMP, "UNKNOWN %s, [" FMT64X "]\n", type, bundle);
+        break;
+    default:
+        break;
     }
 }
 
@@ -2008,10 +2320,6 @@ static void translate_one_bundle(DisasContext *dc, uint64_t bundle)
     }
     dc->num_wb = 0;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(dc->pc);
-    }
-
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "  %" PRIx64 ":  { ", dc->pc);
     if (get_Mode(bundle)) {
         notice_excp(dc, bundle, "y0", decode_y0(dc, bundle));
@@ -2053,17 +2361,14 @@ static void translate_one_bundle(DisasContext *dc, uint64_t bundle)
     }
 }
 
-static inline void gen_intermediate_code_internal(TileGXCPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+void gen_intermediate_code(CPUTLGState *env, struct TranslationBlock *tb)
 {
+    TileGXCPU *cpu = tilegx_env_get_cpu(env);
     DisasContext ctx;
     DisasContext *dc = &ctx;
     CPUState *cs = CPU(cpu);
-    CPUTLGState *env = &cpu->env;
     uint64_t pc_start = tb->pc;
     uint64_t next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    int j, lj = -1;
     int num_insns = 0;
     int max_insns = tb->cflags & CF_COUNT_MASK;
 
@@ -2085,21 +2390,15 @@ static inline void gen_intermediate_code_internal(TileGXCPU *cpu,
     if (cs->singlestep_enabled || singlestep) {
         max_insns = 1;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
     gen_tb_start(tb);
 
     while (1) {
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
         translate_one_bundle(dc, cpu_ldq_data(env, dc->pc));
 
         if (dc->exit_tb) {
@@ -2107,7 +2406,7 @@ static inline void gen_intermediate_code_internal(TileGXCPU *cpu,
             break;
         }
         dc->pc += TILEGX_BUNDLE_SIZE_IN_BYTES;
-        if (++num_insns >= max_insns
+        if (num_insns >= max_insns
             || dc->pc >= next_page_start
             || tcg_op_buf_full()) {
             /* Ending the TB due to TB size or page boundary.  Set PC.  */
@@ -2118,33 +2417,16 @@ static inline void gen_intermediate_code_internal(TileGXCPU *cpu,
     }
 
     gen_tb_end(tb, num_insns);
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "\n");
 }
 
-void gen_intermediate_code(CPUTLGState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(tilegx_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUTLGState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(tilegx_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUTLGState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUTLGState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
 
 void tilegx_tcg_init(void)
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 440f30a843..135c58347a 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -8266,21 +8266,26 @@ static void decode_opc(CPUTriCoreState *env, DisasContext *ctx, int *is_branch)
     }
 }
 
-static inline void
-gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
-                              int search_pc)
+void gen_intermediate_code(CPUTriCoreState *env, struct TranslationBlock *tb)
 {
+    TriCoreCPU *cpu = tricore_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUTriCoreState *env = &cpu->env;
     DisasContext ctx;
     target_ulong pc_start;
-    int num_insns;
+    int num_insns, max_insns;
 
-    if (search_pc) {
-        qemu_log("search pc %d\n", search_pc);
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0) {
+        max_insns = CF_COUNT_MASK;
+    }
+    if (singlestep) {
+        max_insns = 1;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
     }
 
-    num_insns = 0;
     pc_start = tb->pc;
     ctx.pc = pc_start;
     ctx.saved_pc = -1;
@@ -8292,17 +8297,13 @@ gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
     tcg_clear_temp_count();
     gen_tb_start(tb);
     while (ctx.bstate == BS_NONE) {
+        tcg_gen_insn_start(ctx.pc);
+        num_insns++;
+
         ctx.opcode = cpu_ldl_code(env, ctx.pc);
         decode_opc(env, &ctx, 0);
 
-        num_insns++;
-
-        if (tcg_op_buf_full()) {
-            gen_save_pc(ctx.next_pc);
-            tcg_gen_exit_tb(0);
-            break;
-        }
-        if (singlestep) {
+        if (num_insns >= max_insns || tcg_op_buf_full()) {
             gen_save_pc(ctx.next_pc);
             tcg_gen_exit_tb(0);
             break;
@@ -8311,12 +8312,9 @@ gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
     }
 
     gen_tb_end(tb, num_insns);
-    if (search_pc) {
-        printf("done_generating search pc\n");
-    } else {
-        tb->size = ctx.pc - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = ctx.pc - pc_start;
+    tb->icount = num_insns;
+
     if (tcg_check_temp_count()) {
         printf("LEAK at %08x\n", env->PC);
     }
@@ -8331,21 +8329,10 @@ gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
 }
 
 void
-gen_intermediate_code(CPUTriCoreState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(tricore_env_get_cpu(env), tb, false);
-}
-
-void
-gen_intermediate_code_pc(CPUTriCoreState *env, struct TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(tricore_env_get_cpu(env), tb, true);
-}
-
-void
-restore_state_to_opc(CPUTriCoreState *env, TranslationBlock *tb, int pc_pos)
+restore_state_to_opc(CPUTriCoreState *env, TranslationBlock *tb,
+                     target_ulong *data)
 {
-    env->PC = tcg_ctx.gen_opc_pc[pc_pos];
+    env->PC = data[0];
 }
 /*
  *
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index 2fc78e6f3e..48f89fb1c5 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -1794,10 +1794,6 @@ static void disas_uc32_insn(CPUUniCore32State *env, DisasContext *s)
     UniCore32CPU *cpu = uc32_env_get_cpu(env);
     unsigned int insn;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(s->pc);
-    }
-
     insn = cpu_ldl_code(env, s->pc);
     s->pc += 4;
 
@@ -1868,16 +1864,12 @@ static void disas_uc32_insn(CPUUniCore32State *env, DisasContext *s)
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
-   basic block 'tb'. If search_pc is TRUE, also generate PC
-   information for each intermediate instruction. */
-static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
-        TranslationBlock *tb, bool search_pc)
+   basic block 'tb'.  */
+void gen_intermediate_code(CPUUniCore32State *env, TranslationBlock *tb)
 {
+    UniCore32CPU *cpu = uc32_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUUniCore32State *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
-    CPUBreakpoint *bp;
-    int j, lj;
     target_ulong pc_start;
     uint32_t next_page_start;
     int num_insns;
@@ -1899,12 +1891,14 @@ static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
     cpu_F0d = tcg_temp_new_i64();
     cpu_F1d = tcg_temp_new_i64();
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
 #ifndef CONFIG_USER_ONLY
     if ((env->uncached_asr & ASR_M) == ASR_MODE_USER) {
@@ -1916,33 +1910,20 @@ static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
 
     gen_tb_start(tb);
     do {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == dc->pc) {
-                    gen_set_pc_im(dc->pc);
-                    gen_exception(EXCP_DEBUG);
-                    dc->is_jmp = DISAS_JUMP;
-                    /* Advance PC so that clearing the breakpoint will
-                       invalidate this TB.  */
-                    dc->pc += 2; /* FIXME */
-                    goto done_generating;
-                }
-            }
-        }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc->pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
+        tcg_gen_insn_start(dc->pc);
+        num_insns++;
+
+        if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
+            gen_set_pc_im(dc->pc);
+            gen_exception(EXCP_DEBUG);
+            dc->is_jmp = DISAS_JUMP;
+            /* Advance PC so that clearing the breakpoint will
+               invalidate this TB.  */
+            dc->pc += 2; /* FIXME */
+            goto done_generating;
         }
 
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
 
@@ -1961,7 +1942,6 @@ static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
-        num_insns++;
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
@@ -2043,26 +2023,8 @@ done_generating:
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j) {
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-        }
-    } else {
-        tb->size = dc->pc - pc_start;
-        tb->icount = num_insns;
-    }
-}
-
-void gen_intermediate_code(CPUUniCore32State *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(uc32_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUUniCore32State *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(uc32_env_get_cpu(env), tb, true);
+    tb->size = dc->pc - pc_start;
+    tb->icount = num_insns;
 }
 
 static const char *cpu_mode_names[16] = {
@@ -2133,7 +2095,8 @@ void uc32_cpu_dump_state(CPUState *cs, FILE *f,
     cpu_dump_state_ucf64(env, f, cpu_fprintf, flags);
 }
 
-void restore_state_to_opc(CPUUniCore32State *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUUniCore32State *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->regs[31] = tcg_ctx.gen_opc_pc[pc_pos];
+    env->regs[31] = data[0];
 }
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 148a0f8d92..006bcb78b8 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -382,7 +382,6 @@ typedef struct CPUXtensaState {
 #include "cpu-qom.h"
 
 #define cpu_exec cpu_xtensa_exec
-#define cpu_gen_code cpu_xtensa_gen_code
 #define cpu_signal_handler cpu_xtensa_signal_handler
 #define cpu_list xtensa_cpu_list
 
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index a29b3e61bc..fda91b7e5d 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -2984,22 +2984,6 @@ static inline unsigned xtensa_insn_len(CPUXtensaState *env, DisasContext *dc)
     return xtensa_op0_insn_len(OP0);
 }
 
-static void check_breakpoint(CPUXtensaState *env, DisasContext *dc)
-{
-    CPUState *cs = CPU(xtensa_env_get_cpu(env));
-    CPUBreakpoint *bp;
-
-    if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-            if (bp->pc == dc->pc) {
-                tcg_gen_movi_i32(cpu_pc, dc->pc);
-                gen_exception(dc, EXCP_DEBUG);
-                dc->is_jmp = DISAS_UPDATE;
-             }
-        }
-    }
-}
-
 static void gen_ibreak_check(CPUXtensaState *env, DisasContext *dc)
 {
     unsigned i;
@@ -3013,15 +2997,12 @@ static void gen_ibreak_check(CPUXtensaState *env, DisasContext *dc)
     }
 }
 
-static inline
-void gen_intermediate_code_internal(XtensaCPU *cpu,
-                                    TranslationBlock *tb, bool search_pc)
+void gen_intermediate_code(CPUXtensaState *env, TranslationBlock *tb)
 {
+    XtensaCPU *cpu = xtensa_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUXtensaState *env = &cpu->env;
     DisasContext dc;
     int insn_count = 0;
-    int j, lj = -1;
     int max_insns = tb->cflags & CF_COUNT_MASK;
     uint32_t pc_start = tb->pc;
     uint32_t next_page_start =
@@ -3030,6 +3011,9 @@ void gen_intermediate_code_internal(XtensaCPU *cpu,
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
     }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     dc.config = env->config;
     dc.singlestep_enabled = cs->singlestep_enabled;
@@ -3062,28 +3046,19 @@ void gen_intermediate_code_internal(XtensaCPU *cpu,
     }
 
     do {
-        check_breakpoint(env, &dc);
+        tcg_gen_insn_start(dc.pc);
+        ++insn_count;
 
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j) {
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-                }
-            }
-            tcg_ctx.gen_opc_pc[lj] = dc.pc;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = insn_count;
-        }
+        ++dc.ccount_delta;
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-            tcg_gen_debug_insn_start(dc.pc);
+        if (unlikely(cpu_breakpoint_test(cs, dc.pc, BP_ANY))) {
+            tcg_gen_movi_i32(cpu_pc, dc.pc);
+            gen_exception(&dc, EXCP_DEBUG);
+            dc.is_jmp = DISAS_UPDATE;
+            break;
         }
 
-        ++dc.ccount_delta;
-
-        if (insn_count + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+        if (insn_count == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
 
@@ -3104,7 +3079,6 @@ void gen_intermediate_code_internal(XtensaCPU *cpu,
         }
 
         disas_xtensa_insn(env, &dc);
-        ++insn_count;
         if (dc.icount) {
             tcg_gen_mov_i32(cpu_SR[ICOUNT], dc.next_icount);
         }
@@ -3142,24 +3116,8 @@ void gen_intermediate_code_internal(XtensaCPU *cpu,
         qemu_log("\n");
     }
 #endif
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        memset(tcg_ctx.gen_opc_instr_start + lj + 1, 0,
-                (j - lj) * sizeof(tcg_ctx.gen_opc_instr_start[0]));
-    } else {
-        tb->size = dc.pc - pc_start;
-        tb->icount = insn_count;
-    }
-}
-
-void gen_intermediate_code(CPUXtensaState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(xtensa_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUXtensaState *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(xtensa_env_get_cpu(env), tb, true);
+    tb->size = dc.pc - pc_start;
+    tb->icount = insn_count;
 }
 
 void xtensa_cpu_dump_state(CPUState *cs, FILE *f,
@@ -3213,7 +3171,8 @@ void xtensa_cpu_dump_state(CPUState *cs, FILE *f,
     }
 }
 
-void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb, int pc_pos)
+void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    env->pc = data[0];
 }
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 6da083a1e9..4e20dc1a6b 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -700,17 +700,53 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
 #error must include QEMU headers
 #endif
 
-/* debug info: write the PC of the corresponding QEMU CPU instruction */
-static inline void tcg_gen_debug_insn_start(uint64_t pc)
+#if TARGET_INSN_START_WORDS == 1
+# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+static inline void tcg_gen_insn_start(target_ulong pc)
 {
-    /* XXX: must really use a 32 bit size for TCGArg in all cases */
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-    tcg_gen_op2ii(INDEX_op_debug_insn_start,
-                  (uint32_t)(pc), (uint32_t)(pc >> 32));
+    tcg_gen_op1(&tcg_ctx, INDEX_op_insn_start, pc);
+}
+# else
+static inline void tcg_gen_insn_start(target_ulong pc)
+{
+    tcg_gen_op2(&tcg_ctx, INDEX_op_insn_start,
+                (uint32_t)pc, (uint32_t)(pc >> 32));
+}
+# endif
+#elif TARGET_INSN_START_WORDS == 2
+# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
+{
+    tcg_gen_op2(&tcg_ctx, INDEX_op_insn_start, pc, a1);
+}
+# else
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
+{
+    tcg_gen_op4(&tcg_ctx, INDEX_op_insn_start,
+                (uint32_t)pc, (uint32_t)(pc >> 32),
+                (uint32_t)a1, (uint32_t)(a1 >> 32));
+}
+# endif
+#elif TARGET_INSN_START_WORDS == 3
+# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
+                                      target_ulong a2)
+{
+    tcg_gen_op3(&tcg_ctx, INDEX_op_insn_start, pc, a1, a2);
+}
+# else
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
+                                      target_ulong a2)
+{
+    tcg_gen_op6(&tcg_ctx, INDEX_op_insn_start,
+                (uint32_t)pc, (uint32_t)(pc >> 32),
+                (uint32_t)a1, (uint32_t)(a1 >> 32),
+                (uint32_t)a2, (uint32_t)(a2 >> 32));
+}
+# endif
 #else
-    tcg_gen_op1i(INDEX_op_debug_insn_start, pc);
+# error "Unhandled number of operands to insn_start"
 #endif
-}
 
 static inline void tcg_gen_exit_tb(uintptr_t val)
 {
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 02bbf30387..c6f95703eb 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -175,9 +175,9 @@ DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
 
 /* QEMU specific */
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-DEF(debug_insn_start, 0, 0, 2, TCG_OPF_NOT_PRESENT)
+DEF(insn_start, 0, 0, 2 * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
 #else
-DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT)
+DEF(insn_start, 0, 0, TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT)
 #endif
 DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
 DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a2cb027a14..682af8a7c6 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -363,17 +363,39 @@ void tcg_context_init(TCGContext *s)
 
 void tcg_prologue_init(TCGContext *s)
 {
-    /* init global prologue and epilogue */
-    s->code_buf = s->code_gen_prologue;
-    s->code_ptr = s->code_buf;
+    size_t prologue_size, total_size;
+    void *buf0, *buf1;
+
+    /* Put the prologue at the beginning of code_gen_buffer.  */
+    buf0 = s->code_gen_buffer;
+    s->code_ptr = buf0;
+    s->code_buf = buf0;
+    s->code_gen_prologue = buf0;
+
+    /* Generate the prologue.  */
     tcg_target_qemu_prologue(s);
-    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+    buf1 = s->code_ptr;
+    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
+
+    /* Deduct the prologue from the buffer.  */
+    prologue_size = tcg_current_code_size(s);
+    s->code_gen_ptr = buf1;
+    s->code_gen_buffer = buf1;
+    s->code_buf = buf1;
+    total_size = s->code_gen_buffer_size - prologue_size;
+    s->code_gen_buffer_size = total_size;
+
+    /* Compute a high-water mark, at which we voluntarily flush the buffer
+       and start over.  The size here is arbitrary, significantly larger
+       than we expect the code generation for any one opcode to require.  */
+    s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);
+
+    tcg_register_jit(s->code_gen_buffer, total_size);
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
-        size_t size = tcg_current_code_size(s);
-        qemu_log("PROLOGUE: [size=%zu]\n", size);
-        log_disas(s->code_buf, size);
+        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
+        log_disas(buf0, prologue_size);
         qemu_log("\n");
         qemu_log_flush();
     }
@@ -990,17 +1012,18 @@ void tcg_dump_ops(TCGContext *s)
         def = &tcg_op_defs[c];
         args = &s->gen_opparam_buf[op->args];
 
-        if (c == INDEX_op_debug_insn_start) {
-            uint64_t pc;
+        if (c == INDEX_op_insn_start) {
+            qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
+
+            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
+                target_ulong a;
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-            pc = ((uint64_t)args[1] << 32) | args[0];
+                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
 #else
-            pc = args[0];
+                a = args[i];
 #endif
-            if (oi != s->gen_first_op_idx) {
-                qemu_log("\n");
+                qemu_log(" " TARGET_FMT_lx, a);
             }
-            qemu_log(" ---- 0x%" PRIx64, pc);
         } else if (c == INDEX_op_call) {
             /* variable number of arguments */
             nb_oargs = op->callo;
@@ -1400,7 +1423,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                 }
             }
             break;
-        case INDEX_op_debug_insn_start:
+        case INDEX_op_insn_start:
             break;
         case INDEX_op_discard:
             /* mark the temporary as dead */
@@ -2289,11 +2312,27 @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
 #endif
 
 
-static inline int tcg_gen_code_common(TCGContext *s,
-                                      tcg_insn_unit *gen_code_buf,
-                                      long search_pc)
+int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
 {
-    int oi, oi_next;
+    int i, oi, oi_next, num_insns;
+
+#ifdef CONFIG_PROFILER
+    {
+        int n;
+
+        n = s->gen_last_op_idx + 1;
+        s->op_count += n;
+        if (n > s->op_count_max) {
+            s->op_count_max = n;
+        }
+
+        n = s->nb_temps;
+        s->temp_count += n;
+        if (n > s->temp_count_max) {
+            s->temp_count_max = n;
+        }
+    }
+#endif
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
@@ -2337,6 +2376,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
 
     tcg_out_tb_init(s);
 
+    num_insns = -1;
     for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
         TCGOp * const op = &s->gen_op_buf[oi];
         TCGArg * const args = &s->gen_opparam_buf[op->args];
@@ -2359,7 +2399,20 @@ static inline int tcg_gen_code_common(TCGContext *s,
         case INDEX_op_movi_i64:
             tcg_reg_alloc_movi(s, args, dead_args, sync_args);
             break;
-        case INDEX_op_debug_insn_start:
+        case INDEX_op_insn_start:
+            if (num_insns >= 0) {
+                s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
+            }
+            num_insns++;
+            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
+                target_ulong a;
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
+#else
+                a = args[i];
+#endif
+                s->gen_insn_data[num_insns][i] = a;
+            }
             break;
         case INDEX_op_discard:
             temp_dead(s, args[0]);
@@ -2383,40 +2436,22 @@ static inline int tcg_gen_code_common(TCGContext *s,
             tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
             break;
         }
-        if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
-            return oi;
-        }
 #ifndef NDEBUG
         check_regs(s);
 #endif
+        /* Test for (pending) buffer overflow.  The assumption is that any
+           one operation beginning below the high water mark cannot overrun
+           the buffer completely.  Thus we can test for overflow after
+           generating code without having to check during generation.  */
+        if (unlikely(s->code_gen_ptr > s->code_gen_highwater)) {
+            return -1;
+        }
     }
+    tcg_debug_assert(num_insns >= 0);
+    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
 
     /* Generate TB finalization at the end of block */
     tcg_out_tb_finalize(s);
-    return -1;
-}
-
-int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
-{
-#ifdef CONFIG_PROFILER
-    {
-        int n;
-
-        n = s->gen_last_op_idx + 1;
-        s->op_count += n;
-        if (n > s->op_count_max) {
-            s->op_count_max = n;
-        }
-
-        n = s->nb_temps;
-        s->temp_count += n;
-        if (n > s->temp_count_max) {
-            s->temp_count_max = n;
-        }
-    }
-#endif
-
-    tcg_gen_code_common(s, gen_code_buf, -1);
 
     /* flush instruction cache */
     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
@@ -2424,38 +2459,30 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
     return tcg_current_code_size(s);
 }
 
-/* Return the index of the micro operation such as the pc after is <
-   offset bytes from the start of the TB.  The contents of gen_code_buf must
-   not be changed, though writing the same values is ok.
-   Return -1 if not found. */
-int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
-                           long offset)
-{
-    return tcg_gen_code_common(s, gen_code_buf, offset);
-}
-
 #ifdef CONFIG_PROFILER
 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
 {
     TCGContext *s = &tcg_ctx;
-    int64_t tot;
+    int64_t tb_count = s->tb_count;
+    int64_t tb_div_count = tb_count ? tb_count : 1;
+    int64_t tot = s->interm_time + s->code_time;
 
-    tot = s->interm_time + s->code_time;
     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                 tot, tot / 2.4e9);
     cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", 
-                s->tb_count, 
-                s->tb_count1 - s->tb_count,
-                s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
+                tb_count, s->tb_count1 - tb_count,
+                (double)(s->tb_count1 - s->tb_count)
+                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
     cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n", 
-                s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
+                (double)s->op_count / tb_div_count, s->op_count_max);
     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
-                s->tb_count ? 
-                (double)s->del_op_count / s->tb_count : 0);
+                (double)s->del_op_count / tb_div_count);
     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
-                s->tb_count ? 
-                (double)s->temp_count / s->tb_count : 0,
-                s->temp_count_max);
+                (double)s->temp_count / tb_div_count, s->temp_count_max);
+    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
+                (double)s->code_out_len / tb_div_count);
+    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
+                (double)s->search_out_len / tb_div_count);
     
     cpu_fprintf(f, "cycles/op           %0.1f\n", 
                 s->op_count ? (double)tot / s->op_count : 0);
@@ -2463,8 +2490,11 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
                 s->code_in_len ? (double)tot / s->code_in_len : 0);
     cpu_fprintf(f, "cycles/out byte     %0.1f\n", 
                 s->code_out_len ? (double)tot / s->code_out_len : 0);
-    if (tot == 0)
+    cpu_fprintf(f, "cycles/search byte     %0.1f\n",
+                s->search_out_len ? (double)tot / s->search_out_len : 0);
+    if (tot == 0) {
         tot = 1;
+    }
     cpu_fprintf(f, "  gen_interm time   %0.1f%%\n", 
                 (double)s->interm_time / tot * 100.0);
     cpu_fprintf(f, "  gen_code time     %0.1f%%\n", 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 879a665012..a696922420 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -129,6 +129,12 @@ typedef uint64_t TCGRegSet;
 # error "Missing unsigned widening multiply"
 #endif
 
+#ifndef TARGET_INSN_START_EXTRA_WORDS
+# define TARGET_INSN_START_WORDS 1
+#else
+# define TARGET_INSN_START_WORDS (1 + TARGET_INSN_START_EXTRA_WORDS)
+#endif
+
 typedef enum TCGOpcode {
 #define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
 #include "tcg-opc.h"
@@ -188,6 +194,7 @@ typedef struct TCGPool {
 #define TCG_POOL_CHUNK_SIZE 32768
 
 #define TCG_MAX_TEMPS 512
+#define TCG_MAX_INSNS 512
 
 /* when the size of the arguments of a called function is smaller than
    this value, they are statically allocated in the TB stack frame */
@@ -525,6 +532,7 @@ struct TCGContext {
     int64_t del_op_count;
     int64_t code_in_len;
     int64_t code_out_len;
+    int64_t search_out_len;
     int64_t interm_time;
     int64_t code_time;
     int64_t la_time;
@@ -551,10 +559,11 @@ struct TCGContext {
     void *code_gen_prologue;
     void *code_gen_buffer;
     size_t code_gen_buffer_size;
-    /* threshold to flush the translated code buffer */
-    size_t code_gen_buffer_max_size;
     void *code_gen_ptr;
 
+    /* Threshold to flush the translated code buffer.  */
+    void *code_gen_highwater;
+
     TBContext tb_ctx;
 
     /* The TCGBackendData structure is private to tcg-target.c.  */
@@ -570,9 +579,8 @@ struct TCGContext {
     TCGOp gen_op_buf[OPC_BUF_SIZE];
     TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
 
-    target_ulong gen_opc_pc[OPC_BUF_SIZE];
-    uint16_t gen_opc_icount[OPC_BUF_SIZE];
-    uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+    uint16_t gen_insn_end_off[TCG_MAX_INSNS];
+    target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
 };
 
 extern TCGContext tcg_ctx;
@@ -619,8 +627,6 @@ void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
 int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf);
-int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
-                           long offset);
 
 void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size);
 
diff --git a/tci.c b/tci.c
index 70eaab25eb..b5ed7b1f7a 100644
--- a/tci.c
+++ b/tci.c
@@ -1081,15 +1081,6 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 
             /* QEMU specific operations. */
 
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        case INDEX_op_debug_insn_start:
-            TODO();
-            break;
-#else
-        case INDEX_op_debug_insn_start:
-            TODO();
-            break;
-#endif
         case INDEX_op_exit_tb:
             next_tb = *(uint64_t *)tb_ptr;
             goto exit;
diff --git a/tests/Makefile b/tests/Makefile
index 4063639a59..e6474ba31b 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -188,7 +188,9 @@ gcov-files-i386-y += hw/usb/hcd-xhci.c
 check-qtest-i386-y += tests/pc-cpu-test$(EXESUF)
 check-qtest-i386-y += tests/q35-test$(EXESUF)
 gcov-files-i386-y += hw/pci-host/q35.c
+ifeq ($(CONFIG_VHOST_NET),y)
 check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF)
+endif
 check-qtest-x86_64-y = $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
 gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
diff --git a/tests/ide-test.c b/tests/ide-test.c
index 559473812c..b6e9e1a232 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -633,7 +633,7 @@ static void send_scsi_cdb_read10(uint64_t lba, int nblocks)
 
     /* Send Packet */
     for (i = 0; i < sizeof(Read10CDB)/2; i++) {
-        outw(IDE_BASE + reg_data, ((uint16_t *)&pkt)[i]);
+        outw(IDE_BASE + reg_data, cpu_to_le16(((uint16_t *)&pkt)[i]));
     }
 }
 
@@ -733,7 +733,7 @@ static void cdrom_pio_impl(int nblocks)
         size_t offset = i * (limit / 2);
         size_t rem = (rxsize / 2) - offset;
         for (j = 0; j < MIN((limit / 2), rem); j++) {
-            rx[offset + j] = inw(IDE_BASE + reg_data);
+            rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data));
         }
         ide_wait_intr(IDE_PRIMARY_IRQ);
     }
diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c
index cf66b3e32c..adb2665c6d 100644
--- a/tests/libqos/ahci.c
+++ b/tests/libqos/ahci.c
@@ -742,7 +742,7 @@ AHCICommand *ahci_command_create(uint8_t command_name)
     g_assert(!(props->lba28 && props->lba48));
     g_assert(!(props->read && props->write));
     g_assert(!props->size || props->data);
-    g_assert(!props->ncq || (props->ncq && props->lba48));
+    g_assert(!props->ncq || props->lba48);
 
     /* Defaults and book-keeping */
     cmd->props = props;
diff --git a/translate-all.c b/translate-all.c
index 4a9ee33dac..333eba4f5d 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -168,127 +168,137 @@ void cpu_gen_init(void)
     tcg_context_init(&tcg_ctx); 
 }
 
-/* return non zero if the very first instruction is invalid so that
- * the virtual CPU can trigger an exception.
- *
- * '*gen_code_size_ptr' contains the size of the generated code (host
- * code).
- *
- * Called with mmap_lock held for user-mode emulation.
- */
-int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr)
+/* Encode VAL as a signed leb128 sequence at P.
+   Return P incremented past the encoded value.  */
+static uint8_t *encode_sleb128(uint8_t *p, target_long val)
 {
-    TCGContext *s = &tcg_ctx;
-    tcg_insn_unit *gen_code_buf;
-    int gen_code_size;
-#ifdef CONFIG_PROFILER
-    int64_t ti;
-#endif
+    int more, byte;
+
+    do {
+        byte = val & 0x7f;
+        val >>= 7;
+        more = !((val == 0 && (byte & 0x40) == 0)
+                 || (val == -1 && (byte & 0x40) != 0));
+        if (more) {
+            byte |= 0x80;
+        }
+        *p++ = byte;
+    } while (more);
 
-#ifdef CONFIG_PROFILER
-    s->tb_count1++; /* includes aborted translations because of
-                       exceptions */
-    ti = profile_getclock();
-#endif
-    tcg_func_start(s);
+    return p;
+}
 
-    gen_intermediate_code(env, tb);
+/* Decode a signed leb128 sequence at *PP; increment *PP past the
+   decoded value.  Return the decoded value.  */
+static target_long decode_sleb128(uint8_t **pp)
+{
+    uint8_t *p = *pp;
+    target_long val = 0;
+    int byte, shift = 0;
 
-    trace_translate_block(tb, tb->pc, tb->tc_ptr);
+    do {
+        byte = *p++;
+        val |= (target_ulong)(byte & 0x7f) << shift;
+        shift += 7;
+    } while (byte & 0x80);
+    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
+        val |= -(target_ulong)1 << shift;
+    }
 
-    /* generate machine code */
-    gen_code_buf = tb->tc_ptr;
-    tb->tb_next_offset[0] = 0xffff;
-    tb->tb_next_offset[1] = 0xffff;
-    s->tb_next_offset = tb->tb_next_offset;
-#ifdef USE_DIRECT_JUMP
-    s->tb_jmp_offset = tb->tb_jmp_offset;
-    s->tb_next = NULL;
-#else
-    s->tb_jmp_offset = NULL;
-    s->tb_next = tb->tb_next;
-#endif
+    *pp = p;
+    return val;
+}
 
-#ifdef CONFIG_PROFILER
-    s->tb_count++;
-    s->interm_time += profile_getclock() - ti;
-    s->code_time -= profile_getclock();
-#endif
-    gen_code_size = tcg_gen_code(s, gen_code_buf);
-    *gen_code_size_ptr = gen_code_size;
-#ifdef CONFIG_PROFILER
-    s->code_time += profile_getclock();
-    s->code_in_len += tb->size;
-    s->code_out_len += gen_code_size;
-#endif
+/* Encode the data collected about the instructions while compiling TB.
+   Place the data at BLOCK, and return the number of bytes consumed.
 
-#ifdef DEBUG_DISAS
-    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
-        qemu_log("OUT: [size=%d]\n", gen_code_size);
-        log_disas(tb->tc_ptr, gen_code_size);
-        qemu_log("\n");
-        qemu_log_flush();
+   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
+   which come from the target's insn_start data, followed by a uintptr_t
+   which comes from the host pc of the end of the code implementing the insn.
+
+   Each line of the table is encoded as sleb128 deltas from the previous
+   line.  The seed for the first line is { tb->pc, 0..., tb->tc_ptr }.
+   That is, the first column is seeded with the guest pc, the last column
+   with the host pc, and the middle columns with zeros.  */
+
+static int encode_search(TranslationBlock *tb, uint8_t *block)
+{
+    uint8_t *highwater = tcg_ctx.code_gen_highwater;
+    uint8_t *p = block;
+    int i, j, n;
+
+    tb->tc_search = block;
+
+    for (i = 0, n = tb->icount; i < n; ++i) {
+        target_ulong prev;
+
+        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+            if (i == 0) {
+                prev = (j == 0 ? tb->pc : 0);
+            } else {
+                prev = tcg_ctx.gen_insn_data[i - 1][j];
+            }
+            p = encode_sleb128(p, tcg_ctx.gen_insn_data[i][j] - prev);
+        }
+        prev = (i == 0 ? 0 : tcg_ctx.gen_insn_end_off[i - 1]);
+        p = encode_sleb128(p, tcg_ctx.gen_insn_end_off[i] - prev);
+
+        /* Test for (pending) buffer overflow.  The assumption is that any
+           one row beginning below the high water mark cannot overrun
+           the buffer completely.  Thus we can test for overflow after
+           encoding a row without having to check during encoding.  */
+        if (unlikely(p > highwater)) {
+            return -1;
+        }
     }
-#endif
-    return 0;
+
+    return p - block;
 }
 
-/* The cpu state corresponding to 'searched_pc' is restored.
- */
+/* The cpu state corresponding to 'searched_pc' is restored.  */
 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                      uintptr_t searched_pc)
 {
+    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
+    uintptr_t host_pc = (uintptr_t)tb->tc_ptr;
     CPUArchState *env = cpu->env_ptr;
-    TCGContext *s = &tcg_ctx;
-    int j;
-    uintptr_t tc_ptr;
+    uint8_t *p = tb->tc_search;
+    int i, j, num_insns = tb->icount;
 #ifdef CONFIG_PROFILER
-    int64_t ti;
+    int64_t ti = profile_getclock();
 #endif
 
-#ifdef CONFIG_PROFILER
-    ti = profile_getclock();
-#endif
-    tcg_func_start(s);
+    if (searched_pc < host_pc) {
+        return -1;
+    }
 
-    gen_intermediate_code_pc(env, tb);
+    /* Reconstruct the stored insn data while looking for the point at
+       which the end of the insn exceeds the searched_pc.  */
+    for (i = 0; i < num_insns; ++i) {
+        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+            data[j] += decode_sleb128(&p);
+        }
+        host_pc += decode_sleb128(&p);
+        if (host_pc > searched_pc) {
+            goto found;
+        }
+    }
+    return -1;
 
+ found:
     if (tb->cflags & CF_USE_ICOUNT) {
         assert(use_icount);
         /* Reset the cycle counter to the start of the block.  */
-        cpu->icount_decr.u16.low += tb->icount;
+        cpu->icount_decr.u16.low += num_insns;
         /* Clear the IO flag.  */
         cpu->can_do_io = 0;
     }
-
-    /* find opc index corresponding to search_pc */
-    tc_ptr = (uintptr_t)tb->tc_ptr;
-    if (searched_pc < tc_ptr)
-        return -1;
-
-    s->tb_next_offset = tb->tb_next_offset;
-#ifdef USE_DIRECT_JUMP
-    s->tb_jmp_offset = tb->tb_jmp_offset;
-    s->tb_next = NULL;
-#else
-    s->tb_jmp_offset = NULL;
-    s->tb_next = tb->tb_next;
-#endif
-    j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
-                               searched_pc - tc_ptr);
-    if (j < 0)
-        return -1;
-    /* now find start of instruction before */
-    while (s->gen_opc_instr_start[j] == 0) {
-        j--;
-    }
-    cpu->icount_decr.u16.low -= s->gen_opc_icount[j];
-
-    restore_state_to_opc(env, tb, j);
+    cpu->icount_decr.u16.low -= i;
+    restore_state_to_opc(env, tb, data);
 
 #ifdef CONFIG_PROFILER
-    s->restore_time += profile_getclock() - ti;
-    s->restore_count++;
+    tcg_ctx.restore_time += profile_getclock() - ti;
+    tcg_ctx.restore_count++;
 #endif
     return 0;
 }
@@ -311,31 +321,6 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
     return false;
 }
 
-#ifdef _WIN32
-static __attribute__((unused)) void map_exec(void *addr, long size)
-{
-    DWORD old_protect;
-    VirtualProtect(addr, size,
-                   PAGE_EXECUTE_READWRITE, &old_protect);
-}
-#else
-static __attribute__((unused)) void map_exec(void *addr, long size)
-{
-    unsigned long start, end, page_size;
-
-    page_size = getpagesize();
-    start = (unsigned long)addr;
-    start &= ~(page_size - 1);
-
-    end = (unsigned long)addr + size;
-    end += page_size - 1;
-    end &= ~(page_size - 1);
-
-    mprotect((void *)start, end - start,
-             PROT_READ | PROT_WRITE | PROT_EXEC);
-}
-#endif
-
 void page_size_init(void)
 {
     /* NOTE: we can always suppose that qemu_host_page_size >=
@@ -472,14 +457,6 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 #define USE_STATIC_CODE_GEN_BUFFER
 #endif
 
-/* ??? Should configure for this, not list operating systems here.  */
-#if (defined(__linux__) \
-    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__))
-# define USE_MMAP
-#endif
-
 /* Minimum size of the code gen buffer.  This number is randomly chosen,
    but not so small that we can't have a fair number of TB's live.  */
 #define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
@@ -567,22 +544,102 @@ static inline void *split_cross_256mb(void *buf1, size_t size1)
 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
     __attribute__((aligned(CODE_GEN_ALIGN)));
 
+# ifdef _WIN32
+static inline void do_protect(void *addr, long size, int prot)
+{
+    DWORD old_protect;
+    VirtualProtect(addr, size, prot, &old_protect);
+}
+
+static inline void map_exec(void *addr, long size)
+{
+    do_protect(addr, size, PAGE_EXECUTE_READWRITE);
+}
+
+static inline void map_none(void *addr, long size)
+{
+    do_protect(addr, size, PAGE_NOACCESS);
+}
+# else
+static inline void do_protect(void *addr, long size, int prot)
+{
+    uintptr_t start, end;
+
+    start = (uintptr_t)addr;
+    start &= qemu_real_host_page_mask;
+
+    end = (uintptr_t)addr + size;
+    end = ROUND_UP(end, qemu_real_host_page_size);
+
+    mprotect((void *)start, end - start, prot);
+}
+
+static inline void map_exec(void *addr, long size)
+{
+    do_protect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC);
+}
+
+static inline void map_none(void *addr, long size)
+{
+    do_protect(addr, size, PROT_NONE);
+}
+# endif /* WIN32 */
+
 static inline void *alloc_code_gen_buffer(void)
 {
     void *buf = static_code_gen_buffer;
+    size_t full_size, size;
+
+    /* The size of the buffer, rounded down to end on a page boundary.  */
+    full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
+                 & qemu_real_host_page_mask) - (uintptr_t)buf;
+
+    /* Reserve a guard page.  */
+    size = full_size - qemu_real_host_page_size;
+
+    /* Honor a command-line option limiting the size of the buffer.  */
+    if (size > tcg_ctx.code_gen_buffer_size) {
+        size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
+                & qemu_real_host_page_mask) - (uintptr_t)buf;
+    }
+    tcg_ctx.code_gen_buffer_size = size;
+
 #ifdef __mips__
-    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
-        buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
+    if (cross_256mb(buf, size)) {
+        buf = split_cross_256mb(buf, size);
+        size = tcg_ctx.code_gen_buffer_size;
     }
 #endif
-    map_exec(buf, tcg_ctx.code_gen_buffer_size);
+
+    map_exec(buf, size);
+    map_none(buf + size, qemu_real_host_page_size);
+    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+
     return buf;
 }
-#elif defined(USE_MMAP)
+#elif defined(_WIN32)
+static inline void *alloc_code_gen_buffer(void)
+{
+    size_t size = tcg_ctx.code_gen_buffer_size;
+    void *buf1, *buf2;
+
+    /* Perform the allocation in two steps, so that the guard page
+       is reserved but uncommitted.  */
+    buf1 = VirtualAlloc(NULL, size + qemu_real_host_page_size,
+                        MEM_RESERVE, PAGE_NOACCESS);
+    if (buf1 != NULL) {
+        buf2 = VirtualAlloc(buf1, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+        assert(buf1 == buf2);
+    }
+
+    return buf1;
+}
+#else
 static inline void *alloc_code_gen_buffer(void)
 {
     int flags = MAP_PRIVATE | MAP_ANONYMOUS;
     uintptr_t start = 0;
+    size_t size = tcg_ctx.code_gen_buffer_size;
     void *buf;
 
     /* Constrain the position of the buffer based on the host cpu.
@@ -598,86 +655,70 @@ static inline void *alloc_code_gen_buffer(void)
        Leave the choice of exact location with the kernel.  */
     flags |= MAP_32BIT;
     /* Cannot expect to map more than 800MB in low memory.  */
-    if (tcg_ctx.code_gen_buffer_size > 800u * 1024 * 1024) {
-        tcg_ctx.code_gen_buffer_size = 800u * 1024 * 1024;
+    if (size > 800u * 1024 * 1024) {
+        tcg_ctx.code_gen_buffer_size = size = 800u * 1024 * 1024;
     }
 # elif defined(__sparc__)
     start = 0x40000000ul;
 # elif defined(__s390x__)
     start = 0x90000000ul;
 # elif defined(__mips__)
-    /* ??? We ought to more explicitly manage layout for softmmu too.  */
-#  ifdef CONFIG_USER_ONLY
-    start = 0x68000000ul;
-#  elif _MIPS_SIM == _ABI64
+#  if _MIPS_SIM == _ABI64
     start = 0x128000000ul;
 #  else
     start = 0x08000000ul;
 #  endif
 # endif
 
-    buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size,
-               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+    buf = mmap((void *)start, size + qemu_real_host_page_size,
+               PROT_NONE, flags, -1, 0);
     if (buf == MAP_FAILED) {
         return NULL;
     }
 
 #ifdef __mips__
-    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
+    if (cross_256mb(buf, size)) {
         /* Try again, with the original still mapped, to avoid re-acquiring
            that 256mb crossing.  This time don't specify an address.  */
-        size_t size2, size1 = tcg_ctx.code_gen_buffer_size;
-        void *buf2 = mmap(NULL, size1, PROT_WRITE | PROT_READ | PROT_EXEC,
-                          flags, -1, 0);
-        if (buf2 != MAP_FAILED) {
-            if (!cross_256mb(buf2, size1)) {
+        size_t size2;
+        void *buf2 = mmap(NULL, size + qemu_real_host_page_size,
+                          PROT_NONE, flags, -1, 0);
+        switch (buf2 != MAP_FAILED) {
+        case 1:
+            if (!cross_256mb(buf2, size)) {
                 /* Success!  Use the new buffer.  */
-                munmap(buf, size1);
-                return buf2;
+                munmap(buf, size);
+                break;
             }
             /* Failure.  Work with what we had.  */
-            munmap(buf2, size1);
+            munmap(buf2, size);
+            /* fallthru */
+        default:
+            /* Split the original buffer.  Free the smaller half.  */
+            buf2 = split_cross_256mb(buf, size);
+            size2 = tcg_ctx.code_gen_buffer_size;
+            if (buf == buf2) {
+                munmap(buf + size2 + qemu_real_host_page_size, size - size2);
+            } else {
+                munmap(buf, size - size2);
+            }
+            size = size2;
+            break;
         }
-
-        /* Split the original buffer.  Free the smaller half.  */
-        buf2 = split_cross_256mb(buf, size1);
-        size2 = tcg_ctx.code_gen_buffer_size;
-        munmap(buf + (buf == buf2 ? size2 : 0), size1 - size2);
-        return buf2;
+        buf = buf2;
     }
 #endif
 
-    return buf;
-}
-#else
-static inline void *alloc_code_gen_buffer(void)
-{
-    void *buf = g_try_malloc(tcg_ctx.code_gen_buffer_size);
+    /* Make the final buffer accessible.  The guard page at the end
+       will remain inaccessible with PROT_NONE.  */
+    mprotect(buf, size, PROT_WRITE | PROT_READ | PROT_EXEC);
 
-    if (buf == NULL) {
-        return NULL;
-    }
+    /* Request large pages for the buffer.  */
+    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
 
-#ifdef __mips__
-    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
-        void *buf2 = g_malloc(tcg_ctx.code_gen_buffer_size);
-        if (buf2 != NULL && !cross_256mb(buf2, size1)) {
-            /* Success!  Use the new buffer.  */
-            free(buf);
-            buf = buf2;
-        } else {
-            /* Failure.  Work with what we had.  Since this is malloc
-               and not mmap, we can't free the other half.  */
-            free(buf2);
-            buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
-        }
-    }
-#endif
-
-    map_exec(buf, tcg_ctx.code_gen_buffer_size);
     return buf;
 }
-#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
 
 static inline void code_gen_alloc(size_t tb_size)
 {
@@ -688,24 +729,13 @@ static inline void code_gen_alloc(size_t tb_size)
         exit(1);
     }
 
-    qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
-            QEMU_MADV_HUGEPAGE);
-
-    /* Steal room for the prologue at the end of the buffer.  This ensures
-       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
-       from TB's to the prologue are going to be in range.  It also means
-       that we don't need to mark (additional) portions of the data segment
-       as executable.  */
-    tcg_ctx.code_gen_prologue = tcg_ctx.code_gen_buffer +
-            tcg_ctx.code_gen_buffer_size - 1024;
-    tcg_ctx.code_gen_buffer_size -= 1024;
-
-    tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size -
-        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
-    tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
-            CODE_GEN_AVG_BLOCK_SIZE;
-    tcg_ctx.tb_ctx.tbs =
-            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+    /* Estimate a good size for the number of TBs we can support.  We
+       still haven't deducted the prologue from the buffer size here,
+       but that's minimal and won't affect the estimate much.  */
+    tcg_ctx.code_gen_max_blocks
+        = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
+    tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks);
+
     qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
 }
 
@@ -715,10 +745,8 @@ static inline void code_gen_alloc(size_t tb_size)
 void tcg_exec_init(unsigned long tb_size)
 {
     cpu_gen_init();
-    code_gen_alloc(tb_size);
-    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
-    tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size);
     page_init();
+    code_gen_alloc(tb_size);
 #if defined(CONFIG_SOFTMMU)
     /* There's no guest base to take into account, so go ahead and
        initialize the prologue now.  */
@@ -737,9 +765,7 @@ static TranslationBlock *tb_alloc(target_ulong pc)
 {
     TranslationBlock *tb;
 
-    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
-        (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
-         tcg_ctx.code_gen_buffer_max_size) {
+    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
         return NULL;
     }
     tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
@@ -1034,28 +1060,98 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     TranslationBlock *tb;
     tb_page_addr_t phys_pc, phys_page2;
     target_ulong virt_page2;
-    int code_gen_size;
+    tcg_insn_unit *gen_code_buf;
+    int gen_code_size, search_size;
+#ifdef CONFIG_PROFILER
+    int64_t ti;
+#endif
 
     phys_pc = get_page_addr_code(env, pc);
     if (use_icount) {
         cflags |= CF_USE_ICOUNT;
     }
+
     tb = tb_alloc(pc);
-    if (!tb) {
+    if (unlikely(!tb)) {
+ buffer_overflow:
         /* flush must be done */
         tb_flush(cpu);
         /* cannot fail at this point */
         tb = tb_alloc(pc);
+        assert(tb != NULL);
         /* Don't forget to invalidate previous TB info.  */
         tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
     }
-    tb->tc_ptr = tcg_ctx.code_gen_ptr;
+
+    gen_code_buf = tcg_ctx.code_gen_ptr;
+    tb->tc_ptr = gen_code_buf;
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
-    cpu_gen_code(env, tb, &code_gen_size);
-    tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)tcg_ctx.code_gen_ptr +
-            code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+
+#ifdef CONFIG_PROFILER
+    tcg_ctx.tb_count1++; /* includes aborted translations because of
+                       exceptions */
+    ti = profile_getclock();
+#endif
+
+    tcg_func_start(&tcg_ctx);
+
+    gen_intermediate_code(env, tb);
+
+    trace_translate_block(tb, tb->pc, tb->tc_ptr);
+
+    /* generate machine code */
+    tb->tb_next_offset[0] = 0xffff;
+    tb->tb_next_offset[1] = 0xffff;
+    tcg_ctx.tb_next_offset = tb->tb_next_offset;
+#ifdef USE_DIRECT_JUMP
+    tcg_ctx.tb_jmp_offset = tb->tb_jmp_offset;
+    tcg_ctx.tb_next = NULL;
+#else
+    tcg_ctx.tb_jmp_offset = NULL;
+    tcg_ctx.tb_next = tb->tb_next;
+#endif
+
+#ifdef CONFIG_PROFILER
+    tcg_ctx.tb_count++;
+    tcg_ctx.interm_time += profile_getclock() - ti;
+    tcg_ctx.code_time -= profile_getclock();
+#endif
+
+    /* ??? Overflow could be handled better here.  In particular, we
+       don't need to re-do gen_intermediate_code, nor should we re-do
+       the tcg optimization currently hidden inside tcg_gen_code.  All
+       that should be required is to flush the TBs, allocate a new TB,
+       re-initialize it per above, and re-do the actual code generation.  */
+    gen_code_size = tcg_gen_code(&tcg_ctx, gen_code_buf);
+    if (unlikely(gen_code_size < 0)) {
+        goto buffer_overflow;
+    }
+    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
+    if (unlikely(search_size < 0)) {
+        goto buffer_overflow;
+    }
+
+#ifdef CONFIG_PROFILER
+    tcg_ctx.code_time += profile_getclock();
+    tcg_ctx.code_in_len += tb->size;
+    tcg_ctx.code_out_len += gen_code_size;
+    tcg_ctx.search_out_len += search_size;
+#endif
+
+#ifdef DEBUG_DISAS
+    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
+        qemu_log("OUT: [size=%d]\n", gen_code_size);
+        log_disas(tb->tc_ptr, gen_code_size);
+        qemu_log("\n");
+        qemu_log_flush();
+    }
+#endif
+
+    tcg_ctx.code_gen_ptr = (void *)
+        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
+                 CODE_GEN_ALIGN);
 
     /* check next page if needed */
     virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
@@ -1606,7 +1702,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cpu_fprintf(f, "Translation buffer state:\n");
     cpu_fprintf(f, "gen code size       %td/%zd\n",
                 tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
-                tcg_ctx.code_gen_buffer_max_size);
+                tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
     cpu_fprintf(f, "TB count            %d/%d\n",
             tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
     cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
diff --git a/vl.c b/vl.c
index 8d1846c06c..f2bd8d20fb 100644
--- a/vl.c
+++ b/vl.c
@@ -1223,7 +1223,13 @@ static void smp_parse(QemuOpts *opts)
             exit(1);
         }
 
-        max_cpus = qemu_opt_get_number(opts, "maxcpus", 0);
+        max_cpus = qemu_opt_get_number(opts, "maxcpus", cpus);
+        if (sockets * cores * threads > max_cpus) {
+            fprintf(stderr, "cpu topology: error: "
+                    "sockets (%u) * cores (%u) * threads (%u) > maxcpus (%u)\n",
+                    sockets, cores, threads, max_cpus);
+            exit(1);
+        }
 
         smp_cpus = cpus;
         smp_cores = cores > 0 ? cores : 1;