summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.travis.yml1
-rw-r--r--MAINTAINERS1
-rw-r--r--Makefile.objs2
-rw-r--r--arch_init.c52
-rw-r--r--block/commit.c8
-rw-r--r--block/iscsi.c16
-rw-r--r--block/mirror.c78
-rw-r--r--block/sheepdog.c7
-rw-r--r--block/vhdx-log.c13
-rw-r--r--block/vhdx.c22
-rw-r--r--block/vhdx.h5
-rw-r--r--block/vmdk.c173
-rw-r--r--blockdev.c9
-rwxr-xr-xconfigure384
-rw-r--r--cpu-exec.c50
-rw-r--r--cpus.c5
-rw-r--r--cputlb.c32
-rw-r--r--default-configs/aarch64-linux-user.mak3
-rw-r--r--exec.c76
-rw-r--r--fpu/softfloat.c1055
-rw-r--r--hw/acpi/core.c18
-rw-r--r--hw/acpi/ich9.c24
-rw-r--r--hw/acpi/piix4.c33
-rw-r--r--hw/alpha/typhoon.c2
-rw-r--r--hw/arm/highbank.c7
-rw-r--r--hw/arm/mainstone.c15
-rw-r--r--hw/arm/versatilepb.c1
-rw-r--r--hw/arm/xilinx_zynq.c17
-rw-r--r--hw/arm/z2.c2
-rw-r--r--hw/audio/intel-hda.c4
-rw-r--r--hw/audio/marvell_88w8618.c2
-rw-r--r--hw/audio/pcspk.c3
-rw-r--r--hw/audio/pl041.c1
-rw-r--r--hw/block/dataplane/virtio-blk.c86
-rw-r--r--hw/block/fdc.c1
-rw-r--r--hw/char/cadence_uart.c153
-rw-r--r--hw/char/exynos4210_uart.c6
-rw-r--r--hw/core/qdev.c54
-rw-r--r--hw/core/sysbus.c7
-rw-r--r--hw/cpu/icc_bus.c14
-rw-r--r--hw/display/pl110.c1
-rw-r--r--hw/dma/pl080.c1
-rw-r--r--hw/dma/sparc32_dma.c2
-rw-r--r--hw/gpio/omap_gpio.c4
-rw-r--r--hw/i2c/omap_i2c.c2
-rw-r--r--hw/i2c/smbus_eeprom.c2
-rw-r--r--hw/i2c/smbus_ich9.c6
-rw-r--r--hw/i386/acpi-dsdt-cpu-hotplug.dsl1
-rw-r--r--hw/i386/kvm/apic.c14
-rw-r--r--hw/i386/kvm/clock.c1
-rw-r--r--hw/i386/kvm/ioapic.c8
-rw-r--r--hw/i386/kvmvapic.c9
-rw-r--r--hw/i386/pc.c24
-rw-r--r--hw/i386/pc_piix.c19
-rw-r--r--hw/i386/pc_sysfw.c105
-rw-r--r--hw/i386/q35-acpi-dsdt.dsl4
-rw-r--r--hw/ide/piix.c3
-rw-r--r--hw/ide/via.c1
-rw-r--r--hw/input/pckbd.c1
-rw-r--r--hw/input/pxa2xx_keypad.c6
-rw-r--r--hw/input/vmmouse.c3
-rw-r--r--hw/intc/apic.c48
-rw-r--r--hw/intc/apic_common.c79
-rw-r--r--hw/intc/arm_gic.c28
-rw-r--r--hw/intc/arm_gic_common.c5
-rw-r--r--hw/intc/arm_gic_kvm.c1
-rw-r--r--hw/intc/etraxfs_pic.c4
-rw-r--r--hw/intc/gic_internal.h7
-rw-r--r--hw/intc/grlib_irqmp.c2
-rw-r--r--hw/intc/i8259_common.c8
-rw-r--r--hw/intc/ioapic.c13
-rw-r--r--hw/intc/ioapic_common.c16
-rw-r--r--hw/intc/omap_intc.c4
-rw-r--r--hw/intc/pl190.c1
-rw-r--r--hw/isa/isa-bus.c1
-rw-r--r--hw/isa/lpc_ich9.c7
-rw-r--r--hw/isa/piix4.c6
-rw-r--r--hw/isa/vt82c686.c6
-rw-r--r--hw/microblaze/Makefile.objs1
-rw-r--r--hw/microblaze/petalogix_ml605_mmu.c9
-rw-r--r--hw/microblaze/petalogix_s3adsp1800_mmu.c9
-rw-r--r--hw/microblaze/pic_cpu.c47
-rw-r--r--hw/microblaze/pic_cpu.h8
-rw-r--r--hw/mips/gt64xxx_pci.c6
-rw-r--r--hw/misc/arm_l2x0.c1
-rw-r--r--hw/misc/exynos4210_pmu.c3
-rw-r--r--hw/misc/vmport.c3
-rw-r--r--hw/net/etraxfs_eth.c2
-rw-r--r--hw/net/lance.c2
-rw-r--r--hw/nvram/fw_cfg.c1
-rw-r--r--hw/pci-bridge/dec.c6
-rw-r--r--hw/pci-host/apb.c6
-rw-r--r--hw/pci-host/bonito.c8
-rw-r--r--hw/pci-host/grackle.c8
-rw-r--r--hw/pci-host/piix.c30
-rw-r--r--hw/pci-host/ppce500.c5
-rw-r--r--hw/pci-host/prep.c7
-rw-r--r--hw/pci-host/q35.c5
-rw-r--r--hw/pci-host/uninorth.c24
-rw-r--r--hw/pci-host/versatile.c6
-rw-r--r--hw/pci/pci.c42
-rw-r--r--hw/pci/pci_bridge.c4
-rw-r--r--hw/ppc/ppc4xx_pci.c5
-rw-r--r--hw/ppc/spapr_vio.c2
-rw-r--r--hw/s390x/ipl.c1
-rw-r--r--hw/s390x/s390-virtio-bus.c2
-rw-r--r--hw/s390x/virtio-ccw.c7
-rw-r--r--hw/scsi/scsi-disk.c20
-rw-r--r--hw/sd/pl181.c1
-rw-r--r--hw/sh4/sh_pci.c6
-rw-r--r--hw/timer/arm_mptimer.c1
-rw-r--r--hw/timer/hpet.c1
-rw-r--r--hw/timer/i8254_common.c7
-rw-r--r--hw/timer/m48t59.c3
-rw-r--r--hw/timer/mc146818rtc.c3
-rw-r--r--hw/timer/pl031.c1
-rw-r--r--hw/virtio/dataplane/Makefile.objs2
-rw-r--r--hw/virtio/dataplane/hostmem.c183
-rw-r--r--hw/virtio/dataplane/vring.c253
-rw-r--r--hw/virtio/virtio.c2
-rw-r--r--hw/xen/xen_apic.c6
-rw-r--r--include/block/block_int.h22
-rw-r--r--include/exec/cpu-all.h3
-rw-r--r--include/exec/memory-internal.h90
-rw-r--r--include/exec/memory.h12
-rw-r--r--include/exec/ram_addr.h147
-rw-r--r--include/fpu/softfloat.h96
-rw-r--r--include/hw/acpi/acpi.h8
-rw-r--r--include/hw/arm/pxa.h8
-rw-r--r--include/hw/cpu/icc_bus.h2
-rw-r--r--include/hw/i386/apic_internal.h2
-rw-r--r--include/hw/i386/ioapic_internal.h3
-rw-r--r--include/hw/i386/pc.h1
-rw-r--r--include/hw/intc/arm_gic_common.h2
-rw-r--r--include/hw/pci/pci.h1
-rw-r--r--include/hw/qdev-core.h30
-rw-r--r--include/hw/qdev-properties.h17
-rw-r--r--include/hw/virtio/dataplane/hostmem.h58
-rw-r--r--include/hw/virtio/dataplane/vring.h10
-rw-r--r--include/migration/migration.h11
-rw-r--r--include/migration/qemu-file.h4
-rw-r--r--include/qemu/bitmap.h86
-rw-r--r--include/qemu/bitops.h14
-rw-r--r--include/qom/object.h4
-rw-r--r--kvm-all.c28
-rw-r--r--linux-user/aarch64/syscall.h1
-rw-r--r--linux-user/aarch64/target_cpu.h5
-rw-r--r--linux-user/arm/target_cpu.h2
-rw-r--r--linux-user/elfload.c3
-rw-r--r--linux-user/flatload.c3
-rw-r--r--linux-user/linuxload.c4
-rw-r--r--linux-user/main.c154
-rw-r--r--linux-user/qemu.h6
-rw-r--r--linux-user/signal.c18
-rw-r--r--linux-user/syscall.c16
-rw-r--r--linux-user/syscall_defs.h1
-rw-r--r--memory.c17
-rw-r--r--migration.c33
-rw-r--r--net/net.c2
-rw-r--r--qapi-schema.json7
-rw-r--r--qdev-monitor.c15
-rw-r--r--qemu-char.c3
-rw-r--r--qemu-doc.texi15
-rw-r--r--qemu-file.c826
-rw-r--r--qemu-img.texi4
-rw-r--r--qemu-options.hx2
-rw-r--r--qemu-seccomp.c1
-rw-r--r--qom/cpu.c6
-rw-r--r--qom/object.c50
-rw-r--r--savevm.c1512
-rw-r--r--target-arm/cpu.c9
-rw-r--r--target-arm/cpu.h122
-rw-r--r--target-arm/cpu64.c9
-rw-r--r--target-arm/helper-a64.c45
-rw-r--r--target-arm/helper-a64.h4
-rw-r--r--target-arm/helper.c452
-rw-r--r--target-arm/helper.h40
-rw-r--r--target-arm/kvm-consts.h37
-rw-r--r--target-arm/machine.c12
-rw-r--r--target-arm/neon_helper.c12
-rw-r--r--target-arm/translate-a64.c2790
-rw-r--r--target-arm/translate.c112
-rw-r--r--target-arm/translate.h2
-rw-r--r--target-i386/cpu-qom.h6
-rw-r--r--target-i386/cpu.c108
-rw-r--r--target-i386/cpu.h4
-rw-r--r--target-i386/helper.c18
-rw-r--r--target-i386/kvm.c23
-rw-r--r--target-i386/misc_helper.c8
-rw-r--r--target-i386/translate.c2645
-rw-r--r--target-microblaze/cpu.c21
-rw-r--r--target-microblaze/cpu.h4
-rw-r--r--target-openrisc/translate.c2
-rw-r--r--tests/.gitignore1
-rw-r--r--tests/Makefile10
-rw-r--r--tests/acpi-test.c5
-rw-r--r--tests/check-qom-interface.c105
-rw-r--r--tests/i440fx-test.c167
-rwxr-xr-xtests/qemu-iotests/04074
-rw-r--r--tests/qemu-iotests/051.out1
-rwxr-xr-xtests/qemu-iotests/05914
-rw-r--r--tests/qemu-iotests/059.out5
-rw-r--r--tests/test-vmstate.c357
-rw-r--r--translate-all.c5
-rw-r--r--ui/cocoa.m103
-rw-r--r--util/bitmap.c60
-rw-r--r--vl.c5
-rw-r--r--vmstate.c650
208 files changed, 9767 insertions, 5289 deletions
diff --git a/.travis.yml b/.travis.yml
index 90f167630a..c7ff4da29c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,6 +16,7 @@ env:
   matrix:
   - TARGETS=alpha-softmmu,alpha-linux-user
   - TARGETS=arm-softmmu,arm-linux-user
+  - TARGETS=aarch64-softmmu,aarch64-linux-user
   - TARGETS=cris-softmmu
   - TARGETS=i386-softmmu,x86_64-softmmu
   - TARGETS=lm32-softmmu
diff --git a/MAINTAINERS b/MAINTAINERS
index a5ab8f8cea..fb5324285c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -714,6 +714,7 @@ F: ui/
 
 Cocoa graphics
 M: Andreas Färber <andreas.faerber@web.de>
+M: Peter Maydell <peter.maydell@linaro.org>
 S: Odd Fixes
 F: ui/cocoa.m
 
diff --git a/Makefile.objs b/Makefile.objs
index 2b6c1fe2a8..857bb53ae4 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -51,6 +51,8 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 common-obj-$(CONFIG_LINUX) += fsdev/
 
 common-obj-y += migration.o migration-tcp.o
+common-obj-y += vmstate.o
+common-obj-y += qemu-file.o
 common-obj-$(CONFIG_RDMA) += migration-rdma.o
 common-obj-y += qemu-char.o #aio.o
 common-obj-y += block-migration.o
diff --git a/arch_init.c b/arch_init.c
index e0acbc5661..77912e7a7d 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -48,7 +48,9 @@
 #include "qmp-commands.h"
 #include "trace.h"
 #include "exec/cpu-all.h"
+#include "exec/ram_addr.h"
 #include "hw/acpi/acpi.h"
+#include "qemu/host-utils.h"
 
 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
@@ -359,11 +361,10 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
     return (next - base) << TARGET_PAGE_BITS;
 }
 
-static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
-                                              ram_addr_t offset)
+static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
 {
     bool ret;
-    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+    int nr = addr >> TARGET_PAGE_BITS;
 
     ret = test_and_set_bit(nr, migration_bitmap);
 
@@ -373,12 +374,47 @@ static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
     return ret;
 }
 
+static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
+{
+    ram_addr_t addr;
+    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
+
+    /* start address is aligned at the start of a word? */
+    if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
+        int k;
+        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
+        unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];
+
+        for (k = page; k < page + nr; k++) {
+            if (src[k]) {
+                unsigned long new_dirty;
+                new_dirty = ~migration_bitmap[k];
+                migration_bitmap[k] |= src[k];
+                new_dirty &= src[k];
+                migration_dirty_pages += ctpopl(new_dirty);
+                src[k] = 0;
+            }
+        }
+    } else {
+        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
+            if (cpu_physical_memory_get_dirty(start + addr,
+                                              TARGET_PAGE_SIZE,
+                                              DIRTY_MEMORY_MIGRATION)) {
+                cpu_physical_memory_reset_dirty(start + addr,
+                                                TARGET_PAGE_SIZE,
+                                                DIRTY_MEMORY_MIGRATION);
+                migration_bitmap_set_dirty(start + addr);
+            }
+        }
+    }
+}
+
+
 /* Needs iothread lock! */
 
 static void migration_bitmap_sync(void)
 {
     RAMBlock *block;
-    ram_addr_t addr;
     uint64_t num_dirty_pages_init = migration_dirty_pages;
     MigrationState *s = migrate_get_current();
     static int64_t start_time;
@@ -399,13 +435,7 @@ static void migration_bitmap_sync(void)
     address_space_sync_dirty_bitmap(&address_space_memory);
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
-            if (memory_region_test_and_clear_dirty(block->mr,
-                                                   addr, TARGET_PAGE_SIZE,
-                                                   DIRTY_MEMORY_MIGRATION)) {
-                migration_bitmap_set_dirty(block->mr, addr);
-            }
-        }
+        migration_bitmap_sync_range(block->mr->ram_addr, block->length);
     }
     trace_migration_bitmap_sync_end(migration_dirty_pages
                                     - num_dirty_pages_init);
diff --git a/block/commit.c b/block/commit.c
index d4090cbf7d..acec4ac5a8 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -198,13 +198,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
         return;
     }
 
-    /* Once we support top == active layer, remove this check */
-    if (top == bs) {
-        error_setg(errp,
-                   "Top image as the active layer is currently unsupported");
-        return;
-    }
-
+    assert(top != bs);
     if (top == base) {
         error_setg(errp, "Invalid files for merge: top and base are the same");
         return;
diff --git a/block/iscsi.c b/block/iscsi.c
index 02eba5d14f..c0ea0c4543 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -68,6 +68,7 @@ typedef struct IscsiTask {
     int do_retry;
     struct scsi_task *task;
     Coroutine *co;
+    QEMUBH *bh;
 } IscsiTask;
 
 typedef struct IscsiAIOCB {
@@ -123,6 +124,13 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
     qemu_bh_schedule(acb->bh);
 }
 
+static void iscsi_co_generic_bh_cb(void *opaque)
+{
+    struct IscsiTask *iTask = opaque;
+    qemu_bh_delete(iTask->bh);
+    qemu_coroutine_enter(iTask->co, NULL);
+}
+
 static void
 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                         void *command_data, void *opaque)
@@ -147,7 +155,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 
 out:
     if (iTask->co) {
-        qemu_coroutine_enter(iTask->co, NULL);
+        iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask);
+        qemu_bh_schedule(iTask->bh);
     }
 }
 
@@ -359,7 +368,10 @@ retry:
     default:
         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        num_sectors * iscsilun->block_size,
-                                       iscsilun->block_size, 0, 0, 0, 0, 0,
+                                       iscsilun->block_size,
+#if !defined(CONFIG_LIBISCSI_1_4) /* API change from 1.4.0 to 1.5.0 */
+                                       0, 0, 0, 0, 0,
+#endif
                                        iscsi_co_generic_cb, &iTask);
         break;
     }
diff --git a/block/mirror.c b/block/mirror.c
index 6dc27ad35d..2932bab27a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -31,7 +31,8 @@ typedef struct MirrorBlockJob {
     BlockJob common;
     RateLimit limit;
     BlockDriverState *target;
-    MirrorSyncMode mode;
+    BlockDriverState *base;
+    bool is_none_mode;
     BlockdevOnError on_source_error, on_target_error;
     bool synced;
     bool should_complete;
@@ -335,10 +336,9 @@ static void coroutine_fn mirror_run(void *opaque)
     sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
     mirror_free_init(s);
 
-    if (s->mode != MIRROR_SYNC_MODE_NONE) {
+    if (!s->is_none_mode) {
         /* First part, loop on the sectors and initialize the dirty bitmap.  */
-        BlockDriverState *base;
-        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
+        BlockDriverState *base = s->base;
         for (sector_num = 0; sector_num < end; ) {
             int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
             ret = bdrv_is_allocated_above(bs, base,
@@ -481,8 +481,14 @@ immediate_exit:
             bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
         }
         bdrv_swap(s->target, s->common.bs);
+        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
+            /* drop the bs loop chain formed by the swap: break the loop then
+             * trigger the unref from the top one */
+            BlockDriverState *p = s->base->backing_hd;
+            s->base->backing_hd = NULL;
+            bdrv_unref(p);
+        }
     }
-    bdrv_close(s->target);
     bdrv_unref(s->target);
     block_job_completed(&s->common, ret);
 }
@@ -536,12 +542,24 @@ static const BlockJobDriver mirror_job_driver = {
     .complete      = mirror_complete,
 };
 
-void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, int64_t granularity, int64_t buf_size,
-                  MirrorSyncMode mode, BlockdevOnError on_source_error,
-                  BlockdevOnError on_target_error,
-                  BlockDriverCompletionFunc *cb,
-                  void *opaque, Error **errp)
+static const BlockJobDriver commit_active_job_driver = {
+    .instance_size = sizeof(MirrorBlockJob),
+    .job_type      = BLOCK_JOB_TYPE_COMMIT,
+    .set_speed     = mirror_set_speed,
+    .iostatus_reset
+                   = mirror_iostatus_reset,
+    .complete      = mirror_complete,
+};
+
+static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
+                            int64_t speed, int64_t granularity,
+                            int64_t buf_size,
+                            BlockdevOnError on_source_error,
+                            BlockdevOnError on_target_error,
+                            BlockDriverCompletionFunc *cb,
+                            void *opaque, Error **errp,
+                            const BlockJobDriver *driver,
+                            bool is_none_mode, BlockDriverState *base)
 {
     MirrorBlockJob *s;
 
@@ -566,7 +584,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
-    s = block_job_create(&mirror_job_driver, bs, speed, cb, opaque, errp);
+
+    s = block_job_create(driver, bs, speed, cb, opaque, errp);
     if (!s) {
         return;
     }
@@ -574,7 +593,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
     s->target = target;
-    s->mode = mode;
+    s->is_none_mode = is_none_mode;
+    s->base = base;
     s->granularity = granularity;
     s->buf_size = MAX(buf_size, granularity);
 
@@ -586,3 +606,35 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     trace_mirror_start(bs, s, s->common.co, opaque);
     qemu_coroutine_enter(s->common.co, s);
 }
+
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  MirrorSyncMode mode, BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
+                  BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    bool is_none_mode;
+    BlockDriverState *base;
+
+    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
+    base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
+    mirror_start_job(bs, target, speed, granularity, buf_size,
+                     on_source_error, on_target_error, cb, opaque, errp,
+                     &mirror_job_driver, is_none_mode, base);
+}
+
+void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
+                         int64_t speed,
+                         BlockdevOnError on_error,
+                         BlockDriverCompletionFunc *cb,
+                         void *opaque, Error **errp)
+{
+    if (bdrv_reopen(base, bs->open_flags, errp)) {
+        return;
+    }
+    bdrv_ref(base);
+    mirror_start_job(bs, base, speed, 0, 0,
+                     on_error, on_error, cb, opaque, errp,
+                     &commit_active_job_driver, false, base);
+}
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 5ce0658111..b94ab6e10a 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -2048,13 +2048,14 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
 {
     SheepdogAIOCB *acb;
     int ret;
+    int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
+    BDRVSheepdogState *s = bs->opaque;
 
-    if (bs->growable && sector_num + nb_sectors > bs->total_sectors) {
-        ret = sd_truncate(bs, (sector_num + nb_sectors) * BDRV_SECTOR_SIZE);
+    if (bs->growable && offset > s->inode.vdi_size) {
+        ret = sd_truncate(bs, offset);
         if (ret < 0) {
             return ret;
         }
-        bs->total_sectors = sector_num + nb_sectors;
     }
 
     acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index ee5583c309..8c9ae0d8e7 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -706,7 +706,8 @@ exit:
  *
  * If read-only, we must replay the log in RAM (or refuse to open
  * a dirty VHDX file read-only) */
-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed)
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
+                   Error **errp)
 {
     int ret = 0;
     VHDXHeader *hdr;
@@ -761,6 +762,16 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed)
     }
 
     if (logs.valid) {
+        if (bs->read_only) {
+            ret = -EPERM;
+            error_setg_errno(errp, EPERM,
+                             "VHDX image file '%s' opened read-only, but "
+                             "contains a log that needs to be replayed.  To "
+                             "replay the log, execute:\n qemu-img check -r "
+                             "all '%s'",
+                             bs->filename, bs->filename);
+            goto exit;
+        }
         /* now flush the log */
         ret = vhdx_log_flush(bs, s, &logs);
         if (ret < 0) {
diff --git a/block/vhdx.c b/block/vhdx.c
index 67bbe103a1..1995778945 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -878,7 +878,6 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     int ret = 0;
     uint32_t i;
     uint64_t signature;
-    bool log_flushed = false;
 
 
     s->bat = NULL;
@@ -907,7 +906,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    ret = vhdx_parse_log(bs, s, &log_flushed);
+    ret = vhdx_parse_log(bs, s, &s->log_replayed_on_open, errp);
     if (ret < 0) {
         goto fail;
     }
@@ -1854,6 +1853,24 @@ exit:
     return ret;
 }
 
+/* If opened r/w, the VHDX driver will automatically replay the log,
+ * if one is present, inside the vhdx_open() call.
+ *
+ * If qemu-img check -r all is called, the image is automatically opened
+ * r/w and any log has already been replayed, so there is nothing (currently)
+ * for us to do here
+ */
+static int vhdx_check(BlockDriverState *bs, BdrvCheckResult *result,
+                       BdrvCheckMode fix)
+{
+    BDRVVHDXState *s = bs->opaque;
+
+    if (s->log_replayed_on_open) {
+        result->corruptions_fixed++;
+    }
+    return 0;
+}
+
 static QEMUOptionParameter vhdx_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -1898,6 +1915,7 @@ static BlockDriver bdrv_vhdx = {
     .bdrv_co_writev         = vhdx_co_writev,
     .bdrv_create            = vhdx_create,
     .bdrv_get_info          = vhdx_get_info,
+    .bdrv_check             = vhdx_check,
 
     .create_options         = vhdx_create_options,
 };
diff --git a/block/vhdx.h b/block/vhdx.h
index 51183b243c..2acd7c2d19 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -394,6 +394,8 @@ typedef struct BDRVVHDXState {
 
     Error *migration_blocker;
 
+    bool log_replayed_on_open;
+
     QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions;
 } BDRVVHDXState;
 
@@ -408,7 +410,8 @@ uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
 
 bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
 
-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed);
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
+                   Error **errp);
 
 int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
                              void *data, uint32_t length, uint64_t offset);
diff --git a/block/vmdk.c b/block/vmdk.c
index 0734bc200c..c6b60b4a91 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -749,9 +749,14 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
                 return -EINVAL;
             }
         } else if (!strcmp(type, "VMFS")) {
-            flat_offset = 0;
+            if (ret == 4) {
+                flat_offset = 0;
+            } else {
+                error_setg(errp, "Invalid extent lines:\n%s", p);
+                return -EINVAL;
+            }
         } else if (ret != 4) {
-            error_setg(errp, "Invalid extent lines: \n%s", p);
+            error_setg(errp, "Invalid extent lines:\n%s", p);
             return -EINVAL;
         }
 
@@ -1447,23 +1452,33 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
 }
 
 static int vmdk_create_extent(const char *filename, int64_t filesize,
-                              bool flat, bool compress, bool zeroed_grain)
+                              bool flat, bool compress, bool zeroed_grain,
+                              Error **errp)
 {
     int ret, i;
-    int fd = 0;
+    BlockDriverState *bs = NULL;
     VMDK4Header header;
-    uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
+    Error *local_err;
+    uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
+    uint32_t *gd_buf = NULL;
+    int gd_buf_size;
+
+    ret = bdrv_create_file(filename, NULL, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto exit;
+    }
 
-    fd = qemu_open(filename,
-                   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-                   0644);
-    if (fd < 0) {
-        return -errno;
+    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto exit;
     }
+
     if (flat) {
-        ret = ftruncate(fd, filesize);
+        ret = bdrv_truncate(bs, filesize);
         if (ret < 0) {
-            ret = -errno;
+            error_setg(errp, "Could not truncate file");
         }
         goto exit;
     }
@@ -1474,24 +1489,23 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
                    | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
                    | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
-    header.capacity = filesize / 512;
+    header.capacity = filesize / BDRV_SECTOR_SIZE;
     header.granularity = 128;
-    header.num_gtes_per_gt = 512;
+    header.num_gtes_per_gt = BDRV_SECTOR_SIZE;
 
-    grains = (filesize / 512 + header.granularity - 1) / header.granularity;
-    gt_size = ((header.num_gtes_per_gt * sizeof(uint32_t)) + 511) >> 9;
-    gt_count =
-        (grains + header.num_gtes_per_gt - 1) / header.num_gtes_per_gt;
-    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
+    grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity);
+    gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t),
+                           BDRV_SECTOR_SIZE);
+    gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt);
+    gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE);
 
     header.desc_offset = 1;
     header.desc_size = 20;
     header.rgd_offset = header.desc_offset + header.desc_size;
-    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
+    header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count);
     header.grain_offset =
-       ((header.gd_offset + gd_size + (gt_size * gt_count) +
-         header.granularity - 1) / header.granularity) *
-        header.granularity;
+        ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count),
+                 header.granularity);
     /* swap endianness for all header fields */
     header.version = cpu_to_le32(header.version);
     header.flags = cpu_to_le32(header.flags);
@@ -1511,48 +1525,55 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
     header.check_bytes[3] = 0xa;
 
     /* write all the data */
-    ret = qemu_write_full(fd, &magic, sizeof(magic));
-    if (ret != sizeof(magic)) {
-        ret = -errno;
+    ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+    if (ret < 0) {
+        error_set(errp, QERR_IO_ERROR);
         goto exit;
     }
-    ret = qemu_write_full(fd, &header, sizeof(header));
-    if (ret != sizeof(header)) {
-        ret = -errno;
+    ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
+    if (ret < 0) {
+        error_set(errp, QERR_IO_ERROR);
         goto exit;
     }
 
-    ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
+    ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
     if (ret < 0) {
-        ret = -errno;
+        error_setg(errp, "Could not truncate file");
         goto exit;
     }
 
     /* write grain directory */
-    lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
-    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
+    gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE;
+    gd_buf = g_malloc0(gd_buf_size);
+    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors;
          i < gt_count; i++, tmp += gt_size) {
-        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
-        if (ret != sizeof(tmp)) {
-            ret = -errno;
-            goto exit;
-        }
+        gd_buf[i] = cpu_to_le32(tmp);
+    }
+    ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
+                      gd_buf, gd_buf_size);
+    if (ret < 0) {
+        error_set(errp, QERR_IO_ERROR);
+        goto exit;
     }
 
     /* write backup grain directory */
-    lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
-    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
+    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors;
          i < gt_count; i++, tmp += gt_size) {
-        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
-        if (ret != sizeof(tmp)) {
-            ret = -errno;
-            goto exit;
-        }
+        gd_buf[i] = cpu_to_le32(tmp);
+    }
+    ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
+                      gd_buf, gd_buf_size);
+    if (ret < 0) {
+        error_set(errp, QERR_IO_ERROR);
+        goto exit;
     }
 
     ret = 0;
- exit:
-    qemu_close(fd);
+exit:
+    if (bs) {
+        bdrv_unref(bs);
+    }
+    g_free(gd_buf);
     return ret;
 }
 
@@ -1599,7 +1620,9 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
 static int vmdk_create(const char *filename, QEMUOptionParameter *options,
                        Error **errp)
 {
-    int fd, idx = 0;
+    int idx = 0;
+    BlockDriverState *new_bs = NULL;
+    Error *local_err;
     char *desc = NULL;
     int64_t total_size = 0, filesize;
     const char *adapter_type = NULL;
@@ -1616,6 +1639,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
     uint32_t parent_cid = 0xffffffff;
     uint32_t number_heads = 16;
     bool zeroed_grain = false;
+    uint32_t desc_offset = 0, desc_len;
     const char desc_template[] =
         "# Disk DescriptorFile\n"
         "version=1\n"
@@ -1749,7 +1773,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
                 path, desc_filename);
 
         if (vmdk_create_extent(ext_filename, size,
-                               flat, compress, zeroed_grain)) {
+                               flat, compress, zeroed_grain, errp)) {
             ret = -EINVAL;
             goto exit;
         }
@@ -1757,7 +1781,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
 
         /* Format description line */
         snprintf(desc_line, sizeof(desc_line),
-                    desc_extent_line, size / 512, desc_filename);
+                    desc_extent_line, size / BDRV_SECTOR_SIZE, desc_filename);
         g_string_append(ext_desc_lines, desc_line);
     }
     /* generate descriptor file */
@@ -1768,36 +1792,43 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
                            parent_desc_line,
                            ext_desc_lines->str,
                            (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
-                           total_size / (int64_t)(63 * number_heads * 512),
+                           total_size /
+                               (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
                            number_heads,
                            adapter_type);
-    if (split || flat) {
-        fd = qemu_open(filename,
-                       O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-                       0644);
+    desc_len = strlen(desc);
+    /* the descriptor is written at offset 0x200 unless split or flat */
+    if (!split && !flat) {
+        desc_offset = 0x200;
     } else {
-        fd = qemu_open(filename,
-                       O_WRONLY | O_BINARY | O_LARGEFILE,
-                       0644);
+        ret = bdrv_create_file(filename, options, &local_err);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not create image file");
+            goto exit;
+        }
     }
-    if (fd < 0) {
-        ret = -errno;
+    ret = bdrv_file_open(&new_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not write description");
         goto exit;
     }
-    /* the descriptor offset = 0x200 */
-    if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) {
-        ret = -errno;
-        goto close_exit;
+    ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not write description");
+        goto exit;
     }
-    ret = qemu_write_full(fd, desc, strlen(desc));
-    if (ret != strlen(desc)) {
-        ret = -errno;
-        goto close_exit;
+    /* bdrv_pwrite writes padding zeros to align to a sector boundary; we
+     * don't need that for the description file */
+    if (desc_offset == 0) {
+        ret = bdrv_truncate(new_bs, desc_len);
+        if (ret < 0) {
+            error_setg(errp, "Could not truncate file");
+        }
     }
-    ret = 0;
-close_exit:
-    qemu_close(fd);
 exit:
+    if (new_bs) {
+        bdrv_unref(new_bs);
+    }
     g_free(desc);
     g_string_free(ext_desc_lines, true);
     return ret;
diff --git a/blockdev.c b/blockdev.c
index 1cb6f4cb04..e457494342 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1822,8 +1822,13 @@ void qmp_block_commit(const char *device,
         return;
     }
 
-    commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
-                &local_err);
+    if (top_bs == bs) {
+        commit_active_start(bs, base_bs, speed, on_error, block_job_cb,
+                            bs, &local_err);
+    } else {
+        commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
+                    &local_err);
+    }
     if (local_err != NULL) {
         error_propagate(errp, local_err);
         return;
diff --git a/configure b/configure
index 07b6be34ff..3782a6a26a 100755
--- a/configure
+++ b/configure
@@ -1004,6 +1004,25 @@ for opt do
   esac
 done
 
+if ! has $python; then
+  error_exit "Python not found. Use --python=/path/to/python"
+fi
+
+# Note that if the Python conditional here evaluates True we will exit
+# with status 1 which is a shell 'false' value.
+if ! $python -c 'import sys; sys.exit(sys.version_info < (2,4) or sys.version_info >= (3,))'; then
+  error_exit "Cannot use '$python', Python 2.4 or later is required." \
+      "Note that Python 3 or later is not yet supported." \
+      "Use --python=/path/to/python to specify a supported Python."
+fi
+
+# The -B switch was added in Python 2.6.
+# If it is supplied, compiled files are not written.
+# Use it for Python versions which support it.
+if $python -B -c 'import sys; sys.exit(0)' 2>/dev/null; then
+  python="$python -B"
+fi
+
 case "$cpu" in
     ppc)
            CPU_CFLAGS="-m32"
@@ -1074,169 +1093,169 @@ cat << EOF
 Usage: configure [options]
 Options: [defaults in brackets after descriptions]
 
+Standard options:
+  --help                   print this message
+  --prefix=PREFIX          install in PREFIX [$prefix]
+  --interp-prefix=PREFIX   where to find shared libraries, etc.
+                           use %M for cpu name [$interp_prefix]
+  --target-list=LIST       set target list (default: build everything)
+$(echo Available targets: $default_target_list | \
+  fold -s -w 53 | sed -e 's/^/                           /')
+
+Advanced options (experts only):
+  --source-path=PATH       path of source code [$source_path]
+  --cross-prefix=PREFIX    use PREFIX for compile tools [$cross_prefix]
+  --cc=CC                  use C compiler CC [$cc]
+  --iasl=IASL              use ACPI compiler IASL [$iasl]
+  --host-cc=CC             use C compiler CC [$host_cc] for code run at
+                           build time
+  --cxx=CXX                use C++ compiler CXX [$cxx]
+  --objcc=OBJCC            use Objective-C compiler OBJCC [$objcc]
+  --extra-cflags=CFLAGS    append extra C compiler flags QEMU_CFLAGS
+  --extra-ldflags=LDFLAGS  append extra linker flags LDFLAGS
+  --make=MAKE              use specified make [$make]
+  --install=INSTALL        use specified install [$install]
+  --python=PYTHON          use specified python [$python]
+  --smbd=SMBD              use specified smbd [$smbd]
+  --static                 enable static build [$static]
+  --mandir=PATH            install man pages in PATH
+  --datadir=PATH           install firmware in PATH$confsuffix
+  --docdir=PATH            install documentation in PATH$confsuffix
+  --bindir=PATH            install binaries in PATH
+  --libdir=PATH            install libraries in PATH
+  --sysconfdir=PATH        install config in PATH$confsuffix
+  --localstatedir=PATH     install local state in PATH (set at runtime on win32)
+  --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and sysconfdir [$confsuffix]
+  --enable-debug-tcg       enable TCG debugging
+  --disable-debug-tcg      disable TCG debugging (default)
+  --enable-debug-info       enable debugging information (default)
+  --disable-debug-info      disable debugging information
+  --enable-debug           enable common debug build options
+  --enable-sparse          enable sparse checker
+  --disable-sparse         disable sparse checker (default)
+  --disable-strip          disable stripping binaries
+  --disable-werror         disable compilation abort on warning
+  --disable-sdl            disable SDL
+  --enable-sdl             enable SDL
+  --disable-gtk            disable gtk UI
+  --enable-gtk             enable gtk UI
+  --disable-virtfs         disable VirtFS
+  --enable-virtfs          enable VirtFS
+  --disable-vnc            disable VNC
+  --enable-vnc             enable VNC
+  --disable-cocoa          disable Cocoa (Mac OS X only)
+  --enable-cocoa           enable Cocoa (default on Mac OS X)
+  --audio-drv-list=LIST    set audio drivers list:
+                           Available drivers: $audio_possible_drivers
+  --block-drv-whitelist=L  Same as --block-drv-rw-whitelist=L
+  --block-drv-rw-whitelist=L
+                           set block driver read-write whitelist
+                           (affects only QEMU, not qemu-img)
+  --block-drv-ro-whitelist=L
+                           set block driver read-only whitelist
+                           (affects only QEMU, not qemu-img)
+  --disable-xen            disable xen backend driver support
+  --enable-xen             enable xen backend driver support
+  --disable-xen-pci-passthrough
+  --enable-xen-pci-passthrough
+  --disable-brlapi         disable BrlAPI
+  --enable-brlapi          enable BrlAPI
+  --disable-vnc-tls        disable TLS encryption for VNC server
+  --enable-vnc-tls         enable TLS encryption for VNC server
+  --disable-vnc-sasl       disable SASL encryption for VNC server
+  --enable-vnc-sasl        enable SASL encryption for VNC server
+  --disable-vnc-jpeg       disable JPEG lossy compression for VNC server
+  --enable-vnc-jpeg        enable JPEG lossy compression for VNC server
+  --disable-vnc-png        disable PNG compression for VNC server (default)
+  --enable-vnc-png         enable PNG compression for VNC server
+  --disable-vnc-ws         disable Websockets support for VNC server
+  --enable-vnc-ws          enable Websockets support for VNC server
+  --disable-curses         disable curses output
+  --enable-curses          enable curses output
+  --disable-curl           disable curl connectivity
+  --enable-curl            enable curl connectivity
+  --disable-fdt            disable fdt device tree
+  --enable-fdt             enable fdt device tree
+  --disable-bluez          disable bluez stack connectivity
+  --enable-bluez           enable bluez stack connectivity
+  --disable-slirp          disable SLIRP userspace network connectivity
+  --disable-kvm            disable KVM acceleration support
+  --enable-kvm             enable KVM acceleration support
+  --disable-rdma           disable RDMA-based migration support
+  --enable-rdma            enable RDMA-based migration support
+  --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)
+  --enable-system          enable all system emulation targets
+  --disable-system         disable all system emulation targets
+  --enable-user            enable supported user emulation targets
+  --disable-user           disable all user emulation targets
+  --enable-linux-user      enable all linux usermode emulation targets
+  --disable-linux-user     disable all linux usermode emulation targets
+  --enable-bsd-user        enable all BSD usermode emulation targets
+  --disable-bsd-user       disable all BSD usermode emulation targets
+  --enable-guest-base      enable GUEST_BASE support for usermode
+                           emulation targets
+  --disable-guest-base     disable GUEST_BASE support
+  --enable-pie             build Position Independent Executables
+  --disable-pie            do not build Position Independent Executables
+  --fmod-lib               path to FMOD library
+  --fmod-inc               path to FMOD includes
+  --oss-lib                path to OSS library
+  --enable-uname-release=R Return R for uname -r in usermode emulation
+  --cpu=CPU                Build for host CPU [$cpu]
+  --disable-uuid           disable uuid support
+  --enable-uuid            enable uuid support
+  --disable-vde            disable support for vde network
+  --enable-vde             enable support for vde network
+  --disable-netmap         disable support for netmap network
+  --enable-netmap          enable support for netmap network
+  --disable-linux-aio      disable Linux AIO support
+  --enable-linux-aio       enable Linux AIO support
+  --disable-cap-ng         disable libcap-ng support
+  --enable-cap-ng          enable libcap-ng support
+  --disable-attr           disables attr and xattr support
+  --enable-attr            enable attr and xattr support
+  --disable-blobs          disable installing provided firmware blobs
+  --enable-docs            enable documentation build
+  --disable-docs           disable documentation build
+  --disable-vhost-net      disable vhost-net acceleration support
+  --enable-vhost-net       enable vhost-net acceleration support
+  --enable-trace-backend=B Set trace backend
+                           Available backends: $($python $source_path/scripts/tracetool.py --list-backends)
+  --with-trace-file=NAME   Full PATH,NAME of file to store traces
+                           Default:trace-<pid>
+  --disable-spice          disable spice
+  --enable-spice           enable spice
+  --enable-rbd             enable building the rados block device (rbd)
+  --disable-libiscsi       disable iscsi support
+  --enable-libiscsi        enable iscsi support
+  --disable-smartcard-nss  disable smartcard nss support
+  --enable-smartcard-nss   enable smartcard nss support
+  --disable-libusb         disable libusb (for usb passthrough)
+  --enable-libusb          enable libusb (for usb passthrough)
+  --disable-usb-redir      disable usb network redirection support
+  --enable-usb-redir       enable usb network redirection support
+  --disable-guest-agent    disable building of the QEMU Guest Agent
+  --enable-guest-agent     enable building of the QEMU Guest Agent
+  --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent
+  --with-win-sdk=SDK-path  path to Windows Platform SDK (to build VSS .tlb)
+  --disable-seccomp        disable seccomp support
+  --enable-seccomp         enables seccomp support
+  --with-coroutine=BACKEND coroutine backend. Supported options:
+                           gthread, ucontext, sigaltstack, windows
+  --disable-coroutine-pool disable coroutine freelist (worse performance)
+  --enable-coroutine-pool  enable coroutine freelist (better performance)
+  --enable-glusterfs       enable GlusterFS backend
+  --disable-glusterfs      disable GlusterFS backend
+  --enable-gcov            enable test coverage analysis with gcov
+  --gcov=GCOV              use specified gcov [$gcov_tool]
+  --enable-tpm             enable TPM support
+  --disable-libssh2        disable ssh block device support
+  --enable-libssh2         enable ssh block device support
+  --disable-vhdx           disables support for the Microsoft VHDX image format
+  --enable-vhdx            enable support for the Microsoft VHDX image format
+
+NOTE: The object files are built at the place where configure is launched
 EOF
-echo "Standard options:"
-echo "  --help                   print this message"
-echo "  --prefix=PREFIX          install in PREFIX [$prefix]"
-echo "  --interp-prefix=PREFIX   where to find shared libraries, etc."
-echo "                           use %M for cpu name [$interp_prefix]"
-echo "  --target-list=LIST       set target list (default: build everything)"
-echo "Available targets: $default_target_list" | \
-    fold -s -w 53 | sed -e 's/^/                           /'
-echo ""
-echo "Advanced options (experts only):"
-echo "  --source-path=PATH       path of source code [$source_path]"
-echo "  --cross-prefix=PREFIX    use PREFIX for compile tools [$cross_prefix]"
-echo "  --cc=CC                  use C compiler CC [$cc]"
-echo "  --iasl=IASL              use ACPI compiler IASL [$iasl]"
-echo "  --host-cc=CC             use C compiler CC [$host_cc] for code run at"
-echo "                           build time"
-echo "  --cxx=CXX                use C++ compiler CXX [$cxx]"
-echo "  --objcc=OBJCC            use Objective-C compiler OBJCC [$objcc]"
-echo "  --extra-cflags=CFLAGS    append extra C compiler flags QEMU_CFLAGS"
-echo "  --extra-ldflags=LDFLAGS  append extra linker flags LDFLAGS"
-echo "  --make=MAKE              use specified make [$make]"
-echo "  --install=INSTALL        use specified install [$install]"
-echo "  --python=PYTHON          use specified python [$python]"
-echo "  --smbd=SMBD              use specified smbd [$smbd]"
-echo "  --static                 enable static build [$static]"
-echo "  --mandir=PATH            install man pages in PATH"
-echo "  --datadir=PATH           install firmware in PATH$confsuffix"
-echo "  --docdir=PATH            install documentation in PATH$confsuffix"
-echo "  --bindir=PATH            install binaries in PATH"
-echo "  --libdir=PATH            install libraries in PATH"
-echo "  --sysconfdir=PATH        install config in PATH$confsuffix"
-echo "  --localstatedir=PATH     install local state in PATH (set at runtime on win32)"
-echo "  --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and sysconfdir [$confsuffix]"
-echo "  --enable-debug-tcg       enable TCG debugging"
-echo "  --disable-debug-tcg      disable TCG debugging (default)"
-echo "  --enable-debug-info       enable debugging information (default)"
-echo "  --disable-debug-info      disable debugging information"
-echo "  --enable-debug           enable common debug build options"
-echo "  --enable-sparse          enable sparse checker"
-echo "  --disable-sparse         disable sparse checker (default)"
-echo "  --disable-strip          disable stripping binaries"
-echo "  --disable-werror         disable compilation abort on warning"
-echo "  --disable-sdl            disable SDL"
-echo "  --enable-sdl             enable SDL"
-echo "  --disable-gtk            disable gtk UI"
-echo "  --enable-gtk             enable gtk UI"
-echo "  --disable-virtfs         disable VirtFS"
-echo "  --enable-virtfs          enable VirtFS"
-echo "  --disable-vnc            disable VNC"
-echo "  --enable-vnc             enable VNC"
-echo "  --disable-cocoa          disable Cocoa (Mac OS X only)"
-echo "  --enable-cocoa           enable Cocoa (default on Mac OS X)"
-echo "  --audio-drv-list=LIST    set audio drivers list:"
-echo "                           Available drivers: $audio_possible_drivers"
-echo "  --block-drv-whitelist=L  Same as --block-drv-rw-whitelist=L"
-echo "  --block-drv-rw-whitelist=L"
-echo "                           set block driver read-write whitelist"
-echo "                           (affects only QEMU, not qemu-img)"
-echo "  --block-drv-ro-whitelist=L"
-echo "                           set block driver read-only whitelist"
-echo "                           (affects only QEMU, not qemu-img)"
-echo "  --disable-xen            disable xen backend driver support"
-echo "  --enable-xen             enable xen backend driver support"
-echo "  --disable-xen-pci-passthrough"
-echo "  --enable-xen-pci-passthrough"
-echo "  --disable-brlapi         disable BrlAPI"
-echo "  --enable-brlapi          enable BrlAPI"
-echo "  --disable-vnc-tls        disable TLS encryption for VNC server"
-echo "  --enable-vnc-tls         enable TLS encryption for VNC server"
-echo "  --disable-vnc-sasl       disable SASL encryption for VNC server"
-echo "  --enable-vnc-sasl        enable SASL encryption for VNC server"
-echo "  --disable-vnc-jpeg       disable JPEG lossy compression for VNC server"
-echo "  --enable-vnc-jpeg        enable JPEG lossy compression for VNC server"
-echo "  --disable-vnc-png        disable PNG compression for VNC server (default)"
-echo "  --enable-vnc-png         enable PNG compression for VNC server"
-echo "  --disable-vnc-ws         disable Websockets support for VNC server"
-echo "  --enable-vnc-ws          enable Websockets support for VNC server"
-echo "  --disable-curses         disable curses output"
-echo "  --enable-curses          enable curses output"
-echo "  --disable-curl           disable curl connectivity"
-echo "  --enable-curl            enable curl connectivity"
-echo "  --disable-fdt            disable fdt device tree"
-echo "  --enable-fdt             enable fdt device tree"
-echo "  --disable-bluez          disable bluez stack connectivity"
-echo "  --enable-bluez           enable bluez stack connectivity"
-echo "  --disable-slirp          disable SLIRP userspace network connectivity"
-echo "  --disable-kvm            disable KVM acceleration support"
-echo "  --enable-kvm             enable KVM acceleration support"
-echo "  --disable-rdma           disable RDMA-based migration support"
-echo "  --enable-rdma            enable RDMA-based migration support"
-echo "  --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)"
-echo "  --enable-system          enable all system emulation targets"
-echo "  --disable-system         disable all system emulation targets"
-echo "  --enable-user            enable supported user emulation targets"
-echo "  --disable-user           disable all user emulation targets"
-echo "  --enable-linux-user      enable all linux usermode emulation targets"
-echo "  --disable-linux-user     disable all linux usermode emulation targets"
-echo "  --enable-bsd-user        enable all BSD usermode emulation targets"
-echo "  --disable-bsd-user       disable all BSD usermode emulation targets"
-echo "  --enable-guest-base      enable GUEST_BASE support for usermode"
-echo "                           emulation targets"
-echo "  --disable-guest-base     disable GUEST_BASE support"
-echo "  --enable-pie             build Position Independent Executables"
-echo "  --disable-pie            do not build Position Independent Executables"
-echo "  --fmod-lib               path to FMOD library"
-echo "  --fmod-inc               path to FMOD includes"
-echo "  --oss-lib                path to OSS library"
-echo "  --enable-uname-release=R Return R for uname -r in usermode emulation"
-echo "  --cpu=CPU                Build for host CPU [$cpu]"
-echo "  --disable-uuid           disable uuid support"
-echo "  --enable-uuid            enable uuid support"
-echo "  --disable-vde            disable support for vde network"
-echo "  --enable-vde             enable support for vde network"
-echo "  --disable-netmap         disable support for netmap network"
-echo "  --enable-netmap          enable support for netmap network"
-echo "  --disable-linux-aio      disable Linux AIO support"
-echo "  --enable-linux-aio       enable Linux AIO support"
-echo "  --disable-cap-ng         disable libcap-ng support"
-echo "  --enable-cap-ng          enable libcap-ng support"
-echo "  --disable-attr           disables attr and xattr support"
-echo "  --enable-attr            enable attr and xattr support"
-echo "  --disable-blobs          disable installing provided firmware blobs"
-echo "  --enable-docs            enable documentation build"
-echo "  --disable-docs           disable documentation build"
-echo "  --disable-vhost-net      disable vhost-net acceleration support"
-echo "  --enable-vhost-net       enable vhost-net acceleration support"
-echo "  --enable-trace-backend=B Set trace backend"
-echo "                           Available backends:" $($python "$source_path"/scripts/tracetool.py --list-backends)
-echo "  --with-trace-file=NAME   Full PATH,NAME of file to store traces"
-echo "                           Default:trace-<pid>"
-echo "  --disable-spice          disable spice"
-echo "  --enable-spice           enable spice"
-echo "  --enable-rbd             enable building the rados block device (rbd)"
-echo "  --disable-libiscsi       disable iscsi support"
-echo "  --enable-libiscsi        enable iscsi support"
-echo "  --disable-smartcard-nss  disable smartcard nss support"
-echo "  --enable-smartcard-nss   enable smartcard nss support"
-echo "  --disable-libusb         disable libusb (for usb passthrough)"
-echo "  --enable-libusb          enable libusb (for usb passthrough)"
-echo "  --disable-usb-redir      disable usb network redirection support"
-echo "  --enable-usb-redir       enable usb network redirection support"
-echo "  --disable-guest-agent    disable building of the QEMU Guest Agent"
-echo "  --enable-guest-agent     enable building of the QEMU Guest Agent"
-echo "  --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent"
-echo "  --with-win-sdk=SDK-path  path to Windows Platform SDK (to build VSS .tlb)"
-echo "  --disable-seccomp        disable seccomp support"
-echo "  --enable-seccomp         enables seccomp support"
-echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
-echo "                           gthread, ucontext, sigaltstack, windows"
-echo "  --disable-coroutine-pool disable coroutine freelist (worse performance)"
-echo "  --enable-coroutine-pool  enable coroutine freelist (better performance)"
-echo "  --enable-glusterfs       enable GlusterFS backend"
-echo "  --disable-glusterfs      disable GlusterFS backend"
-echo "  --enable-gcov            enable test coverage analysis with gcov"
-echo "  --gcov=GCOV              use specified gcov [$gcov_tool]"
-echo "  --enable-tpm             enable TPM support"
-echo "  --disable-libssh2        disable ssh block device support"
-echo "  --enable-libssh2         enable ssh block device support"
-echo "  --disable-vhdx           disables support for the Microsoft VHDX image format"
-echo "  --enable-vhdx            enable support for the Microsoft VHDX image format"
-echo ""
-echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
 fi
 
@@ -1419,25 +1438,6 @@ if test "$solaris" = "yes" ; then
   fi
 fi
 
-if ! has $python; then
-  error_exit "Python not found. Use --python=/path/to/python"
-fi
-
-# Note that if the Python conditional here evaluates True we will exit
-# with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (2,4) or sys.version_info >= (3,))'; then
-  error_exit "Cannot use '$python', Python 2.4 or later is required." \
-      "Note that Python 3 or later is not yet supported." \
-      "Use --python=/path/to/python to specify a supported Python."
-fi
-
-# The -B switch was added in Python 2.6.
-# If it is supplied, compiled files are not written.
-# Use it for Python versions which support it.
-if $python -B -c 'import sys; sys.exit(0)' 2>/dev/null; then
-  python="$python -B"
-fi
-
 if test -z "${target_list+xxx}" ; then
     target_list="$default_target_list"
 else
@@ -3078,6 +3078,21 @@ EOF
   fi
 fi
 
+# We also need to know the API version because there was an
+# API change from 1.4.0 to 1.5.0.
+if test "$libiscsi" = "yes"; then
+  cat >$TMPC <<EOF
+#include <iscsi/iscsi.h>
+int main(void)
+{
+  iscsi_read10_task(0, 0, 0, 0, 0, 0, 0);
+  return 0;
+}
+EOF
+  if compile_prog "" "-liscsi"; then
+    libiscsi_version="1.4.0"
+  fi
+fi
 
 ##########################################
 # Do we need libm
@@ -3805,7 +3820,11 @@ echo "nss used          $smartcard_nss"
 echo "libusb            $libusb"
 echo "usb net redir     $usb_redir"
 echo "GLX support       $glx"
+if test "$libiscsi_version" = "1.4.0"; then
+echo "libiscsi support  $libiscsi (1.4.0)"
+else
 echo "libiscsi support  $libiscsi"
+fi
 echo "build guest agent $guest_agent"
 echo "QGA VSS support   $guest_agent_with_vss"
 echo "seccomp support   $seccomp"
@@ -4137,6 +4156,9 @@ fi
 
 if test "$libiscsi" = "yes" ; then
   echo "CONFIG_LIBISCSI=y" >> $config_host_mak
+  if test "$libiscsi_version" = "1.4.0"; then
+    echo "CONFIG_LIBISCSI_1_4=y" >> $config_host_mak
+  fi
 fi
 
 if test "$seccomp" = "yes"; then
diff --git a/cpu-exec.c b/cpu-exec.c
index 30cfa2a63a..a6c01f4193 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -53,7 +53,25 @@ void cpu_resume_from_signal(CPUArchState *env, void *puc)
 static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
 {
     CPUArchState *env = cpu->env_ptr;
-    uintptr_t next_tb = tcg_qemu_tb_exec(env, tb_ptr);
+    uintptr_t next_tb;
+
+#if defined(DEBUG_DISAS)
+    if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
+#if defined(TARGET_I386)
+        log_cpu_state(cpu, CPU_DUMP_CCOP);
+#elif defined(TARGET_M68K)
+        /* ??? Should not modify env state for dumping.  */
+        cpu_m68k_flush_flags(env, env->cc_op);
+        env->cc_op = CC_OP_FLAGS;
+        env->sr = (env->sr & 0xffe0) | env->cc_dest | (env->cc_x << 4);
+        log_cpu_state(cpu, 0);
+#else
+        log_cpu_state(cpu, 0);
+#endif
+    }
+#endif /* DEBUG_DISAS */
+
+    next_tb = tcg_qemu_tb_exec(env, tb_ptr);
     if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
         /* We didn't start executing this TB (eg because the instruction
          * counter hit zero); we must restore the guest PC to the address
@@ -206,6 +224,9 @@ int cpu_exec(CPUArchState *env)
       (defined(TARGET_M68K) || defined(TARGET_PPC) || defined(TARGET_S390X)))
     CPUClass *cc = CPU_GET_CLASS(cpu);
 #endif
+#ifdef TARGET_I386
+    X86CPU *x86_cpu = X86_CPU(cpu);
+#endif
     int ret, interrupt_request;
     TranslationBlock *tb;
     uint8_t *tc_ptr;
@@ -320,24 +341,24 @@ int cpu_exec(CPUArchState *env)
 #if !defined(CONFIG_USER_ONLY)
                     if (interrupt_request & CPU_INTERRUPT_POLL) {
                         cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
-                        apic_poll_irq(env->apic_state);
+                        apic_poll_irq(x86_cpu->apic_state);
                     }
 #endif
                     if (interrupt_request & CPU_INTERRUPT_INIT) {
                             cpu_svm_check_intercept_param(env, SVM_EXIT_INIT,
                                                           0);
-                            do_cpu_init(x86_env_get_cpu(env));
+                            do_cpu_init(x86_cpu);
                             env->exception_index = EXCP_HALTED;
                             cpu_loop_exit(env);
                     } else if (interrupt_request & CPU_INTERRUPT_SIPI) {
-                            do_cpu_sipi(x86_env_get_cpu(env));
+                            do_cpu_sipi(x86_cpu);
                     } else if (env->hflags2 & HF2_GIF_MASK) {
                         if ((interrupt_request & CPU_INTERRUPT_SMI) &&
                             !(env->hflags & HF_SMM_MASK)) {
                             cpu_svm_check_intercept_param(env, SVM_EXIT_SMI,
                                                           0);
                             cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
-                            do_smm_enter(x86_env_get_cpu(env));
+                            do_smm_enter(x86_cpu);
                             next_tb = 0;
                         } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
                                    !(env->hflags2 & HF2_NMI_MASK)) {
@@ -579,22 +600,6 @@ int cpu_exec(CPUArchState *env)
                     env->exception_index = EXCP_INTERRUPT;
                     cpu_loop_exit(env);
                 }
-#if defined(DEBUG_DISAS)
-                if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
-                    /* restore flags in standard format */
-#if defined(TARGET_I386)
-                    log_cpu_state(cpu, CPU_DUMP_CCOP);
-#elif defined(TARGET_M68K)
-                    cpu_m68k_flush_flags(env, env->cc_op);
-                    env->cc_op = CC_OP_FLAGS;
-                    env->sr = (env->sr & 0xffe0)
-                              | env->cc_dest | (env->cc_x << 4);
-                    log_cpu_state(cpu, 0);
-#else
-                    log_cpu_state(cpu, 0);
-#endif
-                }
-#endif /* DEBUG_DISAS */
                 spin_lock(&tcg_ctx.tb_ctx.tb_lock);
                 tb = tb_find_fast(env);
                 /* Note: we do it here to avoid a gcc bug on Mac OS X when
@@ -685,6 +690,9 @@ int cpu_exec(CPUArchState *env)
       (defined(TARGET_M68K) || defined(TARGET_PPC) || defined(TARGET_S390X)))
             cc = CPU_GET_CLASS(cpu);
 #endif
+#ifdef TARGET_I386
+            x86_cpu = X86_CPU(cpu);
+#endif
         }
     } /* for(;;) */
 
diff --git a/cpus.c b/cpus.c
index 01d128d7af..ca4c59fe0b 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1458,12 +1458,11 @@ void qmp_inject_nmi(Error **errp)
 
     CPU_FOREACH(cs) {
         X86CPU *cpu = X86_CPU(cs);
-        CPUX86State *env = &cpu->env;
 
-        if (!env->apic_state) {
+        if (!cpu->apic_state) {
             cpu_interrupt(cs, CPU_INTERRUPT_NMI);
         } else {
-            apic_deliver_nmi(env->apic_state);
+            apic_deliver_nmi(cpu->apic_state);
         }
     }
 #elif defined(TARGET_S390X)
diff --git a/cputlb.c b/cputlb.c
index fff0afbd4a..b533f3f372 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -26,6 +26,7 @@
 #include "exec/cputlb.h"
 
 #include "exec/memory-internal.h"
+#include "exec/ram_addr.h"
 
 //#define DEBUG_TLB
 //#define DEBUG_TLB_CHECK
@@ -33,13 +34,6 @@
 /* statistics */
 int tlb_flush_count;
 
-static const CPUTLBEntry s_cputlb_empty_entry = {
-    .addr_read  = -1,
-    .addr_write = -1,
-    .addr_code  = -1,
-    .addend     = -1,
-};
-
 /* NOTE:
  * If flush_global is true (the usual case), flush all tlb entries.
  * If flush_global is false, flush (at least) all tlb entries not
@@ -55,7 +49,6 @@ static const CPUTLBEntry s_cputlb_empty_entry = {
 void tlb_flush(CPUArchState *env, int flush_global)
 {
     CPUState *cpu = ENV_GET_CPU(env);
-    int i;
 
 #if defined(DEBUG_TLB)
     printf("tlb_flush:\n");
@@ -64,15 +57,8 @@ void tlb_flush(CPUArchState *env, int flush_global)
        links while we are modifying them */
     cpu->current_tb = NULL;
 
-    for (i = 0; i < CPU_TLB_SIZE; i++) {
-        int mmu_idx;
-
-        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-            env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
-        }
-    }
-
-    memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
+    memset(env->tlb_table, -1, sizeof(env->tlb_table));
+    memset(env->tb_jmp_cache, 0, sizeof(env->tb_jmp_cache));
 
     env->tlb_flush_addr = -1;
     env->tlb_flush_mask = 0;
@@ -87,7 +73,7 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
                  (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
         addr == (tlb_entry->addr_code &
                  (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        *tlb_entry = s_cputlb_empty_entry;
+        memset(tlb_entry, -1, sizeof(*tlb_entry));
     }
 }
 
@@ -127,9 +113,8 @@ void tlb_flush_page(CPUArchState *env, target_ulong addr)
    can be detected */
 void tlb_protect_code(ram_addr_t ram_addr)
 {
-    cpu_physical_memory_reset_dirty(ram_addr,
-                                    ram_addr + TARGET_PAGE_SIZE,
-                                    CODE_DIRTY_FLAG);
+    cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE,
+                                    DIRTY_MEMORY_CODE);
 }
 
 /* update the TLB so that writes in physical page 'phys_addr' are no longer
@@ -137,7 +122,7 @@ void tlb_protect_code(ram_addr_t ram_addr)
 void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
                              target_ulong vaddr)
 {
-    cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
 }
 
 static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
@@ -299,7 +284,8 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
             /* Write access calls the I/O callback.  */
             te->addr_write = address | TLB_MMIO;
         } else if (memory_region_is_ram(section->mr)
-                   && !cpu_physical_memory_is_dirty(section->mr->ram_addr + xlat)) {
+                   && cpu_physical_memory_is_clean(section->mr->ram_addr
+                                                   + xlat)) {
             te->addr_write = address | TLB_NOTDIRTY;
         } else {
             te->addr_write = address;
diff --git a/default-configs/aarch64-linux-user.mak b/default-configs/aarch64-linux-user.mak
new file mode 100644
index 0000000000..3df7de5b8f
--- /dev/null
+++ b/default-configs/aarch64-linux-user.mak
@@ -0,0 +1,3 @@
+# Default configuration for aarch64-linux-user
+
+CONFIG_GDBSTUB_XML=y
diff --git a/exec.c b/exec.c
index 7e49e8e555..b387d2856d 100644
--- a/exec.c
+++ b/exec.c
@@ -50,6 +50,7 @@
 #include "translate-all.h"
 
 #include "exec/memory-internal.h"
+#include "exec/ram_addr.h"
 #include "qemu/cache-utils.h"
 
 #include "qemu/range.h"
@@ -57,7 +58,7 @@
 //#define DEBUG_SUBPAGE
 
 #if !defined(CONFIG_USER_ONLY)
-static int in_migration;
+static bool in_migration;
 
 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
 
@@ -724,11 +725,14 @@ found:
     return block;
 }
 
-static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
-                                      uintptr_t length)
+static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 {
-    RAMBlock *block;
     ram_addr_t start1;
+    RAMBlock *block;
+    ram_addr_t end;
+
+    end = TARGET_PAGE_ALIGN(start + length);
+    start &= TARGET_PAGE_MASK;
 
     block = qemu_get_ram_block(start);
     assert(block == qemu_get_ram_block(end - 1));
@@ -737,29 +741,21 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
 }
 
 /* Note: start and end must be within the same ram block.  */
-void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
-                                     int dirty_flags)
+void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
+                                     unsigned client)
 {
-    uintptr_t length;
-
-    start &= TARGET_PAGE_MASK;
-    end = TARGET_PAGE_ALIGN(end);
-
-    length = end - start;
     if (length == 0)
         return;
-    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
+    cpu_physical_memory_clear_dirty_range(start, length, client);
 
     if (tcg_enabled()) {
-        tlb_reset_dirty_range_all(start, end, length);
+        tlb_reset_dirty_range_all(start, length);
     }
 }
 
-static int cpu_physical_memory_set_dirty_tracking(int enable)
+static void cpu_physical_memory_set_dirty_tracking(bool enable)
 {
-    int ret = 0;
     in_migration = enable;
-    return ret;
 }
 
 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
@@ -1211,6 +1207,9 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr)
 {
     RAMBlock *block, *new_block;
+    ram_addr_t old_ram_size, new_ram_size;
+
+    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
 
     size = TARGET_PAGE_ALIGN(size);
     new_block = g_malloc0(sizeof(*new_block));
@@ -1271,11 +1270,17 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     ram_list.version++;
     qemu_mutex_unlock_ramlist();
 
-    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
-                                       last_ram_offset() >> TARGET_PAGE_BITS);
-    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
-           0, size >> TARGET_PAGE_BITS);
-    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
+    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
+
+    if (new_ram_size > old_ram_size) {
+        int i;
+        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+            ram_list.dirty_memory[i] =
+                bitmap_zero_extend(ram_list.dirty_memory[i],
+                                   old_ram_size, new_ram_size);
+       }
+    }
+    cpu_physical_memory_set_dirty_range(new_block->offset, size);
 
     qemu_ram_setup_dump(new_block->host, size);
     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
@@ -1485,11 +1490,8 @@ found:
 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                                uint64_t val, unsigned size)
 {
-    int dirty_flags;
-    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
-    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
+    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
         tb_invalidate_phys_page_fast(ram_addr, size);
-        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
     }
     switch (size) {
     case 1:
@@ -1504,11 +1506,11 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
     default:
         abort();
     }
-    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
-    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
     /* we remove the notdirty callback only if the code has been
        flushed */
-    if (dirty_flags == 0xff) {
+    if (!cpu_physical_memory_is_clean(ram_addr)) {
         CPUArchState *env = current_cpu->env_ptr;
         tlb_set_dirty(env, env->mem_io_vaddr);
     }
@@ -1795,12 +1797,12 @@ static void tcg_commit(MemoryListener *listener)
 
 static void core_log_global_start(MemoryListener *listener)
 {
-    cpu_physical_memory_set_dirty_tracking(1);
+    cpu_physical_memory_set_dirty_tracking(true);
 }
 
 static void core_log_global_stop(MemoryListener *listener)
 {
-    cpu_physical_memory_set_dirty_tracking(0);
+    cpu_physical_memory_set_dirty_tracking(false);
 }
 
 static MemoryListener core_memory_listener = {
@@ -1911,11 +1913,12 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
 static void invalidate_and_set_dirty(hwaddr addr,
                                      hwaddr length)
 {
-    if (!cpu_physical_memory_is_dirty(addr)) {
+    if (cpu_physical_memory_is_clean(addr)) {
         /* invalidate code */
         tb_invalidate_phys_page_range(addr, addr + length, 0);
         /* set dirty bit */
-        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
+        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
+        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
     }
     xen_modified_memory(addr, length);
 }
@@ -2526,12 +2529,13 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val)
         stl_p(ptr, val);
 
         if (unlikely(in_migration)) {
-            if (!cpu_physical_memory_is_dirty(addr1)) {
+            if (cpu_physical_memory_is_clean(addr1)) {
                 /* invalidate code */
                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                 /* set dirty bit */
-                cpu_physical_memory_set_dirty_flags(
-                    addr1, (0xff & ~CODE_DIRTY_FLAG));
+                cpu_physical_memory_set_dirty_flag(addr1,
+                                                   DIRTY_MEMORY_MIGRATION);
+                cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
             }
         }
     }
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index dbda61bc8e..e0ea599769 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -42,6 +42,9 @@ these four paragraphs for those parts of this code that are retained.
 
 #include "fpu/softfloat.h"
 
+/* We only need stdlib for abort() */
+#include <stdlib.h>
+
 /*----------------------------------------------------------------------------
 | Primitive arithmetic functions, including multi-word arithmetic, and
 | division and square root approximations.  (Can be specialized to target if
@@ -59,21 +62,6 @@ these four paragraphs for those parts of this code that are retained.
 *----------------------------------------------------------------------------*/
 #include "softfloat-specialize.h"
 
-void set_float_rounding_mode(int val STATUS_PARAM)
-{
-    STATUS(float_rounding_mode) = val;
-}
-
-void set_float_exception_flags(int val STATUS_PARAM)
-{
-    STATUS(float_exception_flags) = val;
-}
-
-void set_floatx80_rounding_precision(int val STATUS_PARAM)
-{
-    STATUS(floatx80_rounding_precision) = val;
-}
-
 /*----------------------------------------------------------------------------
 | Returns the fraction bits of the half-precision floating-point value `a'.
 *----------------------------------------------------------------------------*/
@@ -121,20 +109,22 @@ static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
 
     roundingMode = STATUS(float_rounding_mode);
     roundNearestEven = ( roundingMode == float_round_nearest_even );
-    roundIncrement = 0x40;
-    if ( ! roundNearestEven ) {
-        if ( roundingMode == float_round_to_zero ) {
-            roundIncrement = 0;
-        }
-        else {
-            roundIncrement = 0x7F;
-            if ( zSign ) {
-                if ( roundingMode == float_round_up ) roundIncrement = 0;
-            }
-            else {
-                if ( roundingMode == float_round_down ) roundIncrement = 0;
-            }
-        }
+    switch (roundingMode) {
+    case float_round_nearest_even:
+    case float_round_ties_away:
+        roundIncrement = 0x40;
+        break;
+    case float_round_to_zero:
+        roundIncrement = 0;
+        break;
+    case float_round_up:
+        roundIncrement = zSign ? 0 : 0x7f;
+        break;
+    case float_round_down:
+        roundIncrement = zSign ? 0x7f : 0;
+        break;
+    default:
+        abort();
     }
     roundBits = absZ & 0x7F;
     absZ = ( absZ + roundIncrement )>>7;
@@ -170,19 +160,22 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU
 
     roundingMode = STATUS(float_rounding_mode);
     roundNearestEven = ( roundingMode == float_round_nearest_even );
-    increment = ( (int64_t) absZ1 < 0 );
-    if ( ! roundNearestEven ) {
-        if ( roundingMode == float_round_to_zero ) {
-            increment = 0;
-        }
-        else {
-            if ( zSign ) {
-                increment = ( roundingMode == float_round_down ) && absZ1;
-            }
-            else {
-                increment = ( roundingMode == float_round_up ) && absZ1;
-            }
-        }
+    switch (roundingMode) {
+    case float_round_nearest_even:
+    case float_round_ties_away:
+        increment = ((int64_t) absZ1 < 0);
+        break;
+    case float_round_to_zero:
+        increment = 0;
+        break;
+    case float_round_up:
+        increment = !zSign && absZ1;
+        break;
+    case float_round_down:
+        increment = zSign && absZ1;
+        break;
+    default:
+        abort();
     }
     if ( increment ) {
         ++absZ0;
@@ -204,6 +197,61 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU
 }
 
 /*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit unsigned integer corresponding to the
+| input.  Ordinarily, the fixed-point input is simply rounded to an integer,
+| with the inexact exception raised if the input cannot be represented exactly
+| as an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static int64 roundAndPackUint64(flag zSign, uint64_t absZ0,
+                                uint64_t absZ1 STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = (roundingMode == float_round_nearest_even);
+    switch (roundingMode) {
+    case float_round_nearest_even:
+    case float_round_ties_away:
+        increment = ((int64_t)absZ1 < 0);
+        break;
+    case float_round_to_zero:
+        increment = 0;
+        break;
+    case float_round_up:
+        increment = !zSign && absZ1;
+        break;
+    case float_round_down:
+        increment = zSign && absZ1;
+        break;
+    default:
+        abort();
+    }
+    if (increment) {
+        ++absZ0;
+        if (absZ0 == 0) {
+            float_raise(float_flag_invalid STATUS_VAR);
+            return LIT64(0xFFFFFFFFFFFFFFFF);
+        }
+        absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
+    }
+
+    if (zSign && absZ0) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+
+    if (absZ1) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    return absZ0;
+}
+
+/*----------------------------------------------------------------------------
 | Returns the fraction bits of the single-precision floating-point value `a'.
 *----------------------------------------------------------------------------*/
 
@@ -319,20 +367,23 @@ static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig
 
     roundingMode = STATUS(float_rounding_mode);
     roundNearestEven = ( roundingMode == float_round_nearest_even );
-    roundIncrement = 0x40;
-    if ( ! roundNearestEven ) {
-        if ( roundingMode == float_round_to_zero ) {
-            roundIncrement = 0;
-        }
-        else {
-            roundIncrement = 0x7F;
-            if ( zSign ) {
-                if ( roundingMode == float_round_up ) roundIncrement = 0;
-            }
-            else {
-                if ( roundingMode == float_round_down ) roundIncrement = 0;
-            }
-        }
+    switch (roundingMode) {
+    case float_round_nearest_even:
+    case float_round_ties_away:
+        roundIncrement = 0x40;
+        break;
+    case float_round_to_zero:
+        roundIncrement = 0;
+        break;
+    case float_round_up:
+        roundIncrement = zSign ? 0 : 0x7f;
+        break;
+    case float_round_down:
+        roundIncrement = zSign ? 0x7f : 0;
+        break;
+    default:
+        abort();
+        break;
     }
     roundBits = zSig & 0x7F;
     if ( 0xFD <= (uint16_t) zExp ) {
@@ -501,20 +552,22 @@ static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig
 
     roundingMode = STATUS(float_rounding_mode);
     roundNearestEven = ( roundingMode == float_round_nearest_even );
-    roundIncrement = 0x200;
-    if ( ! roundNearestEven ) {
-        if ( roundingMode == float_round_to_zero ) {
-            roundIncrement = 0;
-        }
-        else {
-            roundIncrement = 0x3FF;
-            if ( zSign ) {
-                if ( roundingMode == float_round_up ) roundIncrement = 0;
-            }
-            else {
-                if ( roundingMode == float_round_down ) roundIncrement = 0;
-            }
-        }
+    switch (roundingMode) {
+    case float_round_nearest_even:
+    case float_round_ties_away:
+        roundIncrement = 0x200;
+        break;
+    case float_round_to_zero:
+        roundIncrement = 0;
+        break;
+    case float_round_up:
+        roundIncrement = zSign ? 0 : 0x3ff;
+        break;
+    case float_round_down:
+        roundIncrement = zSign ? 0x3ff : 0;
+        break;
+    default:
+        abort();
     }
     roundBits = zSig & 0x3FF;
     if ( 0x7FD <= (uint16_t) zExp ) {
@@ -684,19 +737,21 @@ static floatx80
         goto precision80;
     }
     zSig0 |= ( zSig1 != 0 );
-    if ( ! roundNearestEven ) {
-        if ( roundingMode == float_round_to_zero ) {
-            roundIncrement = 0;
-        }
-        else {
-            roundIncrement = roundMask;
-            if ( zSign ) {
-                if ( roundingMode == float_round_up ) roundIncrement = 0;
-            }
-            else {
-                if ( roundingMode == float_round_down ) roundIncrement = 0;
-            }
-        }
+    switch (roundingMode) {
+    case float_round_nearest_even:
+    case float_round_ties_away:
+        break;
+    case float_round_to_zero:
+        roundIncrement = 0;
+        break;
+    case float_round_up:
+        roundIncrement = zSign ? 0 : roundMask;
+        break;
+    case float_round_down:
+        roundIncrement = zSign ? roundMask : 0;
+        break;
+    default:
+        abort();
     }
     roundBits = zSig0 & roundMask;
     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
@@ -743,19 +798,22 @@ static floatx80
     if ( zSig0 == 0 ) zExp = 0;
     return packFloatx80( zSign, zExp, zSig0 );
  precision80:
-    increment = ( (int64_t) zSig1 < 0 );
-    if ( ! roundNearestEven ) {
-        if ( roundingMode == float_round_to_zero ) {
-            increment = 0;
-        }
-        else {
-            if ( zSign ) {
-                increment = ( roundingMode == float_round_down ) && zSig1;
-            }
-            else {
-                increment = ( roundingMode == float_round_up ) && zSig1;
-            }
-        }
+    switch (roundingMode) {
+    case float_round_nearest_even:
+    case float_round_ties_away:
+        increment = ((int64_t)zSig1 < 0);
+        break;
+    case float_round_to_zero:
+        increment = 0;
+        break;
+    case float_round_up:
+        increment = !zSign && zSig1;
+        break;
+    case float_round_down:
+        increment = zSign && zSig1;
+        break;
+    default:
+        abort();
     }
     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
         if (    ( 0x7FFE < zExp )
@@ -785,16 +843,22 @@ static floatx80
             zExp = 0;
             if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
             if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
-            if ( roundNearestEven ) {
-                increment = ( (int64_t) zSig1 < 0 );
-            }
-            else {
-                if ( zSign ) {
-                    increment = ( roundingMode == float_round_down ) && zSig1;
-                }
-                else {
-                    increment = ( roundingMode == float_round_up ) && zSig1;
-                }
+            switch (roundingMode) {
+            case float_round_nearest_even:
+            case float_round_ties_away:
+                increment = ((int64_t)zSig1 < 0);
+                break;
+            case float_round_to_zero:
+                increment = 0;
+                break;
+            case float_round_up:
+                increment = !zSign && zSig1;
+                break;
+            case float_round_down:
+                increment = zSign && zSig1;
+                break;
+            default:
+                abort();
             }
             if ( increment ) {
                 ++zSig0;
@@ -994,19 +1058,22 @@ static float128
 
     roundingMode = STATUS(float_rounding_mode);
     roundNearestEven = ( roundingMode == float_round_nearest_even );
-    increment = ( (int64_t) zSig2 < 0 );
-    if ( ! roundNearestEven ) {
-        if ( roundingMode == float_round_to_zero ) {
-            increment = 0;
-        }
-        else {
-            if ( zSign ) {
-                increment = ( roundingMode == float_round_down ) && zSig2;
-            }
-            else {
-                increment = ( roundingMode == float_round_up ) && zSig2;
-            }
-        }
+    switch (roundingMode) {
+    case float_round_nearest_even:
+    case float_round_ties_away:
+        increment = ((int64_t)zSig2 < 0);
+        break;
+    case float_round_to_zero:
+        increment = 0;
+        break;
+    case float_round_up:
+        increment = !zSign && zSig2;
+        break;
+    case float_round_down:
+        increment = zSign && zSig2;
+        break;
+    default:
+        abort();
     }
     if ( 0x7FFD <= (uint32_t) zExp ) {
         if (    ( 0x7FFD < zExp )
@@ -1054,16 +1121,22 @@ static float128
                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
             zExp = 0;
             if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
-            if ( roundNearestEven ) {
-                increment = ( (int64_t) zSig2 < 0 );
-            }
-            else {
-                if ( zSign ) {
-                    increment = ( roundingMode == float_round_down ) && zSig2;
-                }
-                else {
-                    increment = ( roundingMode == float_round_up ) && zSig2;
-                }
+            switch (roundingMode) {
+            case float_round_nearest_even:
+            case float_round_ties_away:
+                increment = ((int64_t)zSig2 < 0);
+                break;
+            case float_round_to_zero:
+                increment = 0;
+                break;
+            case float_round_up:
+                increment = !zSign && zSig2;
+                break;
+            case float_round_down:
+                increment = zSign && zSig2;
+                break;
+            default:
+                abort();
             }
         }
     }
@@ -1121,7 +1194,7 @@ static float128
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-float32 int32_to_float32( int32 a STATUS_PARAM )
+float32 int32_to_float32(int32_t a STATUS_PARAM)
 {
     flag zSign;
 
@@ -1138,7 +1211,7 @@ float32 int32_to_float32( int32 a STATUS_PARAM )
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-float64 int32_to_float64( int32 a STATUS_PARAM )
+float64 int32_to_float64(int32_t a STATUS_PARAM)
 {
     flag zSign;
     uint32 absA;
@@ -1161,7 +1234,7 @@ float64 int32_to_float64( int32 a STATUS_PARAM )
 | Arithmetic.
 *----------------------------------------------------------------------------*/
 
-floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
+floatx80 int32_to_floatx80(int32_t a STATUS_PARAM)
 {
     flag zSign;
     uint32 absA;
@@ -1183,7 +1256,7 @@ floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-float128 int32_to_float128( int32 a STATUS_PARAM )
+float128 int32_to_float128(int32_t a STATUS_PARAM)
 {
     flag zSign;
     uint32 absA;
@@ -1205,7 +1278,7 @@ float128 int32_to_float128( int32 a STATUS_PARAM )
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-float32 int64_to_float32( int64 a STATUS_PARAM )
+float32 int64_to_float32(int64_t a STATUS_PARAM)
 {
     flag zSign;
     uint64 absA;
@@ -1231,7 +1304,7 @@ float32 int64_to_float32( int64 a STATUS_PARAM )
 
 }
 
-float32 uint64_to_float32( uint64 a STATUS_PARAM )
+float32 uint64_to_float32(uint64_t a STATUS_PARAM)
 {
     int8 shiftCount;
 
@@ -1258,7 +1331,7 @@ float32 uint64_to_float32( uint64 a STATUS_PARAM )
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-float64 int64_to_float64( int64 a STATUS_PARAM )
+float64 int64_to_float64(int64_t a STATUS_PARAM)
 {
     flag zSign;
 
@@ -1271,7 +1344,7 @@ float64 int64_to_float64( int64 a STATUS_PARAM )
 
 }
 
-float64 uint64_to_float64(uint64 a STATUS_PARAM)
+float64 uint64_to_float64(uint64_t a STATUS_PARAM)
 {
     int exp =  0x43C;
 
@@ -1292,7 +1365,7 @@ float64 uint64_to_float64(uint64 a STATUS_PARAM)
 | Arithmetic.
 *----------------------------------------------------------------------------*/
 
-floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
+floatx80 int64_to_floatx80(int64_t a STATUS_PARAM)
 {
     flag zSign;
     uint64 absA;
@@ -1312,7 +1385,7 @@ floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-float128 int64_to_float128( int64 a STATUS_PARAM )
+float128 int64_to_float128(int64_t a STATUS_PARAM)
 {
     flag zSign;
     uint64 absA;
@@ -1339,7 +1412,7 @@ float128 int64_to_float128( int64 a STATUS_PARAM )
 
 }
 
-float128 uint64_to_float128(uint64 a STATUS_PARAM)
+float128 uint64_to_float128(uint64_t a STATUS_PARAM)
 {
     if (a == 0) {
         return float128_zero;
@@ -1509,6 +1582,52 @@ int64 float32_to_int64( float32 a STATUS_PARAM )
 
 /*----------------------------------------------------------------------------
 | Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit unsigned integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| unsigned integer is returned.  Otherwise, if the conversion overflows, the
+| largest unsigned integer is returned.  If `a' is negative, the result
+| is rounded and zero is returned; negative values that do not round to zero
+| raise the invalid exception flag.
+*----------------------------------------------------------------------------*/
+
+uint64 float32_to_uint64(float32 a STATUS_PARAM)
+{
+    flag aSign;
+    int_fast16_t aExp, shiftCount;
+    uint32_t aSig;
+    uint64_t aSig64, aSigExtra;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+    if ((aSign) && (aExp > 126)) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        if (float32_is_any_nan(a)) {
+            return LIT64(0xFFFFFFFFFFFFFFFF);
+        } else {
+            return 0;
+        }
+    }
+    shiftCount = 0xBE - aExp;
+    if (aExp) {
+        aSig |= 0x00800000;
+    }
+    if (shiftCount < 0) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        return LIT64(0xFFFFFFFFFFFFFFFF);
+    }
+
+    aSig64 = aSig;
+    aSig64 <<= 40;
+    shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
+    return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
 | `a' to the 64-bit two's complement integer format.  The conversion is
 | performed according to the IEC/IEEE Standard for Binary Floating-Point
 | Arithmetic, except that the conversion is always rounded toward zero.  If
@@ -1656,7 +1775,6 @@ float32 float32_round_to_int( float32 a STATUS_PARAM)
     flag aSign;
     int_fast16_t aExp;
     uint32_t lastBitMask, roundBitsMask;
-    int8 roundingMode;
     uint32_t z;
     a = float32_squash_input_denormal(a STATUS_VAR);
 
@@ -1677,6 +1795,11 @@ float32 float32_round_to_int( float32 a STATUS_PARAM)
                 return packFloat32( aSign, 0x7F, 0 );
             }
             break;
+        case float_round_ties_away:
+            if (aExp == 0x7E) {
+                return packFloat32(aSign, 0x7F, 0);
+            }
+            break;
          case float_round_down:
             return make_float32(aSign ? 0xBF800000 : 0);
          case float_round_up:
@@ -1688,15 +1811,30 @@ float32 float32_round_to_int( float32 a STATUS_PARAM)
     lastBitMask <<= 0x96 - aExp;
     roundBitsMask = lastBitMask - 1;
     z = float32_val(a);
-    roundingMode = STATUS(float_rounding_mode);
-    if ( roundingMode == float_round_nearest_even ) {
+    switch (STATUS(float_rounding_mode)) {
+    case float_round_nearest_even:
         z += lastBitMask>>1;
-        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
-    }
-    else if ( roundingMode != float_round_to_zero ) {
-        if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
+        if ((z & roundBitsMask) == 0) {
+            z &= ~lastBitMask;
+        }
+        break;
+    case float_round_ties_away:
+        z += lastBitMask >> 1;
+        break;
+    case float_round_to_zero:
+        break;
+    case float_round_up:
+        if (!extractFloat32Sign(make_float32(z))) {
+            z += roundBitsMask;
+        }
+        break;
+    case float_round_down:
+        if (extractFloat32Sign(make_float32(z))) {
             z += roundBitsMask;
         }
+        break;
+    default:
+        abort();
     }
     z &= ~ roundBitsMask;
     if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
@@ -3005,6 +3143,128 @@ static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig)
         (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
 }
 
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper half-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the half-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal half-
+| precision floating-point number.
+| The `ieee' flag indicates whether to use IEEE standard half precision, or
+| ARM-style "alternative representation", which omits the NaN and Inf
+| encodings in order to raise the maximum representable exponent by one.
+|     The input significand `zSig' has its binary point between bits 22
+| and 23, which is 13 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| Note the slightly odd position of the binary point in zSig compared with the
+| other roundAndPackFloat functions. This should probably be fixed if we
+| need to implement more float16 routines than just conversion.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float16 roundAndPackFloat16(flag zSign, int_fast16_t zExp,
+                                   uint32_t zSig, flag ieee STATUS_PARAM)
+{
+    int maxexp = ieee ? 29 : 30; /* ARM format gains one exponent value */
+    uint32_t mask;
+    uint32_t increment;
+    bool rounding_bumps_exp;
+    bool is_tiny = false;
+
+    /* Calculate the mask of bits of the mantissa which are not
+     * representable in half-precision and will be lost.
+     */
+    if (zExp < 1) {
+        /* Will be denormal in halfprec */
+        mask = 0x00ffffff;
+        if (zExp >= -11) {
+            mask >>= 11 + zExp;
+        }
+    } else {
+        /* Normal number in halfprec */
+        mask = 0x00001fff;
+    }
+
+    switch (STATUS(float_rounding_mode)) {
+    case float_round_nearest_even:
+        increment = (mask + 1) >> 1;
+        if ((zSig & mask) == increment) {
+            increment = zSig & (increment << 1);
+        }
+        break;
+    case float_round_ties_away:
+        increment = (mask + 1) >> 1;
+        break;
+    case float_round_up:
+        increment = zSign ? 0 : mask;
+        break;
+    case float_round_down:
+        increment = zSign ? mask : 0;
+        break;
+    default: /* round_to_zero */
+        increment = 0;
+        break;
+    }
+
+    rounding_bumps_exp = (zSig + increment >= 0x01000000);
+
+    if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
+        if (ieee) {
+            float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
+            return packFloat16(zSign, 0x1f, 0);
+        } else {
+            float_raise(float_flag_invalid STATUS_VAR);
+            return packFloat16(zSign, 0x1f, 0x3ff);
+        }
+    }
+
+    if (zExp < 0) {
+        /* Note that flush-to-zero does not affect half-precision results */
+        is_tiny =
+            (STATUS(float_detect_tininess) == float_tininess_before_rounding)
+            || (zExp < -1)
+            || (!rounding_bumps_exp);
+    }
+    if (zSig & mask) {
+        float_raise(float_flag_inexact STATUS_VAR);
+        if (is_tiny) {
+            float_raise(float_flag_underflow STATUS_VAR);
+        }
+    }
+
+    zSig += increment;
+    if (rounding_bumps_exp) {
+        zSig >>= 1;
+        zExp++;
+    }
+
+    if (zExp < -10) { /* every significand bit would shift out: signed 0 */
+        return packFloat16(zSign, 0, 0);
+    }
+    if (zExp < 0) {
+        zSig >>= -zExp;
+        zExp = 0;
+    }
+    return packFloat16(zSign, zExp, zSig >> 13);
+}
+
+static void normalizeFloat16Subnormal(uint32_t aSig, int_fast16_t *zExpPtr,
+                                      uint32_t *zSigPtr)
+{
+    int8_t shiftCount = countLeadingZeros32(aSig) - 21;
+    *zSigPtr = aSig << shiftCount;
+    *zExpPtr = 1 - shiftCount;
+}
+
 /* Half precision floats come in two formats: standard IEEE and "ARM" format.
    The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
 
@@ -3025,15 +3285,12 @@ float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
         return packFloat32(aSign, 0xff, 0);
     }
     if (aExp == 0) {
-        int8 shiftCount;
-
         if (aSig == 0) {
             return packFloat32(aSign, 0, 0);
         }
 
-        shiftCount = countLeadingZeros32( aSig ) - 21;
-        aSig = aSig << shiftCount;
-        aExp = -shiftCount;
+        normalizeFloat16Subnormal(aSig, &aExp, &aSig);
+        aExp--;
     }
     return packFloat32( aSign, aExp + 0x70, aSig << 13);
 }
@@ -3043,9 +3300,7 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
     flag aSign;
     int_fast16_t aExp;
     uint32_t aSig;
-    uint32_t mask;
-    uint32_t increment;
-    int8 roundingMode;
+
     a = float32_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
@@ -3054,11 +3309,12 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
     if ( aExp == 0xFF ) {
         if (aSig) {
             /* Input is a NaN */
-            float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
             if (!ieee) {
+                float_raise(float_flag_invalid STATUS_VAR);
                 return packFloat16(aSign, 0, 0);
             }
-            return r;
+            return commonNaNToFloat16(
+                float32ToCommonNaN(a STATUS_VAR) STATUS_VAR);
         }
         /* Infinity */
         if (!ieee) {
@@ -3070,66 +3326,92 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
     if (aExp == 0 && aSig == 0) {
         return packFloat16(aSign, 0, 0);
     }
-    /* Decimal point between bits 22 and 23.  */
+    /* Decimal point between bits 22 and 23. Note that we add the 1 bit
+     * even if the input is denormal; however this is harmless because
+     * the largest possible single-precision denormal is still smaller
+     * than the smallest representable half-precision denormal, and so we
+     * will end up ignoring aSig and returning via the "always return zero"
+     * codepath.
+     */
     aSig |= 0x00800000;
-    aExp -= 0x7f;
-    if (aExp < -14) {
-        mask = 0x00ffffff;
-        if (aExp >= -24) {
-            mask >>= 25 + aExp;
+    aExp -= 0x71;
+
+    return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR);
+}
+
+float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM)
+{
+    flag aSign;
+    int_fast16_t aExp;
+    uint32_t aSig;
+
+    aSign = extractFloat16Sign(a);
+    aExp = extractFloat16Exp(a);
+    aSig = extractFloat16Frac(a);
+
+    if (aExp == 0x1f && ieee) {
+        if (aSig) {
+            return commonNaNToFloat64(
+                float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
         }
-    } else {
-        mask = 0x00001fff;
+        return packFloat64(aSign, 0x7ff, 0);
     }
-    if (aSig & mask) {
-        float_raise( float_flag_underflow STATUS_VAR );
-        roundingMode = STATUS(float_rounding_mode);
-        switch (roundingMode) {
-        case float_round_nearest_even:
-            increment = (mask + 1) >> 1;
-            if ((aSig & mask) == increment) {
-                increment = aSig & (increment << 1);
-            }
-            break;
-        case float_round_up:
-            increment = aSign ? 0 : mask;
-            break;
-        case float_round_down:
-            increment = aSign ? mask : 0;
-            break;
-        default: /* round_to_zero */
-            increment = 0;
-            break;
-        }
-        aSig += increment;
-        if (aSig >= 0x01000000) {
-            aSig >>= 1;
-            aExp++;
+    if (aExp == 0) {
+        if (aSig == 0) {
+            return packFloat64(aSign, 0, 0);
         }
-    } else if (aExp < -14
-          && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
-        float_raise( float_flag_underflow STATUS_VAR);
+
+        normalizeFloat16Subnormal(aSig, &aExp, &aSig);
+        aExp--;
     }
+    return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
+}
 
-    if (ieee) {
-        if (aExp > 15) {
-            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
-            return packFloat16(aSign, 0x1f, 0);
+float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM)
+{
+    flag aSign;
+    int_fast16_t aExp;
+    uint64_t aSig;
+    uint32_t zSig;
+
+    a = float64_squash_input_denormal(a STATUS_VAR);
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    if (aExp == 0x7FF) {
+        if (aSig) {
+            /* Input is a NaN */
+            if (!ieee) {
+                float_raise(float_flag_invalid STATUS_VAR);
+                return packFloat16(aSign, 0, 0);
+            }
+            return commonNaNToFloat16(
+                float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
         }
-    } else {
-        if (aExp > 16) {
-            float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
+        /* Infinity */
+        if (!ieee) {
+            float_raise(float_flag_invalid STATUS_VAR);
             return packFloat16(aSign, 0x1f, 0x3ff);
         }
+        return packFloat16(aSign, 0x1f, 0);
     }
-    if (aExp < -24) {
+    shift64RightJamming(aSig, 29, &aSig);
+    zSig = aSig;
+    if (aExp == 0 && zSig == 0) {
         return packFloat16(aSign, 0, 0);
     }
-    if (aExp < -14) {
-        aSig >>= -14 - aExp;
-        aExp = -14;
-    }
-    return packFloat16(aSign, aExp + 14, aSig >> 13);
+    /* Decimal point between bits 22 and 23. Note that we add the 1 bit
+     * even if the input is denormal; however this is harmless because
+     * the largest possible double-precision denormal is still smaller
+     * than the smallest representable half-precision denormal, and so we
+     * will end up ignoring zSig and returning via the "always return zero"
+     * codepath.
+     */
+    zSig |= 0x00800000;
+    aExp -= 0x3F1;
+
+    return roundAndPackFloat16(aSign, aExp, zSig, ieee STATUS_VAR);
 }
 
 /*----------------------------------------------------------------------------
@@ -3206,7 +3488,6 @@ float64 float64_round_to_int( float64 a STATUS_PARAM )
     flag aSign;
     int_fast16_t aExp;
     uint64_t lastBitMask, roundBitsMask;
-    int8 roundingMode;
     uint64_t z;
     a = float64_squash_input_denormal(a STATUS_VAR);
 
@@ -3227,6 +3508,11 @@ float64 float64_round_to_int( float64 a STATUS_PARAM )
                 return packFloat64( aSign, 0x3FF, 0 );
             }
             break;
+        case float_round_ties_away:
+            if (aExp == 0x3FE) {
+                return packFloat64(aSign, 0x3ff, 0);
+            }
+            break;
          case float_round_down:
             return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
          case float_round_up:
@@ -3239,15 +3525,30 @@ float64 float64_round_to_int( float64 a STATUS_PARAM )
     lastBitMask <<= 0x433 - aExp;
     roundBitsMask = lastBitMask - 1;
     z = float64_val(a);
-    roundingMode = STATUS(float_rounding_mode);
-    if ( roundingMode == float_round_nearest_even ) {
-        z += lastBitMask>>1;
-        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
-    }
-    else if ( roundingMode != float_round_to_zero ) {
-        if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
+    switch (STATUS(float_rounding_mode)) {
+    case float_round_nearest_even:
+        z += lastBitMask >> 1;
+        if ((z & roundBitsMask) == 0) {
+            z &= ~lastBitMask;
+        }
+        break;
+    case float_round_ties_away:
+        z += lastBitMask >> 1;
+        break;
+    case float_round_to_zero:
+        break;
+    case float_round_up:
+        if (!extractFloat64Sign(make_float64(z))) {
+            z += roundBitsMask;
+        }
+        break;
+    case float_round_down:
+        if (extractFloat64Sign(make_float64(z))) {
             z += roundBitsMask;
         }
+        break;
+    default:
+        abort();
     }
     z &= ~ roundBitsMask;
     if ( z != float64_val(a) )
@@ -4475,7 +4776,6 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
     flag aSign;
     int32 aExp;
     uint64_t lastBitMask, roundBitsMask;
-    int8 roundingMode;
     floatx80 z;
 
     aExp = extractFloatx80Exp( a );
@@ -4500,6 +4800,11 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
             }
             break;
+        case float_round_ties_away:
+            if (aExp == 0x3FFE) {
+                return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
+            }
+            break;
          case float_round_down:
             return
                   aSign ?
@@ -4516,15 +4821,30 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
     lastBitMask <<= 0x403E - aExp;
     roundBitsMask = lastBitMask - 1;
     z = a;
-    roundingMode = STATUS(float_rounding_mode);
-    if ( roundingMode == float_round_nearest_even ) {
+    switch (STATUS(float_rounding_mode)) {
+    case float_round_nearest_even:
         z.low += lastBitMask>>1;
-        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
-    }
-    else if ( roundingMode != float_round_to_zero ) {
-        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+        if ((z.low & roundBitsMask) == 0) {
+            z.low &= ~lastBitMask;
+        }
+        break;
+    case float_round_ties_away:
+        z.low += lastBitMask >> 1;
+        break;
+    case float_round_to_zero:
+        break;
+    case float_round_up:
+        if (!extractFloatx80Sign(z)) {
             z.low += roundBitsMask;
         }
+        break;
+    case float_round_down:
+        if (extractFloatx80Sign(z)) {
+            z.low += roundBitsMask;
+        }
+        break;
+    default:
+        abort();
     }
     z.low &= ~ roundBitsMask;
     if ( z.low == 0 ) {
@@ -5550,7 +5870,6 @@ float128 float128_round_to_int( float128 a STATUS_PARAM )
     flag aSign;
     int32 aExp;
     uint64_t lastBitMask, roundBitsMask;
-    int8 roundingMode;
     float128 z;
 
     aExp = extractFloat128Exp( a );
@@ -5567,8 +5886,8 @@ float128 float128_round_to_int( float128 a STATUS_PARAM )
         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
         roundBitsMask = lastBitMask - 1;
         z = a;
-        roundingMode = STATUS(float_rounding_mode);
-        if ( roundingMode == float_round_nearest_even ) {
+        switch (STATUS(float_rounding_mode)) {
+        case float_round_nearest_even:
             if ( lastBitMask ) {
                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
@@ -5579,12 +5898,30 @@ float128 float128_round_to_int( float128 a STATUS_PARAM )
                     if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
                 }
             }
-        }
-        else if ( roundingMode != float_round_to_zero ) {
-            if (   extractFloat128Sign( z )
-                 ^ ( roundingMode == float_round_up ) ) {
-                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+            break;
+        case float_round_ties_away:
+            if (lastBitMask) {
+                add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
+            } else {
+                if ((int64_t) z.low < 0) {
+                    ++z.high;
+                }
+            }
+            break;
+        case float_round_to_zero:
+            break;
+        case float_round_up:
+            if (!extractFloat128Sign(z)) {
+                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
+            }
+            break;
+        case float_round_down:
+            if (extractFloat128Sign(z)) {
+                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
             }
+            break;
+        default:
+            abort();
         }
         z.low &= ~ roundBitsMask;
     }
@@ -5602,6 +5939,11 @@ float128 float128_round_to_int( float128 a STATUS_PARAM )
                     return packFloat128( aSign, 0x3FFF, 0, 0 );
                 }
                 break;
+            case float_round_ties_away:
+                if (aExp == 0x3FFE) {
+                    return packFloat128(aSign, 0x3FFF, 0, 0);
+                }
+                break;
              case float_round_down:
                 return
                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
@@ -5618,19 +5960,32 @@ float128 float128_round_to_int( float128 a STATUS_PARAM )
         roundBitsMask = lastBitMask - 1;
         z.low = 0;
         z.high = a.high;
-        roundingMode = STATUS(float_rounding_mode);
-        if ( roundingMode == float_round_nearest_even ) {
+        switch (STATUS(float_rounding_mode)) {
+        case float_round_nearest_even:
             z.high += lastBitMask>>1;
             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
                 z.high &= ~ lastBitMask;
             }
-        }
-        else if ( roundingMode != float_round_to_zero ) {
-            if (   extractFloat128Sign( z )
-                 ^ ( roundingMode == float_round_up ) ) {
+            break;
+        case float_round_ties_away:
+            z.high += lastBitMask>>1;
+            break;
+        case float_round_to_zero:
+            break;
+        case float_round_up:
+            if (!extractFloat128Sign(z)) {
                 z.high |= ( a.low != 0 );
                 z.high += roundBitsMask;
             }
+            break;
+        case float_round_down:
+            if (extractFloat128Sign(z)) {
+                z.high |= (a.low != 0);
+                z.high += roundBitsMask;
+            }
+            break;
+        default:
+            abort();
         }
         z.high &= ~ roundBitsMask;
     }
@@ -6418,12 +6773,12 @@ int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
 }
 
 /* misc functions */
-float32 uint32_to_float32( uint32 a STATUS_PARAM )
+float32 uint32_to_float32(uint32_t a STATUS_PARAM)
 {
     return int64_to_float32(a STATUS_VAR);
 }
 
-float64 uint32_to_float64( uint32 a STATUS_PARAM )
+float64 uint32_to_float64(uint32_t a STATUS_PARAM)
 {
     return int64_to_float64(a STATUS_VAR);
 }
@@ -6432,17 +6787,18 @@ uint32 float32_to_uint32( float32 a STATUS_PARAM )
 {
     int64_t v;
     uint32 res;
+    int old_exc_flags = get_float_exception_flags(status);
 
     v = float32_to_int64(a STATUS_VAR);
     if (v < 0) {
         res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
     } else if (v > 0xffffffff) {
         res = 0xffffffff;
-        float_raise( float_flag_invalid STATUS_VAR);
     } else {
-        res = v;
+        return v;
     }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
     return res;
 }
 
@@ -6450,17 +6806,58 @@ uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
 {
     int64_t v;
     uint32 res;
+    int old_exc_flags = get_float_exception_flags(status);
 
     v = float32_to_int64_round_to_zero(a STATUS_VAR);
     if (v < 0) {
         res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
     } else if (v > 0xffffffff) {
         res = 0xffffffff;
-        float_raise( float_flag_invalid STATUS_VAR);
     } else {
-        res = v;
+        return v;
+    }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return res;
+}
+
+int_fast16_t float32_to_int16(float32 a STATUS_PARAM)
+{
+    int32_t v;
+    int_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
+
+    v = float32_to_int32(a STATUS_VAR);
+    if (v < -0x8000) {
+        res = -0x8000;
+    } else if (v > 0x7fff) {
+        res = 0x7fff;
+    } else {
+        return v;
     }
+
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return res;
+}
+
+uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM)
+{
+    int32_t v;
+    uint_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
+
+    v = float32_to_int32(a STATUS_VAR);
+    if (v < 0) {
+        res = 0;
+    } else if (v > 0xffff) {
+        res = 0xffff;
+    } else {
+        return v;
+    }
+
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
     return res;
 }
 
@@ -6468,53 +6865,92 @@ uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM)
 {
     int64_t v;
     uint_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
 
     v = float32_to_int64_round_to_zero(a STATUS_VAR);
     if (v < 0) {
         res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
     } else if (v > 0xffff) {
         res = 0xffff;
-        float_raise( float_flag_invalid STATUS_VAR);
     } else {
-        res = v;
+        return v;
     }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
     return res;
 }
 
 uint32 float64_to_uint32( float64 a STATUS_PARAM )
 {
-    int64_t v;
+    uint64_t v;
     uint32 res;
+    int old_exc_flags = get_float_exception_flags(status);
 
-    v = float64_to_int64(a STATUS_VAR);
-    if (v < 0) {
-        res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
-    } else if (v > 0xffffffff) {
+    v = float64_to_uint64(a STATUS_VAR);
+    if (v > 0xffffffff) {
         res = 0xffffffff;
-        float_raise( float_flag_invalid STATUS_VAR);
     } else {
-        res = v;
+        return v;
     }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
     return res;
 }
 
 uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
 {
-    int64_t v;
+    uint64_t v;
     uint32 res;
+    int old_exc_flags = get_float_exception_flags(status);
 
-    v = float64_to_int64_round_to_zero(a STATUS_VAR);
+    v = float64_to_uint64_round_to_zero(a STATUS_VAR);
+    if (v > 0xffffffff) {
+        res = 0xffffffff;
+    } else {
+        return v;
+    }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return res;
+}
+
+int_fast16_t float64_to_int16(float64 a STATUS_PARAM)
+{
+    int64_t v;
+    int_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
+
+    v = float64_to_int32(a STATUS_VAR);
+    if (v < -0x8000) {
+        res = -0x8000;
+    } else if (v > 0x7fff) {
+        res = 0x7fff;
+    } else {
+        return v;
+    }
+
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return res;
+}
+
+uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM)
+{
+    int64_t v;
+    uint_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
+
+    v = float64_to_int32(a STATUS_VAR);
     if (v < 0) {
         res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
-    } else if (v > 0xffffffff) {
-        res = 0xffffffff;
-        float_raise( float_flag_invalid STATUS_VAR);
+    } else if (v > 0xffff) {
+        res = 0xffff;
     } else {
-        res = v;
+        return v;
     }
+
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
     return res;
 }
 
@@ -6522,41 +6958,75 @@ uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
 {
     int64_t v;
     uint_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
 
     v = float64_to_int64_round_to_zero(a STATUS_VAR);
     if (v < 0) {
         res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
     } else if (v > 0xffff) {
         res = 0xffff;
-        float_raise( float_flag_invalid STATUS_VAR);
     } else {
-        res = v;
+        return v;
     }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
     return res;
 }
 
-/* FIXME: This looks broken.  */
-uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
-{
-    int64_t v;
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit unsigned integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  If the conversion overflows, the
+| largest unsigned integer is returned.  If 'a' is negative, the value is
+| rounded and zero is returned; negative values that do not round to zero
+| will raise the invalid exception.
+*----------------------------------------------------------------------------*/
 
-    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
-    v += float64_val(a);
-    v = float64_to_int64(make_float64(v) STATUS_VAR);
+uint64_t float64_to_uint64(float64 a STATUS_PARAM)
+{
+    flag aSign;
+    int_fast16_t aExp, shiftCount;
+    uint64_t aSig, aSigExtra;
+    a = float64_squash_input_denormal(a STATUS_VAR);
 
-    return v - INT64_MIN;
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    if (aSign && (aExp > 1022)) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        if (float64_is_any_nan(a)) {
+            return LIT64(0xFFFFFFFFFFFFFFFF);
+        } else {
+            return 0;
+        }
+    }
+    if (aExp) {
+        aSig |= LIT64(0x0010000000000000);
+    }
+    shiftCount = 0x433 - aExp;
+    if (shiftCount <= 0) {
+        if (0x43E < aExp) {
+            float_raise(float_flag_invalid STATUS_VAR);
+            return LIT64(0xFFFFFFFFFFFFFFFF);
+        }
+        aSigExtra = 0;
+        aSig <<= -shiftCount;
+    } else {
+        shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
+    }
+    return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR);
 }
 
 uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
 {
-    int64_t v;
-
-    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
-    v += float64_val(a);
-    v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
-
-    return v - INT64_MIN;
+    signed char current_rounding_mode = STATUS(float_rounding_mode);
+    set_float_rounding_mode(float_round_to_zero STATUS_VAR);
+    uint64_t v = float64_to_uint64(a STATUS_VAR); /* keep full u64 range */
+    set_float_rounding_mode(current_rounding_mode STATUS_VAR);
+    return v;
 }
 
 #define COMPARE(s, nan_exp)                                                  \
@@ -6795,10 +7265,13 @@ float32 float32_scalbn( float32 a, int n STATUS_PARAM )
         }
         return a;
     }
-    if ( aExp != 0 )
+    if (aExp != 0) {
         aSig |= 0x00800000;
-    else if ( aSig == 0 )
+    } else if (aSig == 0) {
         return a;
+    } else {
+        aExp++;
+    }
 
     if (n > 0x200) {
         n = 0x200;
@@ -6828,10 +7301,13 @@ float64 float64_scalbn( float64 a, int n STATUS_PARAM )
         }
         return a;
     }
-    if ( aExp != 0 )
+    if (aExp != 0) {
         aSig |= LIT64( 0x0010000000000000 );
-    else if ( aSig == 0 )
+    } else if (aSig == 0) {
         return a;
+    } else {
+        aExp++;
+    }
 
     if (n > 0x1000) {
         n = 0x1000;
@@ -6861,8 +7337,12 @@ floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
         return a;
     }
 
-    if (aExp == 0 && aSig == 0)
-        return a;
+    if (aExp == 0) {
+        if (aSig == 0) {
+            return a;
+        }
+        aExp++;
+    }
 
     if (n > 0x10000) {
         n = 0x10000;
@@ -6891,10 +7371,13 @@ float128 float128_scalbn( float128 a, int n STATUS_PARAM )
         }
         return a;
     }
-    if ( aExp != 0 )
+    if (aExp != 0) {
         aSig0 |= LIT64( 0x0001000000000000 );
-    else if ( aSig0 == 0 && aSig1 == 0 )
+    } else if (aSig0 == 0 && aSig1 == 0) {
         return a;
+    } else {
+        aExp++;
+    }
 
     if (n > 0x10000) {
         n = 0x10000;
diff --git a/hw/acpi/core.c b/hw/acpi/core.c
index 58308a3406..79414b44c7 100644
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -662,3 +662,21 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr)
 
     return val;
 }
+
+void acpi_update_sci(ACPIREGS *regs, qemu_irq irq)
+{
+    int sci_level, pm1a_sts;
+
+    pm1a_sts = acpi_pm1_evt_get_sts(regs);
+
+    sci_level = ((pm1a_sts &
+                  regs->pm1.evt.en & ACPI_BITMASK_PM1_COMMON_ENABLED) != 0) ||
+                ((regs->gpe.sts[0] & regs->gpe.en[0]) != 0);
+
+    qemu_set_irq(irq, sci_level);
+
+    /* schedule a timer interruption if needed */
+    acpi_pm_tmr_update(regs,
+                       (regs->pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) &&
+                       !(pm1a_sts & ACPI_BITMASK_TIMER_STATUS));
+}
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 7e0429e0f9..30f0df8713 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -44,29 +44,10 @@ do { printf("%s "fmt, __func__, ## __VA_ARGS__); } while (0)
 #define ICH9_DEBUG(fmt, ...)    do { } while (0)
 #endif
 
-static void pm_update_sci(ICH9LPCPMRegs *pm)
-{
-    int sci_level, pm1a_sts;
-
-    pm1a_sts = acpi_pm1_evt_get_sts(&pm->acpi_regs);
-
-    sci_level = (((pm1a_sts & pm->acpi_regs.pm1.evt.en) &
-                  (ACPI_BITMASK_RT_CLOCK_ENABLE |
-                   ACPI_BITMASK_POWER_BUTTON_ENABLE |
-                   ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
-                   ACPI_BITMASK_TIMER_ENABLE)) != 0);
-    qemu_set_irq(pm->irq, sci_level);
-
-    /* schedule a timer interruption if needed */
-    acpi_pm_tmr_update(&pm->acpi_regs,
-                       (pm->acpi_regs.pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) &&
-                       !(pm1a_sts & ACPI_BITMASK_TIMER_STATUS));
-}
-
 static void ich9_pm_update_sci_fn(ACPIREGS *regs)
 {
     ICH9LPCPMRegs *pm = container_of(regs, ICH9LPCPMRegs, acpi_regs);
-    pm_update_sci(pm);
+    acpi_update_sci(&pm->acpi_regs, pm->irq);
 }
 
 static uint64_t ich9_gpe_readb(void *opaque, hwaddr addr, unsigned width)
@@ -80,6 +61,7 @@ static void ich9_gpe_writeb(void *opaque, hwaddr addr, uint64_t val,
 {
     ICH9LPCPMRegs *pm = opaque;
     acpi_gpe_ioport_writeb(&pm->acpi_regs, addr, val);
+    acpi_update_sci(&pm->acpi_regs, pm->irq);
 }
 
 static const MemoryRegionOps ich9_gpe_ops = {
@@ -193,7 +175,7 @@ static void pm_reset(void *opaque)
         pm->smi_en |= ICH9_PMIO_SMI_EN_APMC_EN;
     }
 
-    pm_update_sci(pm);
+    acpi_update_sci(&pm->acpi_regs, pm->irq);
 }
 
 static void pm_powerdown_req(Notifier *n, void *opaque)
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 93849c8d36..20353b983e 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -112,29 +112,10 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
 #define ACPI_ENABLE 0xf1
 #define ACPI_DISABLE 0xf0
 
-static void pm_update_sci(PIIX4PMState *s)
-{
-    int sci_level, pmsts;
-
-    pmsts = acpi_pm1_evt_get_sts(&s->ar);
-    sci_level = (((pmsts & s->ar.pm1.evt.en) &
-                  (ACPI_BITMASK_RT_CLOCK_ENABLE |
-                   ACPI_BITMASK_POWER_BUTTON_ENABLE |
-                   ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
-                   ACPI_BITMASK_TIMER_ENABLE)) != 0) ||
-        (((s->ar.gpe.sts[0] & s->ar.gpe.en[0]) &
-          (PIIX4_PCI_HOTPLUG_STATUS | PIIX4_CPU_HOTPLUG_STATUS)) != 0);
-
-    qemu_set_irq(s->irq, sci_level);
-    /* schedule a timer interruption if needed */
-    acpi_pm_tmr_update(&s->ar, (s->ar.pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) &&
-                       !(pmsts & ACPI_BITMASK_TIMER_STATUS));
-}
-
 static void pm_tmr_timer(ACPIREGS *ar)
 {
     PIIX4PMState *s = container_of(ar, PIIX4PMState, ar);
-    pm_update_sci(s);
+    acpi_update_sci(&s->ar, s->irq);
 }
 
 static void apm_ctrl_changed(uint32_t val, void *arg)
@@ -544,9 +525,13 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
     k->revision = 0x03;
     k->class_id = PCI_CLASS_BRIDGE_OTHER;
     dc->desc = "PM";
-    dc->no_user = 1;
     dc->vmsd = &vmstate_acpi;
     dc->props = piix4_pm_properties;
+    /*
+     * Reason: part of PIIX4 southbridge, needs to be wired up,
+     * e.g. by mips_malta_init()
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo piix4_pm_info = {
@@ -578,7 +563,7 @@ static void gpe_writeb(void *opaque, hwaddr addr, uint64_t val,
     PIIX4PMState *s = opaque;
 
     acpi_gpe_ioport_writeb(&s->ar, addr, val);
-    pm_update_sci(s);
+    acpi_update_sci(&s->ar, s->irq);
 
     PIIX4_DPRINTF("gpe write %" HWADDR_PRIx " <== %" PRIu64 "\n", addr, val);
 }
@@ -694,7 +679,7 @@ static void piix4_cpu_hotplug_req(PIIX4PMState *s, CPUState *cpu,
     } else {
         g->sts[cpu_id / 8] &= ~(1 << (cpu_id % 8));
     }
-    pm_update_sci(s);
+    acpi_update_sci(&s->ar, s->irq);
 }
 
 static void piix4_cpu_added_req(Notifier *n, void *opaque)
@@ -768,7 +753,7 @@ static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
         disable_device(s, slot);
     }
 
-    pm_update_sci(s);
+    acpi_update_sci(&s->ar, s->irq);
 
     return 0;
 }
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 59e1bb8388..71a5a37fdc 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -934,11 +934,9 @@ static int typhoon_pcihost_init(SysBusDevice *dev)
 
 static void typhoon_pcihost_class_init(ObjectClass *klass, void *data)
 {
-    DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = typhoon_pcihost_init;
-    dc->no_user = 1;
 }
 
 static const TypeInfo typhoon_pcihost_info = {
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index c75b425c01..d76a1d1f78 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -126,7 +126,7 @@ typedef struct {
     SysBusDevice parent_obj;
     /*< public >*/
 
-    MemoryRegion *iomem;
+    MemoryRegion iomem;
     uint32_t regs[NUM_REGS];
 } HighbankRegsState;
 
@@ -155,10 +155,9 @@ static int highbank_regs_init(SysBusDevice *dev)
 {
     HighbankRegsState *s = HIGHBANK_REGISTERS(dev);
 
-    s->iomem = g_new(MemoryRegion, 1);
-    memory_region_init_io(s->iomem, OBJECT(s), &hb_mem_ops, s->regs,
+    memory_region_init_io(&s->iomem, OBJECT(s), &hb_mem_ops, s->regs,
                           "highbank_regs", 0x1000);
-    sysbus_init_mmio(dev, s->iomem);
+    sysbus_init_mmio(dev, &s->iomem);
 
     return 0;
 }
diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c
index 9402c841e9..d8e075e26d 100644
--- a/hw/arm/mainstone.c
+++ b/hw/arm/mainstone.c
@@ -45,7 +45,7 @@
 #define S1_STSCHG_IRQ 14
 #define S1_IRQ        15
 
-static struct keymap map[0xE0] = {
+static const struct keymap map[0xE0] = {
     [0 ... 0xDF] = { -1, -1 },
     [0x1e] = {0,0}, /* a */
     [0x30] = {0,1}, /* b */
@@ -75,9 +75,18 @@ static struct keymap map[0xE0] = {
     [0x2c] = {4,3}, /* z */
     [0xc7] = {5,0}, /* Home */
     [0x2a] = {5,1}, /* shift */
-    [0x39] = {5,2}, /* space */
+    /*
+     * There are two matrix positions which map to space,
+     * but QEMU can only use one of them for the reverse
+     * mapping, so simply use the second one.
+     */
+    /* [0x39] = {5,2}, space */
     [0x39] = {5,3}, /* space */
-    [0x1c] = {5,5}, /*  enter */
+    /*
+     * Matrix position {5,4} and other keys are missing here.
+     * TODO: Compare with Linux code and test real hardware.
+     */
+    [0x1c] = {5,5}, /* enter (TODO: might be wrong) */
     [0xc8] = {6,0}, /* up */
     [0xd0] = {6,1}, /* down */
     [0xcb] = {6,2}, /* left */
diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c
index f7e8b7e8fa..aef2bde0c4 100644
--- a/hw/arm/versatilepb.c
+++ b/hw/arm/versatilepb.c
@@ -390,7 +390,6 @@ static void vpb_sic_class_init(ObjectClass *klass, void *data)
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = vpb_sic_init;
-    dc->no_user = 1;
     dc->vmsd = &vmstate_vpb_sic;
 }
 
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 17251c7a65..98e0958a77 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -49,9 +49,11 @@ static void gem_init(NICInfo *nd, uint32_t base, qemu_irq irq)
     DeviceState *dev;
     SysBusDevice *s;
 
-    qemu_check_nic_model(nd, "cadence_gem");
     dev = qdev_create(NULL, "cadence_gem");
-    qdev_set_nic_properties(dev, nd);
+    if (nd->used) {
+        qemu_check_nic_model(nd, "cadence_gem");
+        qdev_set_nic_properties(dev, nd);
+    }
     qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
     sysbus_mmio_map(s, 0, base);
@@ -113,7 +115,6 @@ static void zynq_init(QEMUMachineInitArgs *args)
     DeviceState *dev;
     SysBusDevice *busdev;
     qemu_irq pic[64];
-    NICInfo *nd;
     Error *err = NULL;
     int n;
 
@@ -190,14 +191,8 @@ static void zynq_init(QEMUMachineInitArgs *args)
     sysbus_create_varargs("cadence_ttc", 0xF8002000,
             pic[69-IRQ_OFFSET], pic[70-IRQ_OFFSET], pic[71-IRQ_OFFSET], NULL);
 
-    for (n = 0; n < nb_nics; n++) {
-        nd = &nd_table[n];
-        if (n == 0) {
-            gem_init(nd, 0xE000B000, pic[54-IRQ_OFFSET]);
-        } else if (n == 1) {
-            gem_init(nd, 0xE000C000, pic[77-IRQ_OFFSET]);
-        }
-    }
+    gem_init(&nd_table[0], 0xE000B000, pic[54-IRQ_OFFSET]);
+    gem_init(&nd_table[1], 0xE000C000, pic[77-IRQ_OFFSET]);
 
     dev = qdev_create(NULL, "generic-sdhci");
     qdev_init_nofail(dev);
diff --git a/hw/arm/z2.c b/hw/arm/z2.c
index d52c5019b3..97367b1f8b 100644
--- a/hw/arm/z2.c
+++ b/hw/arm/z2.c
@@ -33,7 +33,7 @@
 #define DPRINTF(fmt, ...)
 #endif
 
-static struct keymap map[0x100] = {
+static const struct keymap map[0x100] = {
     [0 ... 0xff] = { -1, -1 },
     [0x3b] = {0, 0}, /* Option = F1 */
     [0xc8] = {0, 1}, /* Up */
diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
index 6ab8c245d3..d41f82cec4 100644
--- a/hw/audio/intel-hda.c
+++ b/hw/audio/intel-hda.c
@@ -900,7 +900,7 @@ static const IntelHDAReg *intel_hda_reg_find(IntelHDAState *d, hwaddr addr)
 {
     const IntelHDAReg *reg;
 
-    if (addr >= sizeof(regtab)/sizeof(regtab[0])) {
+    if (addr >= ARRAY_SIZE(regtab)) {
         goto noreg;
     }
     reg = regtab+addr;
@@ -1025,7 +1025,7 @@ static void intel_hda_regs_reset(IntelHDAState *d)
     uint32_t *addr;
     int i;
 
-    for (i = 0; i < sizeof(regtab)/sizeof(regtab[0]); i++) {
+    for (i = 0; i < ARRAY_SIZE(regtab); i++) {
         if (regtab[i].name == NULL) {
             continue;
         }
diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c
index 97194ce7ad..cdce238f55 100644
--- a/hw/audio/marvell_88w8618.c
+++ b/hw/audio/marvell_88w8618.c
@@ -288,6 +288,8 @@ static void mv88w8618_audio_class_init(ObjectClass *klass, void *data)
     dc->reset = mv88w8618_audio_reset;
     dc->vmsd = &mv88w8618_audio_vmsd;
     dc->props = mv88w8618_audio_properties;
+    /* Reason: pointer property "wm8750" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo mv88w8618_audio_info = {
diff --git a/hw/audio/pcspk.c b/hw/audio/pcspk.c
index 9004ce3d1f..f980d66b2f 100644
--- a/hw/audio/pcspk.c
+++ b/hw/audio/pcspk.c
@@ -192,8 +192,9 @@ static void pcspk_class_initfn(ObjectClass *klass, void *data)
 
     dc->realize = pcspk_realizefn;
     set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
-    dc->no_user = 1;
     dc->props = pcspk_properties;
+    /* Reason: pointer property "pit", realize sets global pcspk_state */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo pcspk_info = {
diff --git a/hw/audio/pl041.c b/hw/audio/pl041.c
index 5393b520b7..ed82be54e8 100644
--- a/hw/audio/pl041.c
+++ b/hw/audio/pl041.c
@@ -632,7 +632,6 @@ static void pl041_device_class_init(ObjectClass *klass, void *data)
 
     k->init = pl041_init;
     set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
-    dc->no_user = 1;
     dc->reset = pl041_device_reset;
     dc->vmsd = &vmstate_pl041;
     dc->props = pl041_device_properties;
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 1e57f3aabd..456d437ac3 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -35,7 +35,7 @@ enum {
 typedef struct {
     struct iocb iocb;               /* Linux AIO control block */
     QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
-    unsigned int head;              /* vring descriptor index */
+    VirtQueueElement *elem;         /* saved data from the virtqueue */
     struct iovec *bounce_iov;       /* used if guest buffers are unaligned */
     QEMUIOVector *read_qiov;        /* for read completion /w bounce buffer */
 } VirtIOBlockRequest;
@@ -96,7 +96,7 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
         len = 0;
     }
 
-    trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
+    trace_virtio_blk_data_plane_complete_request(s, req->elem->index, ret);
 
     if (req->read_qiov) {
         assert(req->bounce_iov);
@@ -118,12 +118,12 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
      * written to, but for virtio-blk it seems to be the number of bytes
      * transferred plus the status bytes.
      */
-    vring_push(&s->vring, req->head, len + sizeof(hdr));
-
+    vring_push(&s->vring, req->elem, len + sizeof(hdr));
+    req->elem = NULL;
     s->num_reqs--;
 }
 
-static void complete_request_early(VirtIOBlockDataPlane *s, unsigned int head,
+static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
                                    QEMUIOVector *inhdr, unsigned char status)
 {
     struct virtio_blk_inhdr hdr = {
@@ -134,26 +134,26 @@ static void complete_request_early(VirtIOBlockDataPlane *s, unsigned int head,
     qemu_iovec_destroy(inhdr);
     g_slice_free(QEMUIOVector, inhdr);
 
-    vring_push(&s->vring, head, sizeof(hdr));
+    vring_push(&s->vring, elem, sizeof(hdr));
     notify_guest(s);
 }
 
 /* Get disk serial number */
 static void do_get_id_cmd(VirtIOBlockDataPlane *s,
                           struct iovec *iov, unsigned int iov_cnt,
-                          unsigned int head, QEMUIOVector *inhdr)
+                          VirtQueueElement *elem, QEMUIOVector *inhdr)
 {
     char id[VIRTIO_BLK_ID_BYTES];
 
     /* Serial number not NUL-terminated when shorter than buffer */
     strncpy(id, s->blk->serial ? s->blk->serial : "", sizeof(id));
     iov_from_buf(iov, iov_cnt, 0, id, sizeof(id));
-    complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
+    complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
 }
 
 static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
-                       struct iovec *iov, unsigned int iov_cnt,
-                       long long offset, unsigned int head,
+                       struct iovec *iov, unsigned iov_cnt,
+                       long long offset, VirtQueueElement *elem,
                        QEMUIOVector *inhdr)
 {
     struct iocb *iocb;
@@ -186,19 +186,20 @@ static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
 
     /* Fill in virtio block metadata needed for completion */
     VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
-    req->head = head;
+    req->elem = elem;
     req->inhdr = inhdr;
     req->bounce_iov = bounce_iov;
     req->read_qiov = read_qiov;
     return 0;
 }
 
-static int process_request(IOQueue *ioq, struct iovec iov[],
-                           unsigned int out_num, unsigned int in_num,
-                           unsigned int head)
+static int process_request(IOQueue *ioq, VirtQueueElement *elem)
 {
     VirtIOBlockDataPlane *s = container_of(ioq, VirtIOBlockDataPlane, ioqueue);
-    struct iovec *in_iov = &iov[out_num];
+    struct iovec *iov = elem->out_sg;
+    struct iovec *in_iov = elem->in_sg;
+    unsigned out_num = elem->out_num;
+    unsigned in_num = elem->in_num;
     struct virtio_blk_outhdr outhdr;
     QEMUIOVector *inhdr;
     size_t in_size;
@@ -229,29 +230,29 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
 
     switch (outhdr.type) {
     case VIRTIO_BLK_T_IN:
-        do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, head, inhdr);
+        do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_OUT:
-        do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, head, inhdr);
+        do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_SCSI_CMD:
         /* TODO support SCSI commands */
-        complete_request_early(s, head, inhdr, VIRTIO_BLK_S_UNSUPP);
+        complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_UNSUPP);
         return 0;
 
     case VIRTIO_BLK_T_FLUSH:
         /* TODO fdsync not supported by Linux AIO, do it synchronously here! */
         if (qemu_fdatasync(s->fd) < 0) {
-            complete_request_early(s, head, inhdr, VIRTIO_BLK_S_IOERR);
+            complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_IOERR);
         } else {
-            complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
+            complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
         }
         return 0;
 
     case VIRTIO_BLK_T_GET_ID:
-        do_get_id_cmd(s, in_iov, in_num, head, inhdr);
+        do_get_id_cmd(s, in_iov, in_num, elem, inhdr);
         return 0;
 
     default:
@@ -267,29 +268,8 @@ static void handle_notify(EventNotifier *e)
     VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
                                            host_notifier);
 
-    /* There is one array of iovecs into which all new requests are extracted
-     * from the vring.  Requests are read from the vring and the translated
-     * descriptors are written to the iovecs array.  The iovecs do not have to
-     * persist across handle_notify() calls because the kernel copies the
-     * iovecs on io_submit().
-     *
-     * Handling io_submit() EAGAIN may require storing the requests across
-     * handle_notify() calls until the kernel has sufficient resources to
-     * accept more I/O.  This is not implemented yet.
-     */
-    struct iovec iovec[VRING_MAX];
-    struct iovec *end = &iovec[VRING_MAX];
-    struct iovec *iov = iovec;
-
-    /* When a request is read from the vring, the index of the first descriptor
-     * (aka head) is returned so that the completed request can be pushed onto
-     * the vring later.
-     *
-     * The number of hypervisor read-only iovecs is out_num.  The number of
-     * hypervisor write-only iovecs is in_num.
-     */
-    int head;
-    unsigned int out_num = 0, in_num = 0;
+    VirtQueueElement *elem;
+    int ret;
     unsigned int num_queued;
 
     event_notifier_test_and_clear(&s->host_notifier);
@@ -298,29 +278,31 @@ static void handle_notify(EventNotifier *e)
         vring_disable_notification(s->vdev, &s->vring);
 
         for (;;) {
-            head = vring_pop(s->vdev, &s->vring, iov, end, &out_num, &in_num);
-            if (head < 0) {
+            ret = vring_pop(s->vdev, &s->vring, &elem);
+            if (ret < 0) {
+                assert(elem == NULL);
                 break; /* no more requests */
             }
 
-            trace_virtio_blk_data_plane_process_request(s, out_num, in_num,
-                                                        head);
+            trace_virtio_blk_data_plane_process_request(s, elem->out_num,
+                                                        elem->in_num, elem->index);
 
-            if (process_request(&s->ioqueue, iov, out_num, in_num, head) < 0) {
+            if (process_request(&s->ioqueue, elem) < 0) {
                 vring_set_broken(&s->vring);
+                vring_free_element(elem);
+                ret = -EFAULT;
                 break;
             }
-            iov += out_num + in_num;
         }
 
-        if (likely(head == -EAGAIN)) { /* vring emptied */
+        if (likely(ret == -EAGAIN)) { /* vring emptied */
             /* Re-enable guest->host notifies and stop processing the vring.
              * But if the guest has snuck in more descriptors, keep processing.
              */
             if (vring_enable_notification(s->vdev, &s->vring)) {
                 break;
             }
-        } else { /* head == -ENOBUFS or fatal error, iovecs[] is depleted */
+        } else { /* ret == -ENOBUFS or fatal error, iovecs[] is depleted */
             /* Since there are no iovecs[] left, stop processing for now.  Do
              * not re-enable guest->host notifies since the I/O completion
              * handler knows to check for more vring descriptors anyway.
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index c5a6c21215..592b58f9b5 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -2234,7 +2234,6 @@ static void isabus_fdc_class_init(ObjectClass *klass, void *data)
 
     dc->realize = isabus_fdc_realize;
     dc->fw_name = "fdc";
-    dc->no_user = 1;
     dc->reset = fdctrl_external_reset_isa;
     dc->vmsd = &vmstate_isa_fdc;
     dc->props = isa_fdc_properties;
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
index f18db53bca..1012f1ad64 100644
--- a/hw/char/cadence_uart.c
+++ b/hw/char/cadence_uart.c
@@ -34,6 +34,9 @@
 #define UART_SR_INTR_RFUL      0x00000004
 #define UART_SR_INTR_TEMPTY    0x00000008
 #define UART_SR_INTR_TFUL      0x00000010
+/* somewhat awkwardly, TTRIG is misaligned between SR and ISR */
+#define UART_SR_TTRIG          0x00002000
+#define UART_INTR_TTRIG        0x00000400
 /* bits fields in CSR that correlate to CISR. If any of these bits are set in
  * SR, then the same bit in CISR is set high too */
 #define UART_SR_TO_CISR_MASK   0x0000001F
@@ -43,6 +46,7 @@
 #define UART_INTR_PARE         0x00000080
 #define UART_INTR_TIMEOUT      0x00000100
 #define UART_INTR_DMSI         0x00000200
+#define UART_INTR_TOVR         0x00001000
 
 #define UART_SR_RACTIVE    0x00000400
 #define UART_SR_TACTIVE    0x00000800
@@ -110,23 +114,37 @@
 #define CADENCE_UART(obj) OBJECT_CHECK(UartState, (obj), TYPE_CADENCE_UART)
 
 typedef struct {
+    /*< private >*/
     SysBusDevice parent_obj;
+    /*< public >*/
 
     MemoryRegion iomem;
     uint32_t r[R_MAX];
-    uint8_t r_fifo[RX_FIFO_SIZE];
+    uint8_t rx_fifo[RX_FIFO_SIZE];
+    uint8_t tx_fifo[TX_FIFO_SIZE];
     uint32_t rx_wpos;
     uint32_t rx_count;
+    uint32_t tx_count;
     uint64_t char_tx_time;
     CharDriverState *chr;
     qemu_irq irq;
     QEMUTimer *fifo_trigger_handle;
-    QEMUTimer *tx_time_handle;
 } UartState;
 
 static void uart_update_status(UartState *s)
 {
+    s->r[R_SR] = 0;
+
+    s->r[R_SR] |= s->rx_count == RX_FIFO_SIZE ? UART_SR_INTR_RFUL : 0;
+    s->r[R_SR] |= !s->rx_count ? UART_SR_INTR_REMPTY : 0;
+    s->r[R_SR] |= s->rx_count >= s->r[R_RTRIG] ? UART_SR_INTR_RTRIG : 0;
+
+    s->r[R_SR] |= s->tx_count == TX_FIFO_SIZE ? UART_SR_INTR_TFUL : 0;
+    s->r[R_SR] |= !s->tx_count ? UART_SR_INTR_TEMPTY : 0;
+    s->r[R_SR] |= s->tx_count >= s->r[R_TTRIG] ? UART_SR_TTRIG : 0;
+
     s->r[R_CISR] |= s->r[R_SR] & UART_SR_TO_CISR_MASK;
+    s->r[R_CISR] |= s->r[R_SR] & UART_SR_TTRIG ? UART_INTR_TTRIG : 0;
     qemu_set_irq(s->irq, !!(s->r[R_IMR] & s->r[R_CISR]));
 }
 
@@ -139,24 +157,6 @@ static void fifo_trigger_update(void *opaque)
     uart_update_status(s);
 }
 
-static void uart_tx_redo(UartState *s)
-{
-    uint64_t new_tx_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-
-    timer_mod(s->tx_time_handle, new_tx_time + s->char_tx_time);
-
-    s->r[R_SR] |= UART_SR_INTR_TEMPTY;
-
-    uart_update_status(s);
-}
-
-static void uart_tx_write(void *opaque)
-{
-    UartState *s = (UartState *)opaque;
-
-    uart_tx_redo(s);
-}
-
 static void uart_rx_reset(UartState *s)
 {
     s->rx_wpos = 0;
@@ -164,15 +164,11 @@ static void uart_rx_reset(UartState *s)
     if (s->chr) {
         qemu_chr_accept_input(s->chr);
     }
-
-    s->r[R_SR] |= UART_SR_INTR_REMPTY;
-    s->r[R_SR] &= ~UART_SR_INTR_RFUL;
 }
 
 static void uart_tx_reset(UartState *s)
 {
-    s->r[R_SR] |= UART_SR_INTR_TEMPTY;
-    s->r[R_SR] &= ~UART_SR_INTR_TFUL;
+    s->tx_count = 0;
 }
 
 static void uart_send_breaks(UartState *s)
@@ -237,8 +233,16 @@ static void uart_parameters_setup(UartState *s)
 static int uart_can_receive(void *opaque)
 {
     UartState *s = (UartState *)opaque;
+    int ret = MAX(RX_FIFO_SIZE, TX_FIFO_SIZE);
+    uint32_t ch_mode = s->r[R_MR] & UART_MR_CHMODE;
 
-    return RX_FIFO_SIZE - s->rx_count;
+    if (ch_mode == NORMAL_MODE || ch_mode == ECHO_MODE) {
+        ret = MIN(ret, RX_FIFO_SIZE - s->rx_count);
+    }
+    if (ch_mode == REMOTE_LOOPBACK || ch_mode == ECHO_MODE) {
+        ret = MIN(ret, TX_FIFO_SIZE - s->tx_count);
+    }
+    return ret;
 }
 
 static void uart_ctrl_update(UartState *s)
@@ -253,10 +257,6 @@ static void uart_ctrl_update(UartState *s)
 
     s->r[R_CR] &= ~(UART_CR_TXRST | UART_CR_RXRST);
 
-    if ((s->r[R_CR] & UART_CR_TX_EN) && !(s->r[R_CR] & UART_CR_TX_DIS)) {
-            uart_tx_redo(s);
-    }
-
     if (s->r[R_CR] & UART_CR_STARTBRK && !(s->r[R_CR] & UART_CR_STOPBRK)) {
         uart_send_breaks(s);
     }
@@ -272,24 +272,13 @@ static void uart_write_rx_fifo(void *opaque, const uint8_t *buf, int size)
         return;
     }
 
-    s->r[R_SR] &= ~UART_SR_INTR_REMPTY;
-
     if (s->rx_count == RX_FIFO_SIZE) {
         s->r[R_CISR] |= UART_INTR_ROVR;
     } else {
         for (i = 0; i < size; i++) {
-            s->r_fifo[s->rx_wpos] = buf[i];
+            s->rx_fifo[s->rx_wpos] = buf[i];
             s->rx_wpos = (s->rx_wpos + 1) % RX_FIFO_SIZE;
             s->rx_count++;
-
-            if (s->rx_count == RX_FIFO_SIZE) {
-                s->r[R_SR] |= UART_SR_INTR_RFUL;
-                break;
-            }
-
-            if (s->rx_count >= s->r[R_RTRIG]) {
-                s->r[R_SR] |= UART_SR_INTR_RTRIG;
-            }
         }
         timer_mod(s->fifo_trigger_handle, new_rx_time +
                                                 (s->char_tx_time * 4));
@@ -297,13 +286,55 @@ static void uart_write_rx_fifo(void *opaque, const uint8_t *buf, int size)
     uart_update_status(s);
 }
 
+/* GIO watch callback: push pending TX FIFO bytes to the chardev back-end. */
+static gboolean cadence_uart_xmit(GIOChannel *chan, GIOCondition cond,
+                                  void *opaque)
+{
+    UartState *s = opaque;
+    int ret;
+
+    /* instant drain the fifo when there's no back-end */
+    if (!s->chr) {
+        s->tx_count = 0;
+    }
+    if (!s->tx_count) {
+        return FALSE;
+    }
+
+    ret = qemu_chr_fe_write(s->chr, s->tx_fifo, s->tx_count);
+    if (ret > 0) { /* a negative (error) return must not corrupt the FIFO */
+        s->tx_count -= ret;
+        memmove(s->tx_fifo, s->tx_fifo + ret, s->tx_count);
+    }
+    if (s->tx_count) {
+        int r = qemu_chr_fe_add_watch(s->chr, G_IO_OUT, cadence_uart_xmit, s);
+        assert(r);
+    }
+    uart_update_status(s);
+    return FALSE;
+}
+
 static void uart_write_tx_fifo(UartState *s, const uint8_t *buf, int size)
 {
     if ((s->r[R_CR] & UART_CR_TX_DIS) || !(s->r[R_CR] & UART_CR_TX_EN)) {
         return;
     }
 
-    qemu_chr_fe_write_all(s->chr, buf, size);
+    if (size > TX_FIFO_SIZE - s->tx_count) {
+        size = TX_FIFO_SIZE - s->tx_count;
+        /*
+         * This can only be a guest error via a bad tx fifo register push,
+         * as can_receive() should stop remote loopback and echo modes from
+         * ever getting us here.  Flag it as a transmit (not receive) overflow.
+         */
+        qemu_log_mask(LOG_GUEST_ERROR, "cadence_uart: TxFIFO overflow\n");
+        s->r[R_CISR] |= UART_INTR_TOVR;
+    }
+
+    memcpy(s->tx_fifo + s->tx_count, buf, size);
+    s->tx_count += size;
+
+    cadence_uart_xmit(NULL, G_IO_OUT, s);
 }
 
 static void uart_receive(void *opaque, const uint8_t *buf, int size)
@@ -337,26 +368,17 @@ static void uart_read_rx_fifo(UartState *s, uint32_t *c)
         return;
     }
 
-    s->r[R_SR] &= ~UART_SR_INTR_RFUL;
-
     if (s->rx_count) {
         uint32_t rx_rpos =
                 (RX_FIFO_SIZE + s->rx_wpos - s->rx_count) % RX_FIFO_SIZE;
-        *c = s->r_fifo[rx_rpos];
+        *c = s->rx_fifo[rx_rpos];
         s->rx_count--;
 
-        if (!s->rx_count) {
-            s->r[R_SR] |= UART_SR_INTR_REMPTY;
-        }
         qemu_chr_accept_input(s->chr);
     } else {
         *c = 0;
-        s->r[R_SR] |= UART_SR_INTR_REMPTY;
     }
 
-    if (s->rx_count < s->r[R_RTRIG]) {
-        s->r[R_SR] &= ~UART_SR_INTR_RTRIG;
-    }
     uart_update_status(s);
 }
 
@@ -401,6 +423,7 @@ static void uart_write(void *opaque, hwaddr offset,
         uart_parameters_setup(s);
         break;
     }
+    uart_update_status(s);
 }
 
 static uint64_t uart_read(void *opaque, hwaddr offset,
@@ -428,8 +451,10 @@ static const MemoryRegionOps uart_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void cadence_uart_reset(UartState *s)
+static void cadence_uart_reset(DeviceState *dev)
 {
+    UartState *s = CADENCE_UART(dev);
+
     s->r[R_CR] = 0x00000128;
     s->r[R_IMR] = 0;
     s->r[R_CISR] = 0;
@@ -440,8 +465,7 @@ static void cadence_uart_reset(UartState *s)
     uart_rx_reset(s);
     uart_tx_reset(s);
 
-    s->rx_count = 0;
-    s->rx_wpos = 0;
+    uart_update_status(s);
 }
 
 static int cadence_uart_init(SysBusDevice *dev)
@@ -455,15 +479,10 @@ static int cadence_uart_init(SysBusDevice *dev)
     s->fifo_trigger_handle = timer_new_ns(QEMU_CLOCK_VIRTUAL,
             (QEMUTimerCB *)fifo_trigger_update, s);
 
-    s->tx_time_handle = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-            (QEMUTimerCB *)uart_tx_write, s);
-
     s->char_tx_time = (get_ticks_per_sec() / 9600) * 10;
 
     s->chr = qemu_char_get_next_serial();
 
-    cadence_uart_reset(s);
-
     if (s->chr) {
         qemu_chr_add_handlers(s->chr, uart_can_receive, uart_receive,
                               uart_event, s);
@@ -483,17 +502,18 @@ static int cadence_uart_post_load(void *opaque, int version_id)
 
 static const VMStateDescription vmstate_cadence_uart = {
     .name = "cadence_uart",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .minimum_version_id_old = 2,
     .post_load = cadence_uart_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32_ARRAY(r, UartState, R_MAX),
-        VMSTATE_UINT8_ARRAY(r_fifo, UartState, RX_FIFO_SIZE),
+        VMSTATE_UINT8_ARRAY(rx_fifo, UartState, RX_FIFO_SIZE),
+        VMSTATE_UINT8_ARRAY(tx_fifo, UartState, TX_FIFO_SIZE),
         VMSTATE_UINT32(rx_count, UartState),
+        VMSTATE_UINT32(tx_count, UartState),
         VMSTATE_UINT32(rx_wpos, UartState),
         VMSTATE_TIMER(fifo_trigger_handle, UartState),
-        VMSTATE_TIMER(tx_time_handle, UartState),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -505,6 +525,7 @@ static void cadence_uart_class_init(ObjectClass *klass, void *data)
 
     sdc->init = cadence_uart_init;
     dc->vmsd = &vmstate_cadence_uart;
+    dc->reset = cadence_uart_reset;
 }
 
 static const TypeInfo cadence_uart_info = {
diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c
index eef23a0ccc..19b59ccddb 100644
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -192,10 +192,9 @@ typedef struct Exynos4210UartState {
 static const char *exynos4210_uart_regname(hwaddr  offset)
 {
 
-    int regs_number = sizeof(exynos4210_uart_regs) / sizeof(Exynos4210UartReg);
     int i;
 
-    for (i = 0; i < regs_number; i++) {
+    for (i = 0; i < ARRAY_SIZE(exynos4210_uart_regs); i++) {
         if (offset == exynos4210_uart_regs[i].offset) {
             return exynos4210_uart_regs[i].name;
         }
@@ -544,10 +543,9 @@ static void exynos4210_uart_event(void *opaque, int event)
 static void exynos4210_uart_reset(DeviceState *dev)
 {
     Exynos4210UartState *s = EXYNOS4210_UART(dev);
-    int regs_number = sizeof(exynos4210_uart_regs)/sizeof(Exynos4210UartReg);
     int i;
 
-    for (i = 0; i < regs_number; i++) {
+    for (i = 0; i < ARRAY_SIZE(exynos4210_uart_regs); i++) {
         s->reg[I_(exynos4210_uart_regs[i].offset)] =
                 exynos4210_uart_regs[i].reset_value;
     }
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 7d869fcc7d..82a9123038 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -233,19 +233,19 @@ static int qbus_reset_one(BusState *bus, void *opaque)
 {
     BusClass *bc = BUS_GET_CLASS(bus);
     if (bc->reset) {
-        return bc->reset(bus);
+        bc->reset(bus);
     }
     return 0;
 }
 
 void qdev_reset_all(DeviceState *dev)
 {
-    qdev_walk_children(dev, qdev_reset_one, qbus_reset_one, NULL);
+    qdev_walk_children(dev, NULL, NULL, qdev_reset_one, qbus_reset_one, NULL);
 }
 
 void qbus_reset_all(BusState *bus)
 {
-    qbus_walk_children(bus, qdev_reset_one, qbus_reset_one, NULL);
+    qbus_walk_children(bus, NULL, NULL, qdev_reset_one, qbus_reset_one, NULL);
 }
 
 void qbus_reset_all_fn(void *opaque)
@@ -337,49 +337,70 @@ BusState *qdev_get_child_bus(DeviceState *dev, const char *name)
     return NULL;
 }
 
-int qbus_walk_children(BusState *bus, qdev_walkerfn *devfn,
-                       qbus_walkerfn *busfn, void *opaque)
+int qbus_walk_children(BusState *bus,
+                       qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn,
+                       qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn,
+                       void *opaque)
 {
     BusChild *kid;
     int err;
 
-    if (busfn) {
-        err = busfn(bus, opaque);
+    if (pre_busfn) {
+        err = pre_busfn(bus, opaque);
         if (err) {
             return err;
         }
     }
 
     QTAILQ_FOREACH(kid, &bus->children, sibling) {
-        err = qdev_walk_children(kid->child, devfn, busfn, opaque);
+        err = qdev_walk_children(kid->child,
+                                 pre_devfn, pre_busfn,
+                                 post_devfn, post_busfn, opaque);
         if (err < 0) {
             return err;
         }
     }
 
+    if (post_busfn) {
+        err = post_busfn(bus, opaque);
+        if (err) {
+            return err;
+        }
+    }
+
     return 0;
 }
 
-int qdev_walk_children(DeviceState *dev, qdev_walkerfn *devfn,
-                       qbus_walkerfn *busfn, void *opaque)
+int qdev_walk_children(DeviceState *dev,
+                       qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn,
+                       qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn,
+                       void *opaque)
 {
     BusState *bus;
     int err;
 
-    if (devfn) {
-        err = devfn(dev, opaque);
+    if (pre_devfn) {
+        err = pre_devfn(dev, opaque);
         if (err) {
             return err;
         }
     }
 
     QLIST_FOREACH(bus, &dev->child_bus, sibling) {
-        err = qbus_walk_children(bus, devfn, busfn, opaque);
+        err = qbus_walk_children(bus, pre_devfn, pre_busfn,
+                                 post_devfn, post_busfn, opaque);
         if (err < 0) {
             return err;
         }
     }
 
+    if (post_devfn) {
+        err = post_devfn(dev, opaque);
+        if (err) {
+            return err;
+        }
+    }
+
     return 0;
 }
 
@@ -481,11 +502,6 @@ BusState *qbus_create(const char *typename, DeviceState *parent, const char *nam
     return bus;
 }
 
-void qbus_free(BusState *bus)
-{
-    object_unparent(OBJECT(bus));
-}
-
 static char *bus_get_fw_dev_path(BusState *bus, DeviceState *dev)
 {
     BusClass *bc = BUS_GET_CLASS(bus);
@@ -780,7 +796,7 @@ static void device_unparent(Object *obj)
 
     while (dev->num_child_bus) {
         bus = QLIST_FIRST(&dev->child_bus);
-        qbus_free(bus);
+        object_unparent(OBJECT(bus));
     }
     if (dev->realized) {
         object_property_set_bool(obj, false, "realized", NULL);
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 146f50aa15..f4e760d6eb 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -257,6 +257,13 @@ static void sysbus_device_class_init(ObjectClass *klass, void *data)
     DeviceClass *k = DEVICE_CLASS(klass);
     k->init = sysbus_device_init;
     k->bus_type = TYPE_SYSTEM_BUS;
+    /*
+     * device_add plugs devices into a suitable bus.  For "real" buses,
+     * that actually connects the device.  For sysbus, the connections
+     * need to be made separately, and device_add can't do that.  The
+     * device would be left unconnected, and could not possibly work.
+     */
+    k->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo sysbus_device_type_info = {
diff --git a/hw/cpu/icc_bus.c b/hw/cpu/icc_bus.c
index 9a4ea7e2df..7f44c59b25 100644
--- a/hw/cpu/icc_bus.c
+++ b/hw/cpu/icc_bus.c
@@ -43,15 +43,13 @@ static const TypeInfo icc_bus_info = {
 
 static void icc_device_realize(DeviceState *dev, Error **errp)
 {
-    ICCDevice *id = ICC_DEVICE(dev);
-    ICCDeviceClass *idc = ICC_DEVICE_GET_CLASS(id);
-
-    if (idc->init) {
-        if (idc->init(id) < 0) {
-            error_setg(errp, "%s initialization failed.",
-                       object_get_typename(OBJECT(dev)));
-        }
+    ICCDeviceClass *idc = ICC_DEVICE_GET_CLASS(dev);
+
+    /* convert to QOM */
+    if (idc->realize) {
+        idc->realize(dev, errp);
     }
+
 }
 
 static void icc_device_class_init(ObjectClass *oc, void *data)
diff --git a/hw/display/pl110.c b/hw/display/pl110.c
index 790e5108ed..ab689e9aae 100644
--- a/hw/display/pl110.c
+++ b/hw/display/pl110.c
@@ -496,7 +496,6 @@ static void pl110_class_init(ObjectClass *klass, void *data)
 
     k->init = pl110_initfn;
     set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
-    dc->no_user = 1;
     dc->vmsd = &vmstate_pl110;
 }
 
diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c
index 35b90155a2..cb7bda9803 100644
--- a/hw/dma/pl080.c
+++ b/hw/dma/pl080.c
@@ -381,7 +381,6 @@ static void pl080_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
 
-    dc->no_user = 1;
     dc->vmsd = &vmstate_pl080;
 }
 
diff --git a/hw/dma/sparc32_dma.c b/hw/dma/sparc32_dma.c
index 2a92ffb82e..eac338f1bc 100644
--- a/hw/dma/sparc32_dma.c
+++ b/hw/dma/sparc32_dma.c
@@ -304,6 +304,8 @@ static void sparc32_dma_class_init(ObjectClass *klass, void *data)
     dc->reset = dma_reset;
     dc->vmsd = &vmstate_dma;
     dc->props = sparc32_dma_properties;
+    /* Reason: pointer property "iommu_opaque" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo sparc32_dma_info = {
diff --git a/hw/gpio/omap_gpio.c b/hw/gpio/omap_gpio.c
index b8f572bb70..938782a45d 100644
--- a/hw/gpio/omap_gpio.c
+++ b/hw/gpio/omap_gpio.c
@@ -759,6 +759,8 @@ static void omap_gpio_class_init(ObjectClass *klass, void *data)
     k->init = omap_gpio_init;
     dc->reset = omap_gpif_reset;
     dc->props = omap_gpio_properties;
+    /* Reason: pointer property "clk" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo omap_gpio_info = {
@@ -788,6 +790,8 @@ static void omap2_gpio_class_init(ObjectClass *klass, void *data)
     k->init = omap2_gpio_init;
     dc->reset = omap2_gpif_reset;
     dc->props = omap2_gpio_properties;
+    /* Reason: pointer properties "iclk", "fclk0", ..., "fclk5" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo omap2_gpio_info = {
diff --git a/hw/i2c/omap_i2c.c b/hw/i2c/omap_i2c.c
index f528b2b38e..2d8e2b7839 100644
--- a/hw/i2c/omap_i2c.c
+++ b/hw/i2c/omap_i2c.c
@@ -475,6 +475,8 @@ static void omap_i2c_class_init(ObjectClass *klass, void *data)
     k->init = omap_i2c_init;
     dc->props = omap_i2c_properties;
     dc->reset = omap_i2c_reset;
+    /* Reason: pointer properties "iclk", "fclk" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo omap_i2c_info = {
diff --git a/hw/i2c/smbus_eeprom.c b/hw/i2c/smbus_eeprom.c
index 0154283762..0218f8a0eb 100644
--- a/hw/i2c/smbus_eeprom.c
+++ b/hw/i2c/smbus_eeprom.c
@@ -121,6 +121,8 @@ static void smbus_eeprom_class_initfn(ObjectClass *klass, void *data)
     sc->write_data = eeprom_write_data;
     sc->read_data = eeprom_read_data;
     dc->props = smbus_eeprom_properties;
+    /* Reason: pointer property "data" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo smbus_eeprom_info = {
diff --git a/hw/i2c/smbus_ich9.c b/hw/i2c/smbus_ich9.c
index ca229789f4..8d47eaffc8 100644
--- a/hw/i2c/smbus_ich9.c
+++ b/hw/i2c/smbus_ich9.c
@@ -97,11 +97,15 @@ static void ich9_smb_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_INTEL_ICH9_6;
     k->revision = ICH9_A2_SMB_REVISION;
     k->class_id = PCI_CLASS_SERIAL_SMBUS;
-    dc->no_user = 1;
     dc->vmsd = &vmstate_ich9_smbus;
     dc->desc = "ICH9 SMBUS Bridge";
     k->init = ich9_smbus_initfn;
     k->config_write = ich9_smbus_write_config;
+    /*
+     * Reason: part of ICH9 southbridge, needs to be wired up by
+     * pc_q35_init()
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 i2c_bus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base)
diff --git a/hw/i386/acpi-dsdt-cpu-hotplug.dsl b/hw/i386/acpi-dsdt-cpu-hotplug.dsl
index c96ac42a31..995b415bae 100644
--- a/hw/i386/acpi-dsdt-cpu-hotplug.dsl
+++ b/hw/i386/acpi-dsdt-cpu-hotplug.dsl
@@ -52,7 +52,6 @@ Scope(\_SB) {
         Sleep(200)
     }
 
-    /* CPU hotplug notify method */
     OperationRegion(PRST, SystemIO, 0xaf00, 32)
     Field(PRST, ByteAcc, NoLock, Preserve) {
         PRS, 256
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 5609063120..e873b509a5 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -25,9 +25,9 @@ static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
     return *((uint32_t *)(kapic->regs + (reg_id << 4)));
 }
 
-void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic)
+void kvm_put_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
     int i;
 
     memset(kapic, 0, sizeof(*kapic));
@@ -51,9 +51,9 @@ void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic)
     kvm_apic_set_reg(kapic, 0x3e, s->divide_conf);
 }
 
-void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic)
+void kvm_get_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
     int i, v;
 
     s->id = kvm_apic_get_reg(kapic, 0x2) >> 24;
@@ -171,8 +171,10 @@ static const MemoryRegionOps kvm_apic_io_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void kvm_apic_init(APICCommonState *s)
+static void kvm_apic_realize(DeviceState *dev, Error **errp)
 {
+    APICCommonState *s = APIC_COMMON(dev);
+
     memory_region_init_io(&s->io_memory, NULL, &kvm_apic_io_ops, s, "kvm-apic-msi",
                           APIC_SPACE_SIZE);
 
@@ -185,7 +187,7 @@ static void kvm_apic_class_init(ObjectClass *klass, void *data)
 {
     APICCommonClass *k = APIC_COMMON_CLASS(klass);
 
-    k->init = kvm_apic_init;
+    k->realize = kvm_apic_realize;
     k->set_base = kvm_apic_set_base;
     k->set_tpr = kvm_apic_set_tpr;
     k->get_tpr = kvm_apic_get_tpr;
diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 383938d1bc..892aa025f4 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -114,7 +114,6 @@ static void kvmclock_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = kvmclock_realize;
-    dc->no_user = 1;
     dc->vmsd = &kvmclock_vmsd;
 }
 
diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c
index f11a540825..d2a6c4cf60 100644
--- a/hw/i386/kvm/ioapic.c
+++ b/hw/i386/kvm/ioapic.c
@@ -127,11 +127,13 @@ static void kvm_ioapic_set_irq(void *opaque, int irq, int level)
     apic_report_irq_delivered(delivered);
 }
 
-static void kvm_ioapic_init(IOAPICCommonState *s, int instance_no)
+static void kvm_ioapic_realize(DeviceState *dev, Error **errp)
 {
+    IOAPICCommonState *s = IOAPIC_COMMON(dev);
+
     memory_region_init_reservation(&s->io_memory, NULL, "kvm-ioapic", 0x1000);
 
-    qdev_init_gpio_in(DEVICE(s), kvm_ioapic_set_irq, IOAPIC_NUM_PINS);
+    qdev_init_gpio_in(dev, kvm_ioapic_set_irq, IOAPIC_NUM_PINS);
 }
 
 static Property kvm_ioapic_properties[] = {
@@ -144,7 +146,7 @@ static void kvm_ioapic_class_init(ObjectClass *klass, void *data)
     IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    k->init      = kvm_ioapic_init;
+    k->realize   = kvm_ioapic_realize;
     k->pre_save  = kvm_ioapic_get;
     k->post_load = kvm_ioapic_put;
     dc->reset    = kvm_ioapic_reset;
diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index 2d876009fc..72025d0359 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -366,7 +366,7 @@ static int vapic_enable(VAPICROMState *s, X86CPU *cpu)
         (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT);
     cpu_physical_memory_rw(vapic_paddr + offsetof(VAPICState, enabled),
                            (void *)&enabled, sizeof(enabled), 1);
-    apic_enable_vapic(cpu->env.apic_state, vapic_paddr);
+    apic_enable_vapic(cpu->apic_state, vapic_paddr);
 
     s->state = VAPIC_ACTIVE;
 
@@ -496,12 +496,10 @@ static void vapic_enable_tpr_reporting(bool enable)
     };
     CPUState *cs;
     X86CPU *cpu;
-    CPUX86State *env;
 
     CPU_FOREACH(cs) {
         cpu = X86_CPU(cs);
-        env = &cpu->env;
-        info.apic = env->apic_state;
+        info.apic = cpu->apic_state;
         run_on_cpu(cs, vapic_do_enable_tpr_reporting, &info);
     }
 }
@@ -700,7 +698,7 @@ static void vapic_write(void *opaque, hwaddr addr, uint64_t data,
     default:
     case 4:
         if (!kvm_irqchip_in_kernel()) {
-            apic_poll_irq(env->apic_state);
+            apic_poll_irq(cpu->apic_state);
         }
         break;
     }
@@ -827,7 +825,6 @@ static void vapic_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    dc->no_user = 1;
     dc->reset   = vapic_reset;
     dc->vmsd    = &vmstate_vapic;
     dc->realize = vapic_realize;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 3cd8f383f3..6f0be37d8b 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -171,14 +171,15 @@ void cpu_smm_update(CPUX86State *env)
 /* IRQ handling */
 int cpu_get_pic_interrupt(CPUX86State *env)
 {
+    X86CPU *cpu = x86_env_get_cpu(env);
     int intno;
 
-    intno = apic_get_interrupt(env->apic_state);
+    intno = apic_get_interrupt(cpu->apic_state);
     if (intno >= 0) {
         return intno;
     }
     /* read the irq from the PIC */
-    if (!apic_accept_pic_intr(env->apic_state)) {
+    if (!apic_accept_pic_intr(cpu->apic_state)) {
         return -1;
     }
 
@@ -190,15 +191,13 @@ static void pic_irq_request(void *opaque, int irq, int level)
 {
     CPUState *cs = first_cpu;
     X86CPU *cpu = X86_CPU(cs);
-    CPUX86State *env = &cpu->env;
 
     DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
-    if (env->apic_state) {
+    if (cpu->apic_state) {
         CPU_FOREACH(cs) {
             cpu = X86_CPU(cs);
-            env = &cpu->env;
-            if (apic_accept_pic_intr(env->apic_state)) {
-                apic_deliver_pic_intr(env->apic_state, level);
+            if (apic_accept_pic_intr(cpu->apic_state)) {
+                apic_deliver_pic_intr(cpu->apic_state, level);
             }
         }
     } else {
@@ -547,10 +546,15 @@ static void port92_class_initfn(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    dc->no_user = 1;
     dc->realize = port92_realizefn;
     dc->reset = port92_reset;
     dc->vmsd = &vmstate_port92_isa;
+    /*
+     * Reason: unlike ordinary ISA devices, this one needs additional
+     * wiring: its A20 output line needs to be wired up by
+     * port92_init().
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo port92_info = {
@@ -908,7 +912,7 @@ DeviceState *cpu_get_current_apic(void)
 {
     if (current_cpu) {
         X86CPU *cpu = X86_CPU(current_cpu);
-        return cpu->env.apic_state;
+        return cpu->apic_state;
     } else {
         return NULL;
     }
@@ -1002,7 +1006,7 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
     }
 
     /* map APIC MMIO area if CPU has APIC */
-    if (cpu && cpu->env.apic_state) {
+    if (cpu && cpu->apic_state) {
         /* XXX: what if the base changes? */
         sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0,
                                 APIC_DEFAULT_ADDRESS, 0x1000);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4e0dae7981..276641436e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -61,6 +61,11 @@ static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
 static bool has_pci_info;
 static bool has_acpi_build = true;
 static bool smbios_type1_defaults = true;
+/* Make sure that guest addresses aligned at 1GByte boundaries get mapped to
+ * host addresses aligned at 1GByte boundaries.  This way we can use 1GByte
+ * pages in the host.
+ */
+static bool gigabyte_align = true;
 
 /* PC hardware initialisation */
 static void pc_init1(QEMUMachineInitArgs *args,
@@ -106,9 +111,17 @@ static void pc_init1(QEMUMachineInitArgs *args,
         kvmclock_create();
     }
 
+    /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
+     * If it doesn't, we need to split it in chunks below and above 4G.
+     * In any case, try to make sure that guest addresses aligned at
+     * 1G boundaries get mapped to host addresses aligned at 1G boundaries.
+     * For old machine types, use whatever split we used historically to avoid
+     * breaking migration.
+     */
     if (args->ram_size >= 0xe0000000) {
-        above_4g_mem_size = args->ram_size - 0xe0000000;
-        below_4g_mem_size = 0xe0000000;
+        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
+        above_4g_mem_size = args->ram_size - lowmem;
+        below_4g_mem_size = lowmem;
     } else {
         above_4g_mem_size = 0;
         below_4g_mem_size = args->ram_size;
@@ -157,6 +170,7 @@ static void pc_init1(QEMUMachineInitArgs *args,
     if (pci_enabled) {
         pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
                               system_memory, system_io, args->ram_size,
+                              below_4g_mem_size,
                               above_4g_mem_size,
                               pci_memory, ram_memory);
     } else {
@@ -245,6 +259,7 @@ static void pc_init_pci(QEMUMachineInitArgs *args)
 static void pc_compat_1_7(QEMUMachineInitArgs *args)
 {
     smbios_type1_defaults = false;
+    gigabyte_align = false;
 }
 
 static void pc_compat_1_6(QEMUMachineInitArgs *args)
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index e917c83540..75a7ebbaa7 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -72,35 +72,102 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
     memory_region_set_readonly(isa_bios, true);
 }
 
-static void pc_system_flash_init(MemoryRegion *rom_memory,
-                                 DriveInfo *pflash_drv)
+#define FLASH_MAP_UNIT_MAX 2
+
+/* We don't have a theoretically justifiable exact lower bound on the base
+ * address of any flash mapping. In practice, the local APIC MMIO range is
+ * [0xFEE00000..0xFEE01000[ -- see APIC_DEFAULT_ADDRESS --, leaving free
+ * only 18MB-4KB below 4G. For now, restrict the cumulative mapping to 8MB in
+ * size.
+ */
+#define FLASH_MAP_BASE_MIN ((hwaddr)(0x100000000ULL - 8*1024*1024))
+
+/* This function maps flash drives from 4G downward, in order of their unit
+ * numbers. The mapping starts at unit#0, with unit number increments of 1, and
+ * stops before the first missing flash drive, or before
+ * unit#FLASH_MAP_UNIT_MAX, whichever is reached first.
+ *
+ * Addressing within one flash drive is of course not reversed.
+ *
+ * An error message is printed and the process exits if:
+ * - the size of the backing file for a flash drive is non-positive, or not a
+ *   multiple of the required sector size, or
+ * - the current mapping's base address would fall below FLASH_MAP_BASE_MIN.
+ *
+ * The drive with unit#0 (if available) is mapped at the highest address, and
+ * it is passed to pc_isa_bios_init(). Merging several drives for isa-bios is
+ * not supported.
+ */
+static void pc_system_flash_init(MemoryRegion *rom_memory)
 {
+    int unit;
+    DriveInfo *pflash_drv;
     BlockDriverState *bdrv;
     int64_t size;
-    hwaddr phys_addr;
+    char *fatal_errmsg = NULL;
+    hwaddr phys_addr = 0x100000000ULL;
     int sector_bits, sector_size;
     pflash_t *system_flash;
     MemoryRegion *flash_mem;
+    char name[64];
 
-    bdrv = pflash_drv->bdrv;
-    size = bdrv_getlength(pflash_drv->bdrv);
     sector_bits = 12;
     sector_size = 1 << sector_bits;
 
-    if ((size % sector_size) != 0) {
-        fprintf(stderr,
-                "qemu: PC system firmware (pflash) must be a multiple of 0x%x\n",
-                sector_size);
-        exit(1);
+    for (unit = 0;
+         (unit < FLASH_MAP_UNIT_MAX &&
+          (pflash_drv = drive_get(IF_PFLASH, 0, unit)) != NULL);
+         ++unit) {
+        bdrv = pflash_drv->bdrv;
+        size = bdrv_getlength(bdrv);
+        if (size < 0) {
+            fatal_errmsg = g_strdup_printf("failed to get backing file size");
+        } else if (size == 0) {
+            fatal_errmsg = g_strdup_printf("PC system firmware (pflash) "
+                               "cannot have zero size");
+        } else if ((size % sector_size) != 0) {
+            fatal_errmsg = g_strdup_printf("PC system firmware (pflash) "
+                               "must be a multiple of 0x%x", sector_size);
+        } else if (phys_addr < size || phys_addr - size < FLASH_MAP_BASE_MIN) {
+            fatal_errmsg = g_strdup_printf("oversized backing file, pflash "
+                               "segments cannot be mapped under "
+                               TARGET_FMT_plx, FLASH_MAP_BASE_MIN);
+        }
+        if (fatal_errmsg != NULL) {
+            Location loc;
+
+            /* push a new, "none" location on the location stack; overwrite its
+             * contents with the location saved in the option; print the error
+             * (includes location); pop the top
+             */
+            loc_push_none(&loc);
+            if (pflash_drv->opts != NULL) {
+                qemu_opts_loc_restore(pflash_drv->opts);
+            }
+            error_report("%s", fatal_errmsg);
+            loc_pop(&loc);
+            g_free(fatal_errmsg);
+            exit(1);
+        }
+
+        phys_addr -= size;
+
+        /* pflash_cfi01_register() creates a deep copy of the name */
+        snprintf(name, sizeof name, "system.flash%d", unit);
+        system_flash = pflash_cfi01_register(phys_addr, NULL /* qdev */, name,
+                                             size, bdrv, sector_size,
+                                             size >> sector_bits,
+                                             1      /* width */,
+                                             0x0000 /* id0 */,
+                                             0x0000 /* id1 */,
+                                             0x0000 /* id2 */,
+                                             0x0000 /* id3 */,
+                                             0      /* be */);
+        if (unit == 0) {
+            flash_mem = pflash_cfi01_get_memory(system_flash);
+            pc_isa_bios_init(rom_memory, flash_mem, size);
+        }
     }
-
-    phys_addr = 0x100000000ULL - size;
-    system_flash = pflash_cfi01_register(phys_addr, NULL, "system.flash", size,
-                                         bdrv, sector_size, size >> sector_bits,
-                                         1, 0x0000, 0x0000, 0x0000, 0x0000, 0);
-    flash_mem = pflash_cfi01_get_memory(system_flash);
-
-    pc_isa_bios_init(rom_memory, flash_mem, size);
 }
 
 static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw)
@@ -181,5 +248,5 @@ void pc_system_firmware_init(MemoryRegion *rom_memory, bool isapc_ram_fw)
         exit(1);
     }
 
-    pc_system_flash_init(rom_memory, pflash_drv);
+    pc_system_flash_init(rom_memory);
 }
diff --git a/hw/i386/q35-acpi-dsdt.dsl b/hw/i386/q35-acpi-dsdt.dsl
index 575c5d7376..7934a9ddfb 100644
--- a/hw/i386/q35-acpi-dsdt.dsl
+++ b/hw/i386/q35-acpi-dsdt.dsl
@@ -417,11 +417,11 @@ DefinitionBlock (
         Method(_L00) {
         }
         Method(_L01) {
+        }
+        Method(_E02) {
             // CPU hotplug event
             \_SB.PRSC()
         }
-        Method(_L02) {
-        }
         Method(_L03) {
         }
         Method(_L04) {
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index ab36749417..9b5960b44e 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -248,7 +248,6 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1;
     k->class_id = PCI_CLASS_STORAGE_IDE;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-    dc->no_user = 1;
 }
 
 static const TypeInfo piix3_ide_info = {
@@ -267,7 +266,6 @@ static void piix3_ide_xen_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1;
     k->class_id = PCI_CLASS_STORAGE_IDE;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-    dc->no_user = 1;
     dc->unplug = pci_piix3_xen_ide_unplug;
 }
 
@@ -289,7 +287,6 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_INTEL_82371AB;
     k->class_id = PCI_CLASS_STORAGE_IDE;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-    dc->no_user = 1;
 }
 
 static const TypeInfo piix4_ide_info = {
diff --git a/hw/ide/via.c b/hw/ide/via.c
index 99468c773e..198123b026 100644
--- a/hw/ide/via.c
+++ b/hw/ide/via.c
@@ -225,7 +225,6 @@ static void via_ide_class_init(ObjectClass *klass, void *data)
     k->revision = 0x06;
     k->class_id = PCI_CLASS_STORAGE_IDE;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-    dc->no_user = 1;
 }
 
 static const TypeInfo via_ide_info = {
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index ce86237cf3..655b8c5011 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -522,7 +522,6 @@ static void i8042_class_initfn(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = i8042_realizefn;
-    dc->no_user = 1;
     dc->vmsd = &vmstate_kbd_isa;
 }
 
diff --git a/hw/input/pxa2xx_keypad.c b/hw/input/pxa2xx_keypad.c
index 846d1370de..b90b0ba102 100644
--- a/hw/input/pxa2xx_keypad.c
+++ b/hw/input/pxa2xx_keypad.c
@@ -85,7 +85,7 @@
 struct PXA2xxKeyPadState {
     MemoryRegion iomem;
     qemu_irq    irq;
-    struct  keymap *map;
+    const struct  keymap *map;
     int         pressed_cnt;
     int         alt_code;
 
@@ -322,8 +322,8 @@ PXA2xxKeyPadState *pxa27x_keypad_init(MemoryRegion *sysmem,
     return s;
 }
 
-void pxa27x_register_keypad(PXA2xxKeyPadState *kp, struct keymap *map,
-        int size)
+void pxa27x_register_keypad(PXA2xxKeyPadState *kp,
+                            const struct keymap *map, int size)
 {
     if(!map || size < 0x80) {
         fprintf(stderr, "%s - No PXA keypad map defined\n", __FUNCTION__);
diff --git a/hw/input/vmmouse.c b/hw/input/vmmouse.c
index abd032b794..6a5053352a 100644
--- a/hw/input/vmmouse.c
+++ b/hw/input/vmmouse.c
@@ -282,10 +282,11 @@ static void vmmouse_class_initfn(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = vmmouse_realizefn;
-    dc->no_user = 1;
     dc->reset = vmmouse_reset;
     dc->vmsd = &vmstate_vmmouse;
     dc->props = vmmouse_properties;
+    /* Reason: pointer property "ps2_mouse" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo vmmouse_info = {
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index a913186ed0..3d3deb6298 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -171,9 +171,9 @@ static void apic_local_deliver(APICCommonState *s, int vector)
     }
 }
 
-void apic_deliver_pic_intr(DeviceState *d, int level)
+void apic_deliver_pic_intr(DeviceState *dev, int level)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
 
     if (level) {
         apic_local_deliver(s, APIC_LVT_LINT0);
@@ -376,9 +376,9 @@ static void apic_update_irq(APICCommonState *s)
     }
 }
 
-void apic_poll_irq(DeviceState *d)
+void apic_poll_irq(DeviceState *dev)
 {
-    APICCommonState *s = APIC_COMMON(d);
+    APICCommonState *s = APIC_COMMON(dev);
 
     apic_sync_vapic(s, SYNC_FROM_VAPIC);
     apic_update_irq(s);
@@ -482,9 +482,9 @@ static void apic_startup(APICCommonState *s, int vector_num)
     cpu_interrupt(CPU(s->cpu), CPU_INTERRUPT_SIPI);
 }
 
-void apic_sipi(DeviceState *d)
+void apic_sipi(DeviceState *dev)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
 
     cpu_reset_interrupt(CPU(s->cpu), CPU_INTERRUPT_SIPI);
 
@@ -494,11 +494,11 @@ void apic_sipi(DeviceState *d)
     s->wait_for_sipi = 0;
 }
 
-static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode,
+static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode,
                          uint8_t delivery_mode, uint8_t vector_num,
                          uint8_t trigger_mode)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
     uint32_t deliver_bitmask[MAX_APIC_WORDS];
     int dest_shorthand = (s->icr[0] >> 18) & 3;
     APICCommonState *apic_iter;
@@ -551,9 +551,9 @@ static bool apic_check_pic(APICCommonState *s)
     return true;
 }
 
-int apic_get_interrupt(DeviceState *d)
+int apic_get_interrupt(DeviceState *dev)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
     int intno;
 
     /* if the APIC is installed or enabled, we let the 8259 handle the
@@ -585,9 +585,9 @@ int apic_get_interrupt(DeviceState *d)
     return intno;
 }
 
-int apic_accept_pic_intr(DeviceState *d)
+int apic_accept_pic_intr(DeviceState *dev)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
     uint32_t lvt0;
 
     if (!s)
@@ -657,16 +657,16 @@ static void apic_mem_writew(void *opaque, hwaddr addr, uint32_t val)
 
 static uint32_t apic_mem_readl(void *opaque, hwaddr addr)
 {
-    DeviceState *d;
+    DeviceState *dev;
     APICCommonState *s;
     uint32_t val;
     int index;
 
-    d = cpu_get_current_apic();
-    if (!d) {
+    dev = cpu_get_current_apic();
+    if (!dev) {
         return 0;
     }
-    s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    s = APIC_COMMON(dev);
 
     index = (addr >> 4) & 0xff;
     switch(index) {
@@ -752,7 +752,7 @@ static void apic_send_msi(hwaddr addr, uint32_t data)
 
 static void apic_mem_writel(void *opaque, hwaddr addr, uint32_t val)
 {
-    DeviceState *d;
+    DeviceState *dev;
     APICCommonState *s;
     int index = (addr >> 4) & 0xff;
     if (addr > 0xfff || !index) {
@@ -765,11 +765,11 @@ static void apic_mem_writel(void *opaque, hwaddr addr, uint32_t val)
         return;
     }
 
-    d = cpu_get_current_apic();
-    if (!d) {
+    dev = cpu_get_current_apic();
+    if (!dev) {
         return;
     }
-    s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    s = APIC_COMMON(dev);
 
     trace_apic_mem_writel(addr, val);
 
@@ -810,7 +810,7 @@ static void apic_mem_writel(void *opaque, hwaddr addr, uint32_t val)
         break;
     case 0x30:
         s->icr[0] = val;
-        apic_deliver(d, (s->icr[1] >> 24) & 0xff, (s->icr[0] >> 11) & 1,
+        apic_deliver(dev, (s->icr[1] >> 24) & 0xff, (s->icr[0] >> 11) & 1,
                      (s->icr[0] >> 8) & 7, (s->icr[0] & 0xff),
                      (s->icr[0] >> 15) & 1);
         break;
@@ -871,8 +871,10 @@ static const MemoryRegionOps apic_io_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void apic_init(APICCommonState *s)
+static void apic_realize(DeviceState *dev, Error **errp)
 {
+    APICCommonState *s = APIC_COMMON(dev);
+
     memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
                           APIC_SPACE_SIZE);
 
@@ -886,7 +888,7 @@ static void apic_class_init(ObjectClass *klass, void *data)
 {
     APICCommonClass *k = APIC_COMMON_CLASS(klass);
 
-    k->init = apic_init;
+    k->realize = apic_realize;
     k->set_base = apic_set_base;
     k->set_tpr = apic_set_tpr;
     k->get_tpr = apic_get_tpr;
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index a0beb10863..c623fcc6d8 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -27,21 +27,21 @@
 static int apic_irq_delivered;
 bool apic_report_tpr_access;
 
-void cpu_set_apic_base(DeviceState *d, uint64_t val)
+void cpu_set_apic_base(DeviceState *dev, uint64_t val)
 {
     trace_cpu_set_apic_base(val);
 
-    if (d) {
-        APICCommonState *s = APIC_COMMON(d);
+    if (dev) {
+        APICCommonState *s = APIC_COMMON(dev);
         APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
         info->set_base(s, val);
     }
 }
 
-uint64_t cpu_get_apic_base(DeviceState *d)
+uint64_t cpu_get_apic_base(DeviceState *dev)
 {
-    if (d) {
-        APICCommonState *s = APIC_COMMON(d);
+    if (dev) {
+        APICCommonState *s = APIC_COMMON(dev);
         trace_cpu_get_apic_base((uint64_t)s->apicbase);
         return s->apicbase;
     } else {
@@ -50,39 +50,39 @@ uint64_t cpu_get_apic_base(DeviceState *d)
     }
 }
 
-void cpu_set_apic_tpr(DeviceState *d, uint8_t val)
+void cpu_set_apic_tpr(DeviceState *dev, uint8_t val)
 {
     APICCommonState *s;
     APICCommonClass *info;
 
-    if (!d) {
+    if (!dev) {
         return;
     }
 
-    s = APIC_COMMON(d);
+    s = APIC_COMMON(dev);
     info = APIC_COMMON_GET_CLASS(s);
 
     info->set_tpr(s, val);
 }
 
-uint8_t cpu_get_apic_tpr(DeviceState *d)
+uint8_t cpu_get_apic_tpr(DeviceState *dev)
 {
     APICCommonState *s;
     APICCommonClass *info;
 
-    if (!d) {
+    if (!dev) {
         return 0;
     }
 
-    s = APIC_COMMON(d);
+    s = APIC_COMMON(dev);
     info = APIC_COMMON_GET_CLASS(s);
 
     return info->get_tpr(s);
 }
 
-void apic_enable_tpr_access_reporting(DeviceState *d, bool enable)
+void apic_enable_tpr_access_reporting(DeviceState *dev, bool enable)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
     APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
 
     apic_report_tpr_access = enable;
@@ -91,19 +91,19 @@ void apic_enable_tpr_access_reporting(DeviceState *d, bool enable)
     }
 }
 
-void apic_enable_vapic(DeviceState *d, hwaddr paddr)
+void apic_enable_vapic(DeviceState *dev, hwaddr paddr)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
     APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
 
     s->vapic_paddr = paddr;
     info->vapic_base_update(s);
 }
 
-void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip,
+void apic_handle_tpr_access_report(DeviceState *dev, target_ulong ip,
                                    TPRAccess access)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
 
     vapic_report_tpr_access(s->vapic, CPU(s->cpu), ip, access);
 }
@@ -129,9 +129,9 @@ int apic_get_irq_delivered(void)
     return apic_irq_delivered;
 }
 
-void apic_deliver_nmi(DeviceState *d)
+void apic_deliver_nmi(DeviceState *dev)
 {
-    APICCommonState *s = APIC_COMMON(d);
+    APICCommonState *s = APIC_COMMON(dev);
     APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
 
     info->external_nmi(s);
@@ -170,9 +170,9 @@ bool apic_next_timer(APICCommonState *s, int64_t current_time)
     return true;
 }
 
-void apic_init_reset(DeviceState *d)
+void apic_init_reset(DeviceState *dev)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
     int i;
 
     if (!s) {
@@ -203,19 +203,19 @@ void apic_init_reset(DeviceState *d)
     s->timer_expiry = -1;
 }
 
-void apic_designate_bsp(DeviceState *d)
+void apic_designate_bsp(DeviceState *dev)
 {
-    if (d == NULL) {
+    if (dev == NULL) {
         return;
     }
 
-    APICCommonState *s = APIC_COMMON(d);
+    APICCommonState *s = APIC_COMMON(dev);
     s->apicbase |= MSR_IA32_APICBASE_BSP;
 }
 
-static void apic_reset_common(DeviceState *d)
+static void apic_reset_common(DeviceState *dev)
 {
-    APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
+    APICCommonState *s = APIC_COMMON(dev);
     APICCommonClass *info = APIC_COMMON_GET_CLASS(s);
     bool bsp;
 
@@ -226,7 +226,7 @@ static void apic_reset_common(DeviceState *d)
     s->vapic_paddr = 0;
     info->vapic_base_update(s);
 
-    apic_init_reset(d);
+    apic_init_reset(dev);
 
     if (bsp) {
         /*
@@ -284,7 +284,7 @@ static int apic_load_old(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
-static int apic_init_common(ICCDevice *dev)
+static void apic_common_realize(DeviceState *dev, Error **errp)
 {
     APICCommonState *s = APIC_COMMON(dev);
     APICCommonClass *info;
@@ -293,14 +293,16 @@ static int apic_init_common(ICCDevice *dev)
     static bool mmio_registered;
 
     if (apic_no >= MAX_APICS) {
-        return -1;
+        error_setg(errp, "%s initialization failed.",
+                   object_get_typename(OBJECT(dev)));
+        return;
     }
     s->idx = apic_no++;
 
     info = APIC_COMMON_GET_CLASS(s);
-    info->init(s);
+    info->realize(dev, errp);
     if (!mmio_registered) {
-        ICCBus *b = ICC_BUS(qdev_get_parent_bus(DEVICE(dev)));
+        ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev));
         memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory);
         mmio_registered = true;
     }
@@ -315,7 +317,6 @@ static int apic_init_common(ICCDevice *dev)
         info->enable_tpr_reporting(s, true);
     }
 
-    return 0;
 }
 
 static void apic_dispatch_pre_save(void *opaque)
@@ -386,9 +387,13 @@ static void apic_common_class_init(ObjectClass *klass, void *data)
 
     dc->vmsd = &vmstate_apic_common;
     dc->reset = apic_reset_common;
-    dc->no_user = 1;
     dc->props = apic_properties_common;
-    idc->init = apic_init_common;
+    idc->realize = apic_common_realize;
+    /*
+     * Reason: APIC and CPU need to be wired up by
+     * x86_cpu_apic_create()
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo apic_common_type = {
@@ -400,9 +405,9 @@ static const TypeInfo apic_common_type = {
     .abstract = true,
 };
 
-static void register_types(void)
+static void apic_common_register_types(void)
 {
     type_register_static(&apic_common_type);
 }
 
-type_init(register_types)
+type_init(apic_common_register_types)
diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index d431b7a881..9409684ce8 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -128,7 +128,7 @@ static void gic_set_irq(void *opaque, int irq, int level)
 
     if (level) {
         GIC_SET_LEVEL(irq, cm);
-        if (GIC_TEST_TRIGGER(irq) || GIC_TEST_ENABLED(irq, cm)) {
+        if (GIC_TEST_EDGE_TRIGGER(irq) || GIC_TEST_ENABLED(irq, cm)) {
             DPRINTF("Set %d pending mask %x\n", irq, target);
             GIC_SET_PENDING(irq, target);
         }
@@ -168,6 +168,15 @@ uint32_t gic_acknowledge_irq(GICState *s, int cpu)
     return new_irq;
 }
 
+void gic_set_priority(GICState *s, int cpu, int irq, uint8_t val)
+{
+    if (irq < GIC_INTERNAL) {
+        s->priority1[irq][cpu] = val;
+    } else {
+        s->priority2[(irq) - GIC_INTERNAL] = val;
+    }
+}
+
 void gic_complete_irq(GICState *s, int cpu, int irq)
 {
     int update = 0;
@@ -188,7 +197,7 @@ void gic_complete_irq(GICState *s, int cpu, int irq)
         return; /* No active IRQ.  */
     /* Mark level triggered interrupts as pending if they are still
        raised.  */
-    if (!GIC_TEST_TRIGGER(irq) && GIC_TEST_ENABLED(irq, cm)
+    if (!GIC_TEST_EDGE_TRIGGER(irq) && GIC_TEST_ENABLED(irq, cm)
         && GIC_TEST_LEVEL(irq, cm) && (GIC_TARGET(irq) & cm) != 0) {
         DPRINTF("Set %d pending mask %x\n", irq, cm);
         GIC_SET_PENDING(irq, cm);
@@ -311,7 +320,7 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset)
         for (i = 0; i < 4; i++) {
             if (GIC_TEST_MODEL(irq + i))
                 res |= (1 << (i * 2));
-            if (GIC_TEST_TRIGGER(irq + i))
+            if (GIC_TEST_EDGE_TRIGGER(irq + i))
                 res |= (2 << (i * 2));
         }
     } else if (offset < 0xfe0) {
@@ -386,7 +395,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
                 /* If a raised level triggered IRQ enabled then mark
                    is as pending.  */
                 if (GIC_TEST_LEVEL(irq + i, mask)
-                        && !GIC_TEST_TRIGGER(irq + i)) {
+                        && !GIC_TEST_EDGE_TRIGGER(irq + i)) {
                     DPRINTF("Set %d pending mask %x\n", irq + i, mask);
                     GIC_SET_PENDING(irq + i, mask);
                 }
@@ -443,11 +452,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
         irq = (offset - 0x400) + GIC_BASE_IRQ;
         if (irq >= s->num_irq)
             goto bad_reg;
-        if (irq < GIC_INTERNAL) {
-            s->priority1[irq][cpu] = value;
-        } else {
-            s->priority2[irq - GIC_INTERNAL] = value;
-        }
+        gic_set_priority(s, cpu, irq, value);
     } else if (offset < 0xc00) {
         /* Interrupt CPU Target. RAZ/WI on uniprocessor GICs, with the
          * annoying exception of the 11MPCore's GIC.
@@ -478,9 +483,9 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
                 GIC_CLEAR_MODEL(irq + i);
             }
             if (value & (2 << (i * 2))) {
-                GIC_SET_TRIGGER(irq + i);
+                GIC_SET_EDGE_TRIGGER(irq + i);
             } else {
-                GIC_CLEAR_TRIGGER(irq + i);
+                GIC_CLEAR_EDGE_TRIGGER(irq + i);
             }
         }
     } else {
@@ -704,7 +709,6 @@ static void arm_gic_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     ARMGICClass *agc = ARM_GIC_CLASS(klass);
 
-    dc->no_user = 1;
     agc->parent_realize = dc->realize;
     dc->realize = arm_gic_realize;
 }
diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index c7658508dd..e4fc65028a 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -51,7 +51,7 @@ static const VMStateDescription vmstate_gic_irq_state = {
         VMSTATE_UINT8(active, gic_irq_state),
         VMSTATE_UINT8(level, gic_irq_state),
         VMSTATE_BOOL(model, gic_irq_state),
-        VMSTATE_BOOL(trigger, gic_irq_state),
+        VMSTATE_BOOL(edge_trigger, gic_irq_state),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -126,7 +126,7 @@ static void arm_gic_common_reset(DeviceState *dev)
     }
     for (i = 0; i < 16; i++) {
         GIC_SET_ENABLED(i, ALL_CPU_MASK);
-        GIC_SET_TRIGGER(i);
+        GIC_SET_EDGE_TRIGGER(i);
     }
     if (s->num_cpu == 1) {
         /* For uniprocessor GICs all interrupts always target the sole CPU */
@@ -156,7 +156,6 @@ static void arm_gic_common_class_init(ObjectClass *klass, void *data)
     dc->realize = arm_gic_common_realize;
     dc->props = arm_gic_common_properties;
     dc->vmsd = &vmstate_gic;
-    dc->no_user = 1;
 }
 
 static const TypeInfo arm_gic_common_type = {
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index f71397542a..59a3da5a6b 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -150,7 +150,6 @@ static void kvm_arm_gic_class_init(ObjectClass *klass, void *data)
     kgc->parent_reset = dc->reset;
     dc->realize = kvm_arm_gic_realize;
     dc->reset = kvm_arm_gic_reset;
-    dc->no_user = 1;
 }
 
 static const TypeInfo kvm_arm_gic_info = {
diff --git a/hw/intc/etraxfs_pic.c b/hw/intc/etraxfs_pic.c
index e02da533cb..636262b49f 100644
--- a/hw/intc/etraxfs_pic.c
+++ b/hw/intc/etraxfs_pic.c
@@ -170,6 +170,10 @@ static void etraxfs_pic_class_init(ObjectClass *klass, void *data)
 
     k->init = etraxfs_pic_init;
     dc->props = etraxfs_pic_properties;
+    /*
+     * Note: pointer property "interrupt_vector" may remain null, thus
+     * no need for dc->cannot_instantiate_with_device_add_yet = true;
+     */
 }
 
 static const TypeInfo etraxfs_pic_info = {
diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
index 3989fd1bd5..8c02d5888c 100644
--- a/hw/intc/gic_internal.h
+++ b/hw/intc/gic_internal.h
@@ -44,9 +44,9 @@
 #define GIC_SET_LEVEL(irq, cm) s->irq_state[irq].level = (cm)
 #define GIC_CLEAR_LEVEL(irq, cm) s->irq_state[irq].level &= ~(cm)
 #define GIC_TEST_LEVEL(irq, cm) ((s->irq_state[irq].level & (cm)) != 0)
-#define GIC_SET_TRIGGER(irq) s->irq_state[irq].trigger = true
-#define GIC_CLEAR_TRIGGER(irq) s->irq_state[irq].trigger = false
-#define GIC_TEST_TRIGGER(irq) s->irq_state[irq].trigger
+#define GIC_SET_EDGE_TRIGGER(irq) s->irq_state[irq].edge_trigger = true
+#define GIC_CLEAR_EDGE_TRIGGER(irq) s->irq_state[irq].edge_trigger = false
+#define GIC_TEST_EDGE_TRIGGER(irq) (s->irq_state[irq].edge_trigger)
 #define GIC_GET_PRIORITY(irq, cpu) (((irq) < GIC_INTERNAL) ?            \
                                     s->priority1[irq][cpu] :            \
                                     s->priority2[(irq) - GIC_INTERNAL])
@@ -61,5 +61,6 @@ uint32_t gic_acknowledge_irq(GICState *s, int cpu);
 void gic_complete_irq(GICState *s, int cpu, int irq);
 void gic_update(GICState *s);
 void gic_init_irqs_and_distributor(GICState *s, int num_irq);
+void gic_set_priority(GICState *s, int cpu, int irq, uint8_t val);
 
 #endif /* !QEMU_ARM_GIC_INTERNAL_H */
diff --git a/hw/intc/grlib_irqmp.c b/hw/intc/grlib_irqmp.c
index 42e00bc4b8..d1813f76b6 100644
--- a/hw/intc/grlib_irqmp.c
+++ b/hw/intc/grlib_irqmp.c
@@ -355,6 +355,8 @@ static void grlib_irqmp_class_init(ObjectClass *klass, void *data)
     k->init = grlib_irqmp_init;
     dc->reset = grlib_irqmp_reset;
     dc->props = grlib_irqmp_properties;
+    /* Reason: pointer properties "set_pil_in", "set_pil_in_opaque" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo grlib_irqmp_info = {
diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c
index 803d037f68..9d293999be 100644
--- a/hw/intc/i8259_common.c
+++ b/hw/intc/i8259_common.c
@@ -135,9 +135,15 @@ static void pic_common_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->vmsd = &vmstate_pic_common;
-    dc->no_user = 1;
     dc->props = pic_properties_common;
     dc->realize = pic_common_realize;
+    /*
+     * Reason: unlike ordinary ISA devices, the PICs need additional
+     * wiring: their IRQ input lines are set up by board code, and the
+     * wiring of the slave to the master is hard-coded in device model
+     * code.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo pic_common_type = {
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index d866e00297..652dd47a1c 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -36,6 +36,9 @@
 
 static IOAPICCommonState *ioapics[MAX_IOAPICS];
 
+/* global variable from ioapic_common.c */
+extern int ioapic_no;
+
 static void ioapic_service(IOAPICCommonState *s)
 {
     uint8_t i;
@@ -225,14 +228,16 @@ static const MemoryRegionOps ioapic_io_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void ioapic_init(IOAPICCommonState *s, int instance_no)
+static void ioapic_realize(DeviceState *dev, Error **errp)
 {
+    IOAPICCommonState *s = IOAPIC_COMMON(dev);
+
     memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
                           "ioapic", 0x1000);
 
-    qdev_init_gpio_in(DEVICE(s), ioapic_set_irq, IOAPIC_NUM_PINS);
+    qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
 
-    ioapics[instance_no] = s;
+    ioapics[ioapic_no] = s;
 }
 
 static void ioapic_class_init(ObjectClass *klass, void *data)
@@ -240,7 +245,7 @@ static void ioapic_class_init(ObjectClass *klass, void *data)
     IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    k->init = ioapic_init;
+    k->realize = ioapic_realize;
     dc->reset = ioapic_reset_common;
 }
 
diff --git a/hw/intc/ioapic_common.c b/hw/intc/ioapic_common.c
index 6b705c1546..4d3d309b62 100644
--- a/hw/intc/ioapic_common.c
+++ b/hw/intc/ioapic_common.c
@@ -23,6 +23,14 @@
 #include "hw/i386/ioapic_internal.h"
 #include "hw/sysbus.h"
 
+/* ioapic_no counts the realized IOAPICs, from 0 up to MAX_IOAPICS.
+ * It used to be a static variable inside ioapic_common_realize();
+ * it is now a global so that child classes can read it directly.
+ * That lets us drop the 'instance_no' argument
+ * and convert the child hook to a QOM realize function.
+ */
+int ioapic_no;
+
 void ioapic_reset_common(DeviceState *dev)
 {
     IOAPICCommonState *s = IOAPIC_COMMON(dev);
@@ -61,7 +69,6 @@ static void ioapic_common_realize(DeviceState *dev, Error **errp)
 {
     IOAPICCommonState *s = IOAPIC_COMMON(dev);
     IOAPICCommonClass *info;
-    static int ioapic_no;
 
     if (ioapic_no >= MAX_IOAPICS) {
         error_setg(errp, "Only %d ioapics allowed", MAX_IOAPICS);
@@ -69,7 +76,7 @@ static void ioapic_common_realize(DeviceState *dev, Error **errp)
     }
 
     info = IOAPIC_COMMON_GET_CLASS(s);
-    info->init(s, ioapic_no);
+    info->realize(dev, errp);
 
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->io_memory);
     ioapic_no++;
@@ -98,7 +105,6 @@ static void ioapic_common_class_init(ObjectClass *klass, void *data)
 
     dc->realize = ioapic_common_realize;
     dc->vmsd = &vmstate_ioapic_common;
-    dc->no_user = 1;
 }
 
 static const TypeInfo ioapic_common_type = {
@@ -110,9 +116,9 @@ static const TypeInfo ioapic_common_type = {
     .abstract = true,
 };
 
-static void register_types(void)
+static void ioapic_common_register_types(void)
 {
     type_register_static(&ioapic_common_type);
 }
 
-type_init(register_types)
+type_init(ioapic_common_register_types)
diff --git a/hw/intc/omap_intc.c b/hw/intc/omap_intc.c
index 7dd63da802..ad3931c112 100644
--- a/hw/intc/omap_intc.c
+++ b/hw/intc/omap_intc.c
@@ -392,6 +392,8 @@ static void omap_intc_class_init(ObjectClass *klass, void *data)
     k->init = omap_intc_init;
     dc->reset = omap_inth_reset;
     dc->props = omap_intc_properties;
+    /* Reason: pointer property "clk" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo omap_intc_info = {
@@ -637,6 +639,8 @@ static void omap2_intc_class_init(ObjectClass *klass, void *data)
     k->init = omap2_intc_init;
     dc->reset = omap_inth_reset;
     dc->props = omap2_intc_properties;
+    /* Reason: pointer properties "iclk", "fclk" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo omap2_intc_info = {
diff --git a/hw/intc/pl190.c b/hw/intc/pl190.c
index 329680da3a..2bf359a76b 100644
--- a/hw/intc/pl190.c
+++ b/hw/intc/pl190.c
@@ -273,7 +273,6 @@ static void pl190_class_init(ObjectClass *klass, void *data)
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = pl190_init;
-    dc->no_user = 1;
     dc->reset = pl190_reset;
     dc->vmsd = &vmstate_pl190;
 }
diff --git a/hw/isa/isa-bus.c b/hw/isa/isa-bus.c
index 9e104eb9a7..55d01008d3 100644
--- a/hw/isa/isa-bus.c
+++ b/hw/isa/isa-bus.c
@@ -197,7 +197,6 @@ static void isabus_bridge_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->fw_name = "isa";
-    dc->no_user = 1;
 }
 
 static const TypeInfo isabus_bridge_info = {
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 19b2198fa6..51ce12dad6 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -644,14 +644,17 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data)
     dc->reset = ich9_lpc_reset;
     k->init = ich9_lpc_initfn;
     dc->vmsd = &vmstate_ich9_lpc;
-    dc->no_user = 1;
     k->config_write = ich9_lpc_config_write;
     dc->desc = "ICH9 LPC bridge";
     k->vendor_id = PCI_VENDOR_ID_INTEL;
     k->device_id = PCI_DEVICE_ID_INTEL_ICH9_8;
     k->revision = ICH9_A2_LPC_REVISION;
     k->class_id = PCI_CLASS_BRIDGE_ISA;
-
+    /*
+     * Reason: part of ICH9 southbridge, needs to be wired up by
+     * pc_q35_init()
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo ich9_lpc_info = {
diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c
index 1a1d4518ce..def6fe3a0f 100644
--- a/hw/isa/piix4.c
+++ b/hw/isa/piix4.c
@@ -113,8 +113,12 @@ static void piix4_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_INTEL_82371AB_0;
     k->class_id = PCI_CLASS_BRIDGE_ISA;
     dc->desc = "ISA bridge";
-    dc->no_user = 1;
     dc->vmsd = &vmstate_piix4;
+    /*
+     * Reason: part of PIIX4 southbridge, needs to be wired up,
+     * e.g. by mips_malta_init()
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo piix4_info = {
diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index 5fb808630f..e639357db3 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -480,8 +480,12 @@ static void via_class_init(ObjectClass *klass, void *data)
     k->class_id = PCI_CLASS_BRIDGE_ISA;
     k->revision = 0x40;
     dc->desc = "ISA bridge";
-    dc->no_user = 1;
     dc->vmsd = &vmstate_via;
+    /*
+     * Reason: part of VIA VT82C686 southbridge, needs to be wired up,
+     * e.g. by mips_fulong2e_init()
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo via_info = {
diff --git a/hw/microblaze/Makefile.objs b/hw/microblaze/Makefile.objs
index c65e2aabf1..b2517d87fe 100644
--- a/hw/microblaze/Makefile.objs
+++ b/hw/microblaze/Makefile.objs
@@ -1,4 +1,3 @@
 obj-y += petalogix_s3adsp1800_mmu.o
 obj-y += petalogix_ml605_mmu.o
 obj-y += boot.o
-obj-y += pic_cpu.o
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index 10970e0f3f..1a87756246 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -39,7 +39,6 @@
 #include "hw/ssi.h"
 
 #include "boot.h"
-#include "pic_cpu.h"
 
 #include "hw/stream.h"
 
@@ -82,20 +81,18 @@ petalogix_ml605_init(QEMUMachineInitArgs *args)
     Object *ds, *cs;
     MicroBlazeCPU *cpu;
     SysBusDevice *busdev;
-    CPUMBState *env;
     DriveInfo *dinfo;
     int i;
     hwaddr ddr_base = MEMORY_BASEADDR;
     MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1);
     MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
-    qemu_irq irq[32], *cpu_irq;
+    qemu_irq irq[32];
 
     /* init CPUs */
     if (cpu_model == NULL) {
         cpu_model = "microblaze";
     }
     cpu = cpu_mb_init(cpu_model);
-    env = &cpu->env;
 
     /* Attach emulated BRAM through the LMB.  */
     memory_region_init_ram(phys_lmb_bram, NULL, "petalogix_ml605.lmb_bram",
@@ -117,8 +114,8 @@ petalogix_ml605_init(QEMUMachineInitArgs *args)
                           2, 0x89, 0x18, 0x0000, 0x0, 0);
 
 
-    cpu_irq = microblaze_pic_init_cpu(env);
-    dev = xilinx_intc_create(INTC_BASEADDR, cpu_irq[0], 4);
+    dev = xilinx_intc_create(INTC_BASEADDR, qdev_get_gpio_in(DEVICE(cpu),
+                             MB_CPU_IRQ), 4);
     for (i = 0; i < 32; i++) {
         irq[i] = qdev_get_gpio_in(dev, i);
     }
diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index ec6489c2d3..f50021506c 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -35,7 +35,6 @@
 #include "exec/address-spaces.h"
 
 #include "boot.h"
-#include "pic_cpu.h"
 
 #define LMB_BRAM_SIZE  (128 * 1024)
 #define FLASH_SIZE     (16 * 1024 * 1024)
@@ -63,13 +62,12 @@ petalogix_s3adsp1800_init(QEMUMachineInitArgs *args)
     const char *cpu_model = args->cpu_model;
     DeviceState *dev;
     MicroBlazeCPU *cpu;
-    CPUMBState *env;
     DriveInfo *dinfo;
     int i;
     hwaddr ddr_base = MEMORY_BASEADDR;
     MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1);
     MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
-    qemu_irq irq[32], *cpu_irq;
+    qemu_irq irq[32];
     MemoryRegion *sysmem = get_system_memory();
 
     /* init CPUs */
@@ -77,7 +75,6 @@ petalogix_s3adsp1800_init(QEMUMachineInitArgs *args)
         cpu_model = "microblaze";
     }
     cpu = cpu_mb_init(cpu_model);
-    env = &cpu->env;
 
     /* Attach emulated BRAM through the LMB.  */
     memory_region_init_ram(phys_lmb_bram, NULL,
@@ -96,8 +93,8 @@ petalogix_s3adsp1800_init(QEMUMachineInitArgs *args)
                           FLASH_SIZE >> 16,
                           1, 0x89, 0x18, 0x0000, 0x0, 1);
 
-    cpu_irq = microblaze_pic_init_cpu(env);
-    dev = xilinx_intc_create(INTC_BASEADDR, cpu_irq[0], 0xA);
+    dev = xilinx_intc_create(INTC_BASEADDR, qdev_get_gpio_in(DEVICE(cpu),
+                             MB_CPU_IRQ), 0xA);
     for (i = 0; i < 32; i++) {
         irq[i] = qdev_get_gpio_in(dev, i);
     }
diff --git a/hw/microblaze/pic_cpu.c b/hw/microblaze/pic_cpu.c
deleted file mode 100644
index 16902f7880..0000000000
--- a/hw/microblaze/pic_cpu.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * QEMU MicroBlaze CPU interrupt wrapper logic.
- *
- * Copyright (c) 2009 Edgar E. Iglesias, Axis Communications AB.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "hw/hw.h"
-#include "pic_cpu.h"
-
-#define D(x)
-
-static void microblaze_pic_cpu_handler(void *opaque, int irq, int level)
-{
-    MicroBlazeCPU *cpu = opaque;
-    CPUState *cs = CPU(cpu);
-    int type = irq ? CPU_INTERRUPT_NMI : CPU_INTERRUPT_HARD;
-
-    if (level) {
-        cpu_interrupt(cs, type);
-    } else {
-        cpu_reset_interrupt(cs, type);
-    }
-}
-
-qemu_irq *microblaze_pic_init_cpu(CPUMBState *env)
-{
-    return qemu_allocate_irqs(microblaze_pic_cpu_handler, mb_env_get_cpu(env),
-                              2);
-}
diff --git a/hw/microblaze/pic_cpu.h b/hw/microblaze/pic_cpu.h
deleted file mode 100644
index 43090a48ef..0000000000
--- a/hw/microblaze/pic_cpu.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef MICROBLAZE_PIC_CPU_H
-#define MICROBLAZE_PIC_CPU_H
-
-#include "qemu-common.h"
-
-qemu_irq *microblaze_pic_init_cpu(CPUMBState *env);
-
-#endif /*  MICROBLAZE_PIC_CPU_H */
diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c
index 3da2e67098..6398514c99 100644
--- a/hw/mips/gt64xxx_pci.c
+++ b/hw/mips/gt64xxx_pci.c
@@ -1151,12 +1151,18 @@ static int gt64120_pci_init(PCIDevice *d)
 static void gt64120_pci_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init = gt64120_pci_init;
     k->vendor_id = PCI_VENDOR_ID_MARVELL;
     k->device_id = PCI_DEVICE_ID_MARVELL_GT6412X;
     k->revision = 0x10;
     k->class_id = PCI_CLASS_BRIDGE_HOST;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo gt64120_pci_info = {
diff --git a/hw/misc/arm_l2x0.c b/hw/misc/arm_l2x0.c
index 8e192cdf83..9e220c9a56 100644
--- a/hw/misc/arm_l2x0.c
+++ b/hw/misc/arm_l2x0.c
@@ -179,7 +179,6 @@ static void l2x0_class_init(ObjectClass *klass, void *data)
 
     k->init = l2x0_priv_init;
     dc->vmsd = &vmstate_l2x0;
-    dc->no_user = 1;
     dc->props = l2x0_properties;
     dc->reset = l2x0_priv_reset;
 }
diff --git a/hw/misc/exynos4210_pmu.c b/hw/misc/exynos4210_pmu.c
index cbf0795c0a..5ec14d1c86 100644
--- a/hw/misc/exynos4210_pmu.c
+++ b/hw/misc/exynos4210_pmu.c
@@ -383,8 +383,7 @@ static const Exynos4210PmuReg exynos4210_pmu_regs[] = {
     {"GPS_ALIVE_OPTION", GPS_ALIVE_OPTION, 0x00000001},
 };
 
-#define PMU_NUM_OF_REGISTERS     \
-    (sizeof(exynos4210_pmu_regs) / sizeof(Exynos4210PmuReg))
+#define PMU_NUM_OF_REGISTERS ARRAY_SIZE(exynos4210_pmu_regs)
 
 #define TYPE_EXYNOS4210_PMU "exynos4210.pmu"
 #define EXYNOS4210_PMU(obj) \
diff --git a/hw/misc/vmport.c b/hw/misc/vmport.c
index 0b5a5644e4..cd5716a46d 100644
--- a/hw/misc/vmport.c
+++ b/hw/misc/vmport.c
@@ -162,7 +162,8 @@ static void vmport_class_initfn(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = vmport_realizefn;
-    dc->no_user = 1;
+    /* Reason: realize sets global port_state */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo vmport_info = {
diff --git a/hw/net/etraxfs_eth.c b/hw/net/etraxfs_eth.c
index 78ebbbca72..6a3c86db48 100644
--- a/hw/net/etraxfs_eth.c
+++ b/hw/net/etraxfs_eth.c
@@ -646,6 +646,8 @@ static void etraxfs_eth_class_init(ObjectClass *klass, void *data)
 
     k->init = fs_eth_init;
     dc->props = etraxfs_eth_properties;
+    /* Reason: pointer properties "dma_out", "dma_in" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo etraxfs_eth_info = {
diff --git a/hw/net/lance.c b/hw/net/lance.c
index e339f029b7..fe18564e1e 100644
--- a/hw/net/lance.c
+++ b/hw/net/lance.c
@@ -161,6 +161,8 @@ static void lance_class_init(ObjectClass *klass, void *data)
     dc->reset = lance_reset;
     dc->vmsd = &vmstate_lance;
     dc->props = lance_properties;
+    /* Reason: pointer property "dma" */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo lance_info = {
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index f5dc3ea845..ee96c1681b 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -599,7 +599,6 @@ static void fw_cfg_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = fw_cfg_realize;
-    dc->no_user = 1;
     dc->reset = fw_cfg_reset;
     dc->vmsd = &vmstate_fw_cfg;
     dc->props = fw_cfg_properties;
diff --git a/hw/pci-bridge/dec.c b/hw/pci-bridge/dec.c
index e5e3be829f..a6ca940d55 100644
--- a/hw/pci-bridge/dec.c
+++ b/hw/pci-bridge/dec.c
@@ -116,6 +116,7 @@ static int dec_21154_pci_host_init(PCIDevice *d)
 static void dec_21154_pci_host_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init = dec_21154_pci_host_init;
     k->vendor_id = PCI_VENDOR_ID_DEC;
@@ -123,6 +124,11 @@ static void dec_21154_pci_host_class_init(ObjectClass *klass, void *data)
     k->revision = 0x02;
     k->class_id = PCI_CLASS_BRIDGE_PCI;
     k->is_bridge = 1;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo dec_21154_pci_host_info = {
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index 92f289f8f9..1b399ddbc3 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -516,11 +516,17 @@ static int pbm_pci_host_init(PCIDevice *d)
 static void pbm_pci_host_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init = pbm_pci_host_init;
     k->vendor_id = PCI_VENDOR_ID_SUN;
     k->device_id = PCI_DEVICE_ID_SUN_SABRE;
     k->class_id = PCI_CLASS_BRIDGE_HOST;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo pbm_pci_host_info = {
diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index 5086d42c13..902441f10b 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c
@@ -806,8 +806,12 @@ static void bonito_class_init(ObjectClass *klass, void *data)
     k->revision = 0x01;
     k->class_id = PCI_CLASS_BRIDGE_HOST;
     dc->desc = "Host bridge";
-    dc->no_user = 1;
     dc->vmsd = &vmstate_bonito;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo bonito_info = {
@@ -819,11 +823,9 @@ static const TypeInfo bonito_info = {
 
 static void bonito_pcihost_class_init(ObjectClass *klass, void *data)
 {
-    DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = bonito_pcihost_initfn;
-    dc->no_user = 1;
 }
 
 static const TypeInfo bonito_pcihost_info = {
diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c
index 75b60d36ac..6c7cfdbeb2 100644
--- a/hw/pci-host/grackle.c
+++ b/hw/pci-host/grackle.c
@@ -130,7 +130,11 @@ static void grackle_pci_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_MOTOROLA_MPC106;
     k->revision  = 0x00;
     k->class_id  = PCI_CLASS_BRIDGE_HOST;
-    dc->no_user = 1;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo grackle_pci_info = {
@@ -143,10 +147,8 @@ static const TypeInfo grackle_pci_info = {
 static void pci_grackle_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
-    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init = pci_grackle_init_device;
-    dc->no_user = 1;
 }
 
 static const TypeInfo grackle_pci_host_info = {
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 63be7f6cee..e89d5c1dfa 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -311,6 +311,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
                     MemoryRegion *address_space_mem,
                     MemoryRegion *address_space_io,
                     ram_addr_t ram_size,
+                    ram_addr_t below_4g_mem_size,
                     ram_addr_t above_4g_mem_size,
                     MemoryRegion *pci_address_space,
                     MemoryRegion *ram_memory)
@@ -340,15 +341,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
     f->ram_memory = ram_memory;
 
     i440fx = I440FX_PCI_HOST_BRIDGE(dev);
-    /* Set PCI window size the way seabios has always done it. */
-    /* Power of 2 so bios can cover it with a single MTRR */
-    if (ram_size <= 0x80000000) {
-        i440fx->pci_info.w32.begin = 0x80000000;
-    } else if (ram_size <= 0xc0000000) {
-        i440fx->pci_info.w32.begin = 0xc0000000;
-    } else {
-        i440fx->pci_info.w32.begin = 0xe0000000;
-    }
+    i440fx->pci_info.w32.begin = below_4g_mem_size;
 
     /* setup pci memory mapping */
     pc_pci_as_mapping_init(OBJECT(f), f->system_memory,
@@ -635,7 +628,6 @@ static void piix3_class_init(ObjectClass *klass, void *data)
 
     dc->desc        = "ISA bridge";
     dc->vmsd        = &vmstate_piix3;
-    dc->no_user     = 1,
     k->no_hotplug   = 1;
     k->init         = piix3_initfn;
     k->config_write = piix3_write_config;
@@ -643,6 +635,11 @@ static void piix3_class_init(ObjectClass *klass, void *data)
     /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */
     k->device_id    = PCI_DEVICE_ID_INTEL_82371SB_0;
     k->class_id     = PCI_CLASS_BRIDGE_ISA;
+    /*
+     * Reason: part of PIIX3 southbridge, needs to be wired up by
+     * pc_piix.c's pc_init1()
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo piix3_info = {
@@ -659,7 +656,6 @@ static void piix3_xen_class_init(ObjectClass *klass, void *data)
 
     dc->desc        = "ISA bridge";
     dc->vmsd        = &vmstate_piix3;
-    dc->no_user     = 1;
     k->no_hotplug   = 1;
     k->init         = piix3_initfn;
     k->config_write = piix3_write_config_xen;
@@ -667,6 +663,11 @@ static void piix3_xen_class_init(ObjectClass *klass, void *data)
     /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */
     k->device_id    = PCI_DEVICE_ID_INTEL_82371SB_0;
     k->class_id     = PCI_CLASS_BRIDGE_ISA;
+    /*
+     * Reason: part of PIIX3 southbridge, needs to be wired up by
+     * pc_piix.c's pc_init1()
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 };
 
 static const TypeInfo piix3_xen_info = {
@@ -689,8 +690,12 @@ static void i440fx_class_init(ObjectClass *klass, void *data)
     k->revision = 0x02;
     k->class_id = PCI_CLASS_BRIDGE_HOST;
     dc->desc = "Host bridge";
-    dc->no_user = 1;
     dc->vmsd = &vmstate_i440fx;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo i440fx_info = {
@@ -727,7 +732,6 @@ static void i440fx_pcihost_class_init(ObjectClass *klass, void *data)
     hc->root_bus_path = i440fx_pcihost_root_bus_path;
     dc->realize = i440fx_pcihost_realize;
     dc->fw_name = "pci";
-    dc->no_user = 1;
     dc->props = i440fx_props;
 }
 
diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index f00793d819..c80b7cb2f5 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c
@@ -387,6 +387,11 @@ static void e500_host_bridge_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_MPC8533E;
     k->class_id = PCI_CLASS_PROCESSOR_POWERPC;
     dc->desc = "Host bridge";
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo e500_host_bridge_info = {
diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c
index 0e71fdbfb1..042dc8f225 100644
--- a/hw/pci-host/prep.c
+++ b/hw/pci-host/prep.c
@@ -198,7 +198,11 @@ static void raven_class_init(ObjectClass *klass, void *data)
     k->class_id = PCI_CLASS_BRIDGE_HOST;
     dc->desc = "PReP Host Bridge - Motorola Raven";
     dc->vmsd = &vmstate_raven;
-    dc->no_user = 1;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo raven_info = {
@@ -215,7 +219,6 @@ static void raven_pcihost_class_init(ObjectClass *klass, void *data)
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->realize = raven_pcihost_realizefn;
     dc->fw_name = "pci";
-    dc->no_user = 1;
 }
 
 static const TypeInfo raven_pcihost_info = {
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 81c82404d6..4bc2e0118e 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -403,6 +403,11 @@ static void mch_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_INTEL_Q35_MCH;
     k->revision = MCH_HOST_BRIDGE_REVISION_DEFAULT;
     k->class_id = PCI_CLASS_BRIDGE_HOST;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo mch_info = {
diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index adc1d89010..e72fe2a70b 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c
@@ -351,12 +351,18 @@ static int unin_internal_pci_host_init(PCIDevice *d)
 static void unin_main_pci_host_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init      = unin_main_pci_host_init;
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->device_id = PCI_DEVICE_ID_APPLE_UNI_N_PCI;
     k->revision  = 0x00;
     k->class_id  = PCI_CLASS_BRIDGE_HOST;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo unin_main_pci_host_info = {
@@ -369,12 +375,18 @@ static const TypeInfo unin_main_pci_host_info = {
 static void u3_agp_pci_host_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init      = u3_agp_pci_host_init;
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->device_id = PCI_DEVICE_ID_APPLE_U3_AGP;
     k->revision  = 0x00;
     k->class_id  = PCI_CLASS_BRIDGE_HOST;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo u3_agp_pci_host_info = {
@@ -387,12 +399,18 @@ static const TypeInfo u3_agp_pci_host_info = {
 static void unin_agp_pci_host_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init      = unin_agp_pci_host_init;
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->device_id = PCI_DEVICE_ID_APPLE_UNI_N_AGP;
     k->revision  = 0x00;
     k->class_id  = PCI_CLASS_BRIDGE_HOST;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo unin_agp_pci_host_info = {
@@ -405,12 +423,18 @@ static const TypeInfo unin_agp_pci_host_info = {
 static void unin_internal_pci_host_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init      = unin_internal_pci_host_init;
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->device_id = PCI_DEVICE_ID_APPLE_UNI_N_I_PCI;
     k->revision  = 0x00;
     k->class_id  = PCI_CLASS_BRIDGE_HOST;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo unin_internal_pci_host_info = {
diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c
index 6b28929d26..71ff0de303 100644
--- a/hw/pci-host/versatile.c
+++ b/hw/pci-host/versatile.c
@@ -467,11 +467,17 @@ static int versatile_pci_host_init(PCIDevice *d)
 static void versatile_pci_host_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init = versatile_pci_host_init;
     k->vendor_id = PCI_VENDOR_ID_XILINX;
     k->device_id = PCI_DEVICE_ID_XILINX_XC2VP30;
     k->class_id = PCI_CLASS_PROCESSOR_CO;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo versatile_pci_host_info = {
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 82c11ecde4..aa2a395499 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -46,7 +46,7 @@
 static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent);
 static char *pcibus_get_dev_path(DeviceState *dev);
 static char *pcibus_get_fw_dev_path(DeviceState *dev);
-static int pcibus_reset(BusState *qbus);
+static void pcibus_reset(BusState *qbus);
 static void pci_bus_finalize(Object *obj);
 
 static Property pci_props[] = {
@@ -167,16 +167,10 @@ void pci_device_deassert_intx(PCIDevice *dev)
     }
 }
 
-/*
- * This function is called on #RST and FLR.
- * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
- */
-void pci_device_reset(PCIDevice *dev)
+static void pci_do_device_reset(PCIDevice *dev)
 {
     int r;
 
-    qdev_reset_all(&dev->qdev);
-
     dev->irq_state = 0;
     pci_update_irq_status(dev);
     pci_device_deassert_intx(dev);
@@ -209,30 +203,34 @@ void pci_device_reset(PCIDevice *dev)
 }
 
 /*
+ * This function is called on #RST and FLR.
+ * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
+ */
+void pci_device_reset(PCIDevice *dev)
+{
+    qdev_reset_all(&dev->qdev);
+    pci_do_device_reset(dev);
+}
+
+/*
  * Trigger pci bus reset under a given bus.
- * To be called on RST# assert.
+ * Called via qbus_reset_all on RST# assert, after the devices
+ * have already been reset by qdev_reset_all.
  */
-void pci_bus_reset(PCIBus *bus)
+static void pcibus_reset(BusState *qbus)
 {
+    PCIBus *bus = DO_UPCAST(PCIBus, qbus, qbus);
     int i;
 
-    for (i = 0; i < bus->nirq; i++) {
-        bus->irq_count[i] = 0;
-    }
     for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
         if (bus->devices[i]) {
-            pci_device_reset(bus->devices[i]);
+            pci_do_device_reset(bus->devices[i]);
         }
     }
-}
-
-static int pcibus_reset(BusState *qbus)
-{
-    pci_bus_reset(DO_UPCAST(PCIBus, qbus, qbus));
 
-    /* topology traverse is done by pci_bus_reset().
-       Tell qbus/qdev walker not to traverse the tree */
-    return 1;
+    for (i = 0; i < bus->nirq; i++) {
+        assert(bus->irq_count[i] == 0);
+    }
 }
 
 static void pci_host_bus_register(PCIBus *bus, DeviceState *parent)
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index f72872ebcf..4becdc14b8 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -268,7 +268,7 @@ void pci_bridge_write_config(PCIDevice *d,
     newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
     if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
         /* Trigger hot reset on 0->1 transition. */
-        pci_bus_reset(&s->sec_bus);
+        qbus_reset_all(&s->sec_bus.qbus);
     }
 }
 
@@ -391,7 +391,7 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
     pci_bridge_region_cleanup(s, s->windows);
     memory_region_destroy(&s->address_space_mem);
     memory_region_destroy(&s->address_space_io);
-    /* qbus_free() is called automatically during device deletion */
+    /* object_unparent() is called automatically during device deletion */
 }
 
 /*
diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c
index d2d6f65e6c..4cb78518a3 100644
--- a/hw/ppc/ppc4xx_pci.c
+++ b/hw/ppc/ppc4xx_pci.c
@@ -380,6 +380,11 @@ static void ppc4xx_host_bridge_class_init(ObjectClass *klass, void *data)
     k->vendor_id    = PCI_VENDOR_ID_IBM;
     k->device_id    = PCI_DEVICE_ID_IBM_440GX;
     k->class_id     = PCI_CLASS_BRIDGE_OTHER;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo ppc4xx_host_bridge_info = {
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index fee6195f95..4e33f462d9 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -528,11 +528,9 @@ static int spapr_vio_bridge_init(SysBusDevice *dev)
 
 static void spapr_vio_bridge_class_init(ObjectClass *klass, void *data)
 {
-    DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = spapr_vio_bridge_init;
-    dc->no_user = 1;
 }
 
 static const TypeInfo spapr_vio_bridge_info = {
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 65d39da314..1a6397b88e 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -182,7 +182,6 @@ static void s390_ipl_class_init(ObjectClass *klass, void *data)
     k->init = s390_ipl_init;
     dc->props = s390_ipl_properties;
     dc->reset = s390_ipl_reset;
-    dc->no_user = 1;
 }
 
 static const TypeInfo s390_ipl_info = {
diff --git a/hw/s390x/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c
index 6a831114da..46c5ff1898 100644
--- a/hw/s390x/s390-virtio-bus.c
+++ b/hw/s390x/s390-virtio-bus.c
@@ -676,11 +676,9 @@ static int s390_virtio_bridge_init(SysBusDevice *dev)
 
 static void s390_virtio_bridge_class_init(ObjectClass *klass, void *data)
 {
-    DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = s390_virtio_bridge_init;
-    dc->no_user = 1;
 }
 
 static const TypeInfo s390_virtio_bridge_info = {
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index ecc80ecaf7..bc8871249d 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -30,13 +30,10 @@
 static void virtio_ccw_bus_new(VirtioBusState *bus, size_t bus_size,
                                VirtioCcwDevice *dev);
 
-static int virtual_css_bus_reset(BusState *qbus)
+static void virtual_css_bus_reset(BusState *qbus)
 {
     /* This should actually be modelled via the generic css */
     css_reset();
-
-    /* we dont traverse ourself, return 0 */
-    return 0;
 }
 
 
@@ -1283,11 +1280,9 @@ static int virtual_css_bridge_init(SysBusDevice *dev)
 
 static void virtual_css_bridge_class_init(ObjectClass *klass, void *data)
 {
-    DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = virtual_css_bridge_init;
-    dc->no_user = 1;
 }
 
 static const TypeInfo virtual_css_bridge_info = {
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 7653411097..bce617cb93 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -47,6 +47,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
 #define SCSI_MAX_MODE_LEN           256
 
 #define DEFAULT_DISCARD_GRANULARITY 4096
+#define DEFAULT_MAX_UNMAP_SIZE      (1 << 30)   /* 1 GB */
 
 typedef struct SCSIDiskState SCSIDiskState;
 
@@ -74,6 +75,7 @@ struct SCSIDiskState
     bool media_event;
     bool eject_request;
     uint64_t wwn;
+    uint64_t max_unmap_size;
     QEMUBH *bh;
     char *version;
     char *serial;
@@ -625,6 +627,8 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
                     s->qdev.conf.min_io_size / s->qdev.blocksize;
             unsigned int opt_io_size =
                     s->qdev.conf.opt_io_size / s->qdev.blocksize;
+            unsigned int max_unmap_sectors =
+                    s->max_unmap_size / s->qdev.blocksize;
 
             if (s->qdev.type == TYPE_ROM) {
                 DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n",
@@ -647,6 +651,18 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
             outbuf[14] = (opt_io_size >> 8) & 0xff;
             outbuf[15] = opt_io_size & 0xff;
 
+            /* max unmap LBA count, default is 1GB */
+            outbuf[20] = (max_unmap_sectors >> 24) & 0xff;
+            outbuf[21] = (max_unmap_sectors >> 16) & 0xff;
+            outbuf[22] = (max_unmap_sectors >> 8) & 0xff;
+            outbuf[23] = max_unmap_sectors & 0xff;
+
+            /* max unmap descriptors, 255 fit in 4 kb with an 8-byte header.  */
+            outbuf[24] = 0;
+            outbuf[25] = 0;
+            outbuf[26] = 0;
+            outbuf[27] = 255;
+
             /* optimal unmap granularity */
             outbuf[28] = (unmap_sectors >> 24) & 0xff;
             outbuf[29] = (unmap_sectors >> 16) & 0xff;
@@ -2519,6 +2535,8 @@ static Property scsi_hd_properties[] = {
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                     SCSI_DISK_F_DPOFUA, false),
     DEFINE_PROP_HEX64("wwn", SCSIDiskState, wwn, 0),
+    DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
+                       DEFAULT_MAX_UNMAP_SIZE),
     DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -2628,6 +2646,8 @@ static Property scsi_disk_properties[] = {
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                     SCSI_DISK_F_DPOFUA, false),
     DEFINE_PROP_HEX64("wwn", SCSIDiskState, wwn, 0),
+    DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
+                       DEFAULT_MAX_UNMAP_SIZE),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index c35896d28c..462558b76d 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -506,7 +506,6 @@ static void pl181_class_init(ObjectClass *klass, void *data)
     sdc->init = pl181_init;
     k->vmsd = &vmstate_pl181;
     k->reset = pl181_reset;
-    k->no_user = 1;
 }
 
 static const TypeInfo pl181_info = {
diff --git a/hw/sh4/sh_pci.c b/hw/sh4/sh_pci.c
index e81176a11e..a2f6d9e0b6 100644
--- a/hw/sh4/sh_pci.c
+++ b/hw/sh4/sh_pci.c
@@ -162,10 +162,16 @@ static int sh_pci_host_init(PCIDevice *d)
 static void sh_pci_host_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->init = sh_pci_host_init;
     k->vendor_id = PCI_VENDOR_ID_HITACHI;
     k->device_id = PCI_DEVICE_ID_HITACHI_SH7751R;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo sh_pci_host_info = {
diff --git a/hw/timer/arm_mptimer.c b/hw/timer/arm_mptimer.c
index d9f9494f26..35a0a2356f 100644
--- a/hw/timer/arm_mptimer.c
+++ b/hw/timer/arm_mptimer.c
@@ -274,7 +274,6 @@ static void arm_mptimer_class_init(ObjectClass *klass, void *data)
     dc->realize = arm_mptimer_realize;
     dc->vmsd = &vmstate_arm_mptimer;
     dc->reset = arm_mptimer_reset;
-    dc->no_user = 1;
     dc->props = arm_mptimer_properties;
 }
 
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index bb3bf98745..2fbbeb1735 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -765,7 +765,6 @@ static void hpet_device_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = hpet_realize;
-    dc->no_user = 1;
     dc->reset = hpet_reset;
     dc->vmsd = &vmstate_hpet;
     dc->props = hpet_device_properties;
diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c
index e8fb971488..9db5c9d129 100644
--- a/hw/timer/i8254_common.c
+++ b/hw/timer/i8254_common.c
@@ -282,7 +282,12 @@ static void pit_common_class_init(ObjectClass *klass, void *data)
 
     dc->realize = pit_common_realize;
     dc->vmsd = &vmstate_pit_common;
-    dc->no_user = 1;
+    /*
+     * Reason: unlike ordinary ISA devices, the PIT may need to be
+     * wired to the HPET, and because of that, some wiring is always
+     * done by board code.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo pit_common_type = {
diff --git a/hw/timer/m48t59.c b/hw/timer/m48t59.c
index be0592b53d..3cfb18a8b3 100644
--- a/hw/timer/m48t59.c
+++ b/hw/timer/m48t59.c
@@ -750,9 +750,10 @@ static void m48t59_isa_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = m48t59_isa_realize;
-    dc->no_user = 1;
     dc->reset = m48t59_reset_isa;
     dc->props = m48t59_isa_properties;
+    /* Reason: needs to be wired up by m48t59_init_isa() */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo m48t59_isa_info = {
diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index b0116381c0..6fb124fead 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -899,9 +899,10 @@ static void rtc_class_initfn(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = rtc_realizefn;
-    dc->no_user = 1;
     dc->vmsd = &vmstate_rtc;
     dc->props = mc146818rtc_properties;
+    /* Reason: needs to be wired up by rtc_init() */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo mc146818rtc_info = {
diff --git a/hw/timer/pl031.c b/hw/timer/pl031.c
index 65928a4819..34d9b44e7e 100644
--- a/hw/timer/pl031.c
+++ b/hw/timer/pl031.c
@@ -251,7 +251,6 @@ static void pl031_class_init(ObjectClass *klass, void *data)
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = pl031_init;
-    dc->no_user = 1;
     dc->vmsd = &vmstate_pl031;
 }
 
diff --git a/hw/virtio/dataplane/Makefile.objs b/hw/virtio/dataplane/Makefile.objs
index a91bf33c8b..9a8cfc0297 100644
--- a/hw/virtio/dataplane/Makefile.objs
+++ b/hw/virtio/dataplane/Makefile.objs
@@ -1 +1 @@
-common-obj-y += hostmem.o vring.o
+common-obj-y += vring.o
diff --git a/hw/virtio/dataplane/hostmem.c b/hw/virtio/dataplane/hostmem.c
deleted file mode 100644
index 901d98b8a0..0000000000
--- a/hw/virtio/dataplane/hostmem.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Thread-safe guest to host memory mapping
- *
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "exec/address-spaces.h"
-#include "hw/virtio/dataplane/hostmem.h"
-
-static int hostmem_lookup_cmp(const void *phys_, const void *region_)
-{
-    hwaddr phys = *(const hwaddr *)phys_;
-    const HostMemRegion *region = region_;
-
-    if (phys < region->guest_addr) {
-        return -1;
-    } else if (phys >= region->guest_addr + region->size) {
-        return 1;
-    } else {
-        return 0;
-    }
-}
-
-/**
- * Map guest physical address to host pointer
- */
-void *hostmem_lookup(HostMem *hostmem, hwaddr phys, hwaddr len, bool is_write)
-{
-    HostMemRegion *region;
-    void *host_addr = NULL;
-    hwaddr offset_within_region;
-
-    qemu_mutex_lock(&hostmem->current_regions_lock);
-    region = bsearch(&phys, hostmem->current_regions,
-                     hostmem->num_current_regions,
-                     sizeof(hostmem->current_regions[0]),
-                     hostmem_lookup_cmp);
-    if (!region) {
-        goto out;
-    }
-    if (is_write && region->readonly) {
-        goto out;
-    }
-    offset_within_region = phys - region->guest_addr;
-    if (len <= region->size - offset_within_region) {
-        host_addr = region->host_addr + offset_within_region;
-    }
-out:
-    qemu_mutex_unlock(&hostmem->current_regions_lock);
-
-    return host_addr;
-}
-
-/**
- * Install new regions list
- */
-static void hostmem_listener_commit(MemoryListener *listener)
-{
-    HostMem *hostmem = container_of(listener, HostMem, listener);
-    int i;
-
-    qemu_mutex_lock(&hostmem->current_regions_lock);
-    for (i = 0; i < hostmem->num_current_regions; i++) {
-        memory_region_unref(hostmem->current_regions[i].mr);
-    }
-    g_free(hostmem->current_regions);
-    hostmem->current_regions = hostmem->new_regions;
-    hostmem->num_current_regions = hostmem->num_new_regions;
-    qemu_mutex_unlock(&hostmem->current_regions_lock);
-
-    /* Reset new regions list */
-    hostmem->new_regions = NULL;
-    hostmem->num_new_regions = 0;
-}
-
-/**
- * Add a MemoryRegionSection to the new regions list
- */
-static void hostmem_append_new_region(HostMem *hostmem,
-                                      MemoryRegionSection *section)
-{
-    void *ram_ptr = memory_region_get_ram_ptr(section->mr);
-    size_t num = hostmem->num_new_regions;
-    size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);
-
-    hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
-    hostmem->new_regions[num] = (HostMemRegion){
-        .host_addr = ram_ptr + section->offset_within_region,
-        .guest_addr = section->offset_within_address_space,
-        .size = int128_get64(section->size),
-        .readonly = section->readonly,
-        .mr = section->mr,
-    };
-    hostmem->num_new_regions++;
-
-    memory_region_ref(section->mr);
-}
-
-static void hostmem_listener_append_region(MemoryListener *listener,
-                                           MemoryRegionSection *section)
-{
-    HostMem *hostmem = container_of(listener, HostMem, listener);
-
-    /* Ignore non-RAM regions, we may not be able to map them */
-    if (!memory_region_is_ram(section->mr)) {
-        return;
-    }
-
-    /* Ignore regions with dirty logging, we cannot mark them dirty */
-    if (memory_region_is_logging(section->mr)) {
-        return;
-    }
-
-    hostmem_append_new_region(hostmem, section);
-}
-
-/* We don't implement most MemoryListener callbacks, use these nop stubs */
-static void hostmem_listener_dummy(MemoryListener *listener)
-{
-}
-
-static void hostmem_listener_section_dummy(MemoryListener *listener,
-                                           MemoryRegionSection *section)
-{
-}
-
-static void hostmem_listener_eventfd_dummy(MemoryListener *listener,
-                                           MemoryRegionSection *section,
-                                           bool match_data, uint64_t data,
-                                           EventNotifier *e)
-{
-}
-
-static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
-                                                  MemoryRegionSection *section,
-                                                  hwaddr addr, hwaddr len)
-{
-}
-
-void hostmem_init(HostMem *hostmem)
-{
-    memset(hostmem, 0, sizeof(*hostmem));
-
-    qemu_mutex_init(&hostmem->current_regions_lock);
-
-    hostmem->listener = (MemoryListener){
-        .begin = hostmem_listener_dummy,
-        .commit = hostmem_listener_commit,
-        .region_add = hostmem_listener_append_region,
-        .region_del = hostmem_listener_section_dummy,
-        .region_nop = hostmem_listener_append_region,
-        .log_start = hostmem_listener_section_dummy,
-        .log_stop = hostmem_listener_section_dummy,
-        .log_sync = hostmem_listener_section_dummy,
-        .log_global_start = hostmem_listener_dummy,
-        .log_global_stop = hostmem_listener_dummy,
-        .eventfd_add = hostmem_listener_eventfd_dummy,
-        .eventfd_del = hostmem_listener_eventfd_dummy,
-        .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy,
-        .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
-        .priority = 10,
-    };
-
-    memory_listener_register(&hostmem->listener, &address_space_memory);
-    if (hostmem->num_new_regions > 0) {
-        hostmem_listener_commit(&hostmem->listener);
-    }
-}
-
-void hostmem_finalize(HostMem *hostmem)
-{
-    memory_listener_unregister(&hostmem->listener);
-    g_free(hostmem->new_regions);
-    g_free(hostmem->current_regions);
-    qemu_mutex_destroy(&hostmem->current_regions_lock);
-}
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index 351a343806..250d45ec3d 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -15,9 +15,53 @@
  */
 
 #include "trace.h"
+#include "hw/hw.h"
+#include "exec/memory.h"
+#include "exec/address-spaces.h"
 #include "hw/virtio/dataplane/vring.h"
 #include "qemu/error-report.h"
 
+/* vring_map can be coupled with vring_unmap or (if you still have the
+ * value returned in *mr) memory_region_unref.
+ */
+static void *vring_map(MemoryRegion **mr, hwaddr phys, hwaddr len,
+                       bool is_write)
+{
+    MemoryRegionSection section = memory_region_find(get_system_memory(), phys, len);
+
+    if (!section.mr || int128_get64(section.size) < len) {
+        goto out;
+    }
+    if (is_write && section.readonly) {
+        goto out;
+    }
+    if (!memory_region_is_ram(section.mr)) {
+        goto out;
+    }
+
+    /* Ignore regions with dirty logging, we cannot mark them dirty */
+    if (memory_region_is_logging(section.mr)) {
+        goto out;
+    }
+
+    *mr = section.mr;
+    return memory_region_get_ram_ptr(section.mr) + section.offset_within_region;
+
+out:
+    memory_region_unref(section.mr);
+    *mr = NULL;
+    return NULL;
+}
+
+static void vring_unmap(void *buffer, bool is_write)
+{
+    ram_addr_t addr;
+    MemoryRegion *mr;
+
+    mr = qemu_ram_addr_from_host(buffer, &addr);
+    memory_region_unref(mr);
+}
+
 /* Map the guest's vring to host memory */
 bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
 {
@@ -27,8 +71,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
 
     vring->broken = false;
 
-    hostmem_init(&vring->hostmem);
-    vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
+    vring_ptr = vring_map(&vring->mr, vring_addr, vring_size, true);
     if (!vring_ptr) {
         error_report("Failed to map vring "
                      "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
@@ -54,7 +97,7 @@ void vring_teardown(Vring *vring, VirtIODevice *vdev, int n)
     virtio_queue_set_last_avail_idx(vdev, n, vring->last_avail_idx);
     virtio_queue_invalidate_signalled_used(vdev, n);
 
-    hostmem_finalize(&vring->hostmem);
+    memory_region_unref(vring->mr);
 }
 
 /* Disable guest->host notifies */
@@ -110,14 +153,61 @@ bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
     return vring_need_event(vring_used_event(&vring->vr), new, old);
 }
 
+
+static int get_desc(Vring *vring, VirtQueueElement *elem,
+                    struct vring_desc *desc)
+{
+    unsigned *num;
+    struct iovec *iov;
+    hwaddr *addr;
+    MemoryRegion *mr;
+
+    if (desc->flags & VRING_DESC_F_WRITE) {
+        num = &elem->in_num;
+        iov = &elem->in_sg[*num];
+        addr = &elem->in_addr[*num];
+    } else {
+        num = &elem->out_num;
+        iov = &elem->out_sg[*num];
+        addr = &elem->out_addr[*num];
+
+        /* If it's an output descriptor, they're all supposed
+         * to come before any input descriptors. */
+        if (unlikely(elem->in_num)) {
+            error_report("Descriptor has out after in");
+            return -EFAULT;
+        }
+    }
+
+    /* Stop for now if there are not enough iovecs available. */
+    if (*num >= VIRTQUEUE_MAX_SIZE) {
+        return -ENOBUFS;
+    }
+
+    /* TODO handle non-contiguous memory across region boundaries */
+    iov->iov_base = vring_map(&mr, desc->addr, desc->len,
+                              desc->flags & VRING_DESC_F_WRITE);
+    if (!iov->iov_base) {
+        error_report("Failed to map descriptor addr %#" PRIx64 " len %u",
+                     (uint64_t)desc->addr, desc->len);
+        return -EFAULT;
+    }
+
+    /* Keep the reference taken by vring_map: vring_free_element looks the
+     * MemoryRegion up again from iov_base and unrefs it then.  */
+    iov->iov_len = desc->len;
+    *addr = desc->addr;
+    *num += 1;
+    return 0;
+}
+
 /* This is stolen from linux/drivers/vhost/vhost.c. */
-static int get_indirect(Vring *vring,
-                        struct iovec iov[], struct iovec *iov_end,
-                        unsigned int *out_num, unsigned int *in_num,
+static int get_indirect(Vring *vring, VirtQueueElement *elem,
                         struct vring_desc *indirect)
 {
     struct vring_desc desc;
     unsigned int i = 0, count, found = 0;
+    int ret;
 
     /* Sanity check */
     if (unlikely(indirect->len % sizeof(desc))) {
@@ -139,11 +229,12 @@ static int get_indirect(Vring *vring,
 
     do {
         struct vring_desc *desc_ptr;
+        MemoryRegion *mr;
 
         /* Translate indirect descriptor */
-        desc_ptr = hostmem_lookup(&vring->hostmem,
-                                  indirect->addr + found * sizeof(desc),
-                                  sizeof(desc), false);
+        desc_ptr = vring_map(&mr,
+                             indirect->addr + found * sizeof(desc),
+                             sizeof(desc), false);
         if (!desc_ptr) {
             error_report("Failed to map indirect descriptor "
                          "addr %#" PRIx64 " len %zu",
@@ -153,6 +244,7 @@ static int get_indirect(Vring *vring,
             return -EFAULT;
         }
         desc = *desc_ptr;
+        memory_region_unref(mr);
 
         /* Ensure descriptor has been loaded before accessing fields */
         barrier(); /* read_barrier_depends(); */
@@ -170,42 +262,35 @@ static int get_indirect(Vring *vring,
             return -EFAULT;
         }
 
-        /* Stop for now if there are not enough iovecs available. */
-        if (iov >= iov_end) {
-            return -ENOBUFS;
-        }
-
-        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
-                                       desc.flags & VRING_DESC_F_WRITE);
-        if (!iov->iov_base) {
-            error_report("Failed to map indirect descriptor"
-                         "addr %#" PRIx64 " len %u",
-                         (uint64_t)desc.addr, desc.len);
-            vring->broken = true;
-            return -EFAULT;
-        }
-        iov->iov_len = desc.len;
-        iov++;
-
-        /* If this is an input descriptor, increment that count. */
-        if (desc.flags & VRING_DESC_F_WRITE) {
-            *in_num += 1;
-        } else {
-            /* If it's an output descriptor, they're all supposed
-             * to come before any input descriptors. */
-            if (unlikely(*in_num)) {
-                error_report("Indirect descriptor "
-                             "has out after in: idx %u", i);
-                vring->broken = true;
-                return -EFAULT;
-            }
-            *out_num += 1;
+        ret = get_desc(vring, elem, &desc);
+        if (ret < 0) {
+            vring->broken |= (ret == -EFAULT);
+            return ret;
         }
         i = desc.next;
     } while (desc.flags & VRING_DESC_F_NEXT);
     return 0;
 }
 
+void vring_free_element(VirtQueueElement *elem)
+{
+    unsigned int i;
+
+    /* Drop the region reference held by every mapped iovec, then free elem.
+     * This assumes that the iovecs, if changed, are never moved past the end
+     * of the valid area, which holds if they are only manipulated with
+     * iov_discard_front and iov_discard_back.  */
+    for (i = 0; i < elem->out_num; i++) {
+        vring_unmap(elem->out_sg[i].iov_base, false);
+    }
+
+    for (i = 0; i < elem->in_num; i++) {
+        vring_unmap(elem->in_sg[i].iov_base, true);
+    }
+
+    g_slice_free(VirtQueueElement, elem);
+}
+
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
@@ -218,16 +303,18 @@ static int get_indirect(Vring *vring,
  * Stolen from linux/drivers/vhost/vhost.c.
  */
 int vring_pop(VirtIODevice *vdev, Vring *vring,
-              struct iovec iov[], struct iovec *iov_end,
-              unsigned int *out_num, unsigned int *in_num)
+              VirtQueueElement **p_elem)
 {
     struct vring_desc desc;
     unsigned int i, head, found = 0, num = vring->vr.num;
     uint16_t avail_idx, last_avail_idx;
+    VirtQueueElement *elem = NULL;
+    int ret;
 
     /* If there was a fatal error then refuse operation */
     if (vring->broken) {
-        return -EFAULT;
+        ret = -EFAULT;
+        goto out;
     }
 
     /* Check it isn't doing very strange things with descriptor numbers. */
@@ -238,13 +325,14 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
     if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) {
         error_report("Guest moved used index from %u to %u",
                      last_avail_idx, avail_idx);
-        vring->broken = true;
-        return -EFAULT;
+        ret = -EFAULT;
+        goto out;
     }
 
     /* If there's nothing new since last we looked. */
     if (avail_idx == last_avail_idx) {
-        return -EAGAIN;
+        ret = -EAGAIN;
+        goto out;
     }
 
     /* Only get avail ring entries after they have been exposed by guest. */
@@ -254,32 +342,33 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
      * the index we've seen. */
     head = vring->vr.avail->ring[last_avail_idx % num];
 
+    elem = g_slice_new(VirtQueueElement);
+    elem->index = head;
+    elem->in_num = elem->out_num = 0;
+    
     /* If their number is silly, that's an error. */
     if (unlikely(head >= num)) {
         error_report("Guest says index %u > %u is available", head, num);
-        vring->broken = true;
-        return -EFAULT;
+        ret = -EFAULT;
+        goto out;
     }
 
     if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
         vring_avail_event(&vring->vr) = vring->vr.avail->idx;
     }
 
-    /* When we start there are none of either input nor output. */
-    *out_num = *in_num = 0;
-
     i = head;
     do {
         if (unlikely(i >= num)) {
             error_report("Desc index is %u > %u, head = %u", i, num, head);
-            vring->broken = true;
-            return -EFAULT;
+            ret = -EFAULT;
+            goto out;
         }
         if (unlikely(++found > num)) {
             error_report("Loop detected: last one at %u vq size %u head %u",
                          i, num, head);
-            vring->broken = true;
-            return -EFAULT;
+            ret = -EFAULT;
+            goto out;
         }
         desc = vring->vr.desc[i];
 
@@ -287,64 +376,50 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
         barrier();
 
         if (desc.flags & VRING_DESC_F_INDIRECT) {
-            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
+            ret = get_indirect(vring, elem, &desc); /* out: reads this ret */
             if (ret < 0) {
-                return ret;
+                goto out;
             }
             continue;
         }
 
-        /* If there are not enough iovecs left, stop for now.  The caller
-         * should check if there are more descs available once they have dealt
-         * with the current set.
-         */
-        if (iov >= iov_end) {
-            return -ENOBUFS;
+        ret = get_desc(vring, elem, &desc);
+        if (ret < 0) {
+            goto out;
         }
 
-        /* TODO handle non-contiguous memory across region boundaries */
-        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
-                                       desc.flags & VRING_DESC_F_WRITE);
-        if (!iov->iov_base) {
-            error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
-                         (uint64_t)desc.addr, desc.len);
-            vring->broken = true;
-            return -EFAULT;
-        }
-        iov->iov_len  = desc.len;
-        iov++;
-
-        if (desc.flags & VRING_DESC_F_WRITE) {
-            /* If this is an input descriptor,
-             * increment that count. */
-            *in_num += 1;
-        } else {
-            /* If it's an output descriptor, they're all supposed
-             * to come before any input descriptors. */
-            if (unlikely(*in_num)) {
-                error_report("Descriptor has out after in: idx %d", i);
-                vring->broken = true;
-                return -EFAULT;
-            }
-            *out_num += 1;
-        }
         i = desc.next;
     } while (desc.flags & VRING_DESC_F_NEXT);
 
     /* On success, increment avail index. */
     vring->last_avail_idx++;
+    *p_elem = elem;
     return head;
+
+out:
+    assert(ret < 0);
+    if (ret == -EFAULT) {
+        vring->broken = true;
+    }
+    if (elem) {
+        vring_free_element(elem);
+    }
+    *p_elem = NULL;
+    return ret;
 }
 
 /* After we've used one of their buffers, we tell them about it.
  *
  * Stolen from linux/drivers/vhost/vhost.c.
  */
-void vring_push(Vring *vring, unsigned int head, int len)
+void vring_push(Vring *vring, VirtQueueElement *elem, int len)
 {
     struct vring_used_elem *used;
+    unsigned int head = elem->index;
     uint16_t new;
 
+    vring_free_element(elem);
+
     /* Don't touch vring if a fatal error occurred */
     if (vring->broken) {
         return;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 144b9ca2ef..a001e668c4 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1172,6 +1172,8 @@ static void virtio_device_unrealize(DeviceState *dev, Error **errp)
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
     Error *err = NULL;
 
+    virtio_bus_device_unplugged(vdev);
+
     if (vdc->unrealize != NULL) {
         vdc->unrealize(dev, &err);
         if (err != NULL) {
diff --git a/hw/xen/xen_apic.c b/hw/xen/xen_apic.c
index 9f91e0f0c9..63bb7f77c6 100644
--- a/hw/xen/xen_apic.c
+++ b/hw/xen/xen_apic.c
@@ -36,8 +36,10 @@ static const MemoryRegionOps xen_apic_io_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void xen_apic_init(APICCommonState *s)
+static void xen_apic_realize(DeviceState *dev, Error **errp)
 {
+    APICCommonState *s = APIC_COMMON(dev);
+
     memory_region_init_io(&s->io_memory, OBJECT(s), &xen_apic_io_ops, s,
                           "xen-apic-msi", APIC_SPACE_SIZE);
 
@@ -72,7 +74,7 @@ static void xen_apic_class_init(ObjectClass *klass, void *data)
 {
     APICCommonClass *k = APIC_COMMON_CLASS(klass);
 
-    k->init = xen_apic_init;
+    k->realize = xen_apic_realize;
     k->set_base = xen_apic_set_base;
     k->set_tpr = xen_apic_set_tpr;
     k->get_tpr = xen_apic_get_tpr;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8b132d7178..2772f2f1bd 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -394,8 +394,9 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
 
 /**
  * commit_start:
- * @bs: Top Block device
- * @base: Block device that will be written into, and become the new top
+ * @bs: Active block device.
+ * @top: Top block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
  * @cb: Completion function for the job.
@@ -407,7 +408,22 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
                  BlockDriverState *top, int64_t speed,
                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp);
-
+/**
+ * commit_active_start:
+ * @bs: Active block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
+                         int64_t speed,
+                         BlockdevOnError on_error,
+                         BlockDriverCompletionFunc *cb,
+                         void *opaque, Error **errp);
 /*
  * mirror_start:
  * @bs: Block device to operate on.
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index b6998f055a..4cb4b4a53a 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -21,6 +21,7 @@
 
 #include "qemu-common.h"
 #include "exec/cpu-common.h"
+#include "exec/memory.h"
 #include "qemu/thread.h"
 #include "qom/cpu.h"
 
@@ -459,7 +460,7 @@ typedef struct RAMBlock {
 typedef struct RAMList {
     QemuMutex mutex;
     /* Protected by the iothread lock.  */
-    uint8_t *phys_dirty;
+    unsigned long *dirty_memory[DIRTY_MEMORY_NUM];
     RAMBlock *mru_block;
     /* Protected by the ramlist lock.  */
     QTAILQ_HEAD(, RAMBlock) blocks;
diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h
index d0e063392a..25c43c06e9 100644
--- a/include/exec/memory-internal.h
+++ b/include/exec/memory-internal.h
@@ -20,9 +20,6 @@
 #define MEMORY_INTERNAL_H
 
 #ifndef CONFIG_USER_ONLY
-#include "hw/xen/xen.h"
-
-
 typedef struct AddressSpaceDispatch AddressSpaceDispatch;
 
 void address_space_init_dispatch(AddressSpace *as);
@@ -33,92 +30,5 @@ extern const MemoryRegionOps unassigned_mem_ops;
 bool memory_region_access_valid(MemoryRegion *mr, hwaddr addr,
                                 unsigned size, bool is_write);
 
-ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
-                                   MemoryRegion *mr);
-ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr);
-void *qemu_get_ram_ptr(ram_addr_t addr);
-void qemu_ram_free(ram_addr_t addr);
-void qemu_ram_free_from_ptr(ram_addr_t addr);
-
-#define VGA_DIRTY_FLAG       0x01
-#define CODE_DIRTY_FLAG      0x02
-#define MIGRATION_DIRTY_FLAG 0x08
-
-static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
-{
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS];
-}
-
-/* read dirty bit (return 0 or 1) */
-static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
-{
-    return cpu_physical_memory_get_dirty_flags(addr) == 0xff;
-}
-
-static inline int cpu_physical_memory_get_dirty(ram_addr_t start,
-                                                ram_addr_t length,
-                                                int dirty_flags)
-{
-    int ret = 0;
-    ram_addr_t addr, end;
-
-    end = TARGET_PAGE_ALIGN(start + length);
-    start &= TARGET_PAGE_MASK;
-    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
-        ret |= cpu_physical_memory_get_dirty_flags(addr) & dirty_flags;
-    }
-    return ret;
-}
-
-static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
-                                                      int dirty_flags)
-{
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
-}
-
-static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
-{
-    cpu_physical_memory_set_dirty_flags(addr, 0xff);
-}
-
-static inline int cpu_physical_memory_clear_dirty_flags(ram_addr_t addr,
-                                                        int dirty_flags)
-{
-    int mask = ~dirty_flags;
-
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] &= mask;
-}
-
-static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
-                                                       ram_addr_t length,
-                                                       int dirty_flags)
-{
-    ram_addr_t addr, end;
-
-    end = TARGET_PAGE_ALIGN(start + length);
-    start &= TARGET_PAGE_MASK;
-    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
-        cpu_physical_memory_set_dirty_flags(addr, dirty_flags);
-    }
-    xen_modified_memory(addr, length);
-}
-
-static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
-                                                        ram_addr_t length,
-                                                        int dirty_flags)
-{
-    ram_addr_t addr, end;
-
-    end = TARGET_PAGE_ALIGN(start + length);
-    start &= TARGET_PAGE_MASK;
-    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
-        cpu_physical_memory_clear_dirty_flags(addr, dirty_flags);
-    }
-}
-
-void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
-                                     int dirty_flags);
-
 #endif
-
 #endif
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 480dfbf9da..296d6ab2f4 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -16,6 +16,11 @@
 
 #ifndef CONFIG_USER_ONLY
 
+#define DIRTY_MEMORY_VGA       0
+#define DIRTY_MEMORY_CODE      1
+#define DIRTY_MEMORY_MIGRATION 2
+#define DIRTY_MEMORY_NUM       3        /* num of dirty bits */
+
 #include <stdint.h>
 #include <stdbool.h>
 #include "qemu-common.h"
@@ -33,13 +38,6 @@
 typedef struct MemoryRegionOps MemoryRegionOps;
 typedef struct MemoryRegionMmio MemoryRegionMmio;
 
-/* Must match *_DIRTY_FLAGS in cpu-all.h.  To be replaced with dynamic
- * registration.
- */
-#define DIRTY_MEMORY_VGA       0
-#define DIRTY_MEMORY_CODE      1
-#define DIRTY_MEMORY_MIGRATION 3
-
 struct MemoryRegionMmio {
     CPUReadMemoryFunc *read[3];
     CPUWriteMemoryFunc *write[3];
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
new file mode 100644
index 0000000000..33c8acc02e
--- /dev/null
+++ b/include/exec/ram_addr.h
@@ -0,0 +1,147 @@
+/*
+ * Declarations for cpu physical memory functions
+ *
+ * Copyright 2011 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *  Avi Kivity <avi@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ *
+ */
+
+/*
+ * This header is for use by exec.c and memory.c ONLY.  Do not include it.
+ * The functions declared here will be removed soon.
+ */
+
+#ifndef RAM_ADDR_H
+#define RAM_ADDR_H
+
+#ifndef CONFIG_USER_ONLY
+#include "hw/xen/xen.h"
+
+ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
+                                   MemoryRegion *mr);
+ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr);
+void *qemu_get_ram_ptr(ram_addr_t addr);
+void qemu_ram_free(ram_addr_t addr);
+void qemu_ram_free_from_ptr(ram_addr_t addr);
+
+static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
+                                                 ram_addr_t length,
+                                                 unsigned client)
+{
+    unsigned long end, page, next;
+
+    assert(client < DIRTY_MEMORY_NUM);
+
+    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+    page = start >> TARGET_PAGE_BITS;
+    next = find_next_bit(ram_list.dirty_memory[client], end, page);
+
+    return next < end;
+}
+
+static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
+                                                      unsigned client)
+{
+    return cpu_physical_memory_get_dirty(addr, 1, client);
+}
+
+static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
+{
+    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
+    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
+    bool migration =
+        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
+    return !(vga && code && migration);
+}
+
+static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
+                                                      unsigned client)
+{
+    assert(client < DIRTY_MEMORY_NUM);
+    set_bit(addr >> TARGET_PAGE_BITS, ram_list.dirty_memory[client]);
+}
+
+static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
+                                                       ram_addr_t length)
+{
+    unsigned long end, page;
+
+    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+    page = start >> TARGET_PAGE_BITS;
+    bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION], page, end - page);
+    bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_VGA], page, end - page);
+    bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_CODE], page, end - page);
+    xen_modified_memory(start, length);
+}
+
+static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
+                                                          ram_addr_t start,
+                                                          ram_addr_t pages)
+{
+    unsigned long i, j;
+    unsigned long page_number, c;
+    hwaddr addr;
+    ram_addr_t ram_addr;
+    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
+    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
+    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
+
+    /* Fast path: start is word-aligned in the bitmap, OR whole words in */
+    if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
+        long k;
+        long nr = BITS_TO_LONGS(pages);
+
+        for (k = 0; k < nr; k++) {
+            if (bitmap[k]) {
+                unsigned long temp = leul_to_cpu(bitmap[k]);
+
+                ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION][page + k] |= temp;
+                ram_list.dirty_memory[DIRTY_MEMORY_VGA][page + k] |= temp;
+                ram_list.dirty_memory[DIRTY_MEMORY_CODE][page + k] |= temp;
+            }
+        }
+        xen_modified_memory(start, pages << TARGET_PAGE_BITS); /* bytes */
+    } else {
+        /*
+         * bitmap-traveling is faster than memory-traveling (for addr...)
+         * especially when most of the memory is not dirty.
+         */
+        for (i = 0; i < len; i++) {
+            if (bitmap[i] != 0) {
+                c = leul_to_cpu(bitmap[i]);
+                do {
+                    j = ffsl(c) - 1;
+                    c &= ~(1ul << j);
+                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
+                    addr = page_number * TARGET_PAGE_SIZE;
+                    ram_addr = start + addr;
+                    cpu_physical_memory_set_dirty_range(ram_addr,
+                                       TARGET_PAGE_SIZE * hpratio);
+                } while (c != 0);
+            }
+        }
+    }
+}
+
+static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
+                                                         ram_addr_t length,
+                                                         unsigned client)
+{
+    unsigned long end, page;
+
+    assert(client < DIRTY_MEMORY_NUM);
+    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+    page = start >> TARGET_PAGE_BITS;
+    bitmap_clear(ram_list.dirty_memory[client], page, end - page);
+}
+
+void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
+                                     unsigned client);
+
+#endif
+#endif
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 2365274daa..806ae13780 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -152,7 +152,8 @@ enum {
     float_round_nearest_even = 0,
     float_round_down         = 1,
     float_round_up           = 2,
-    float_round_to_zero      = 3
+    float_round_to_zero      = 3,
+    float_round_ties_away    = 4,
 };
 
 /*----------------------------------------------------------------------------
@@ -180,12 +181,22 @@ typedef struct float_status {
     flag default_nan_mode;
 } float_status;
 
-void set_float_rounding_mode(int val STATUS_PARAM);
-void set_float_exception_flags(int val STATUS_PARAM);
 INLINE void set_float_detect_tininess(int val STATUS_PARAM)
 {
     STATUS(float_detect_tininess) = val;
 }
+INLINE void set_float_rounding_mode(int val STATUS_PARAM)
+{
+    STATUS(float_rounding_mode) = val;
+}
+INLINE void set_float_exception_flags(int val STATUS_PARAM)
+{
+    STATUS(float_exception_flags) = val;
+}
+INLINE void set_floatx80_rounding_precision(int val STATUS_PARAM)
+{
+    STATUS(floatx80_rounding_precision) = val;
+}
 INLINE void set_flush_to_zero(flag val STATUS_PARAM)
 {
     STATUS(flush_to_zero) = val;
@@ -198,11 +209,34 @@ INLINE void set_default_nan_mode(flag val STATUS_PARAM)
 {
     STATUS(default_nan_mode) = val;
 }
+INLINE int get_float_detect_tininess(float_status *status)
+{
+    return STATUS(float_detect_tininess);
+}
+INLINE int get_float_rounding_mode(float_status *status)
+{
+    return STATUS(float_rounding_mode);
+}
 INLINE int get_float_exception_flags(float_status *status)
 {
     return STATUS(float_exception_flags);
 }
-void set_floatx80_rounding_precision(int val STATUS_PARAM);
+INLINE int get_floatx80_rounding_precision(float_status *status)
+{
+    return STATUS(floatx80_rounding_precision);
+}
+INLINE flag get_flush_to_zero(float_status *status)
+{
+    return STATUS(flush_to_zero);
+}
+INLINE flag get_flush_inputs_to_zero(float_status *status)
+{
+    return STATUS(flush_inputs_to_zero);
+}
+INLINE flag get_default_nan_mode(float_status *status)
+{
+    return STATUS(default_nan_mode);
+}
 
 /*----------------------------------------------------------------------------
 | Routine to raise any or all of the software IEC/IEEE floating-point
@@ -225,25 +259,48 @@ enum {
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE integer-to-floating-point conversion routines.
 *----------------------------------------------------------------------------*/
-float32 int32_to_float32( int32 STATUS_PARAM );
-float64 int32_to_float64( int32 STATUS_PARAM );
-float32 uint32_to_float32( uint32 STATUS_PARAM );
-float64 uint32_to_float64( uint32 STATUS_PARAM );
-floatx80 int32_to_floatx80( int32 STATUS_PARAM );
-float128 int32_to_float128( int32 STATUS_PARAM );
-float32 int64_to_float32( int64 STATUS_PARAM );
-float32 uint64_to_float32( uint64 STATUS_PARAM );
-float64 int64_to_float64( int64 STATUS_PARAM );
-float64 uint64_to_float64( uint64 STATUS_PARAM );
-floatx80 int64_to_floatx80( int64 STATUS_PARAM );
-float128 int64_to_float128( int64 STATUS_PARAM );
-float128 uint64_to_float128( uint64 STATUS_PARAM );
+float32 int32_to_float32(int32_t STATUS_PARAM);
+float64 int32_to_float64(int32_t STATUS_PARAM);
+float32 uint32_to_float32(uint32_t STATUS_PARAM);
+float64 uint32_to_float64(uint32_t STATUS_PARAM);
+floatx80 int32_to_floatx80(int32_t STATUS_PARAM);
+float128 int32_to_float128(int32_t STATUS_PARAM);
+float32 int64_to_float32(int64_t STATUS_PARAM);
+float32 uint64_to_float32(uint64_t STATUS_PARAM);
+float64 int64_to_float64(int64_t STATUS_PARAM);
+float64 uint64_to_float64(uint64_t STATUS_PARAM);
+floatx80 int64_to_floatx80(int64_t STATUS_PARAM);
+float128 int64_to_float128(int64_t STATUS_PARAM);
+float128 uint64_to_float128(uint64_t STATUS_PARAM);
+
+/* We provide the int16 versions for symmetry of API with float-to-int */
+INLINE float32 int16_to_float32(int16_t v STATUS_PARAM)
+{
+    return int32_to_float32(v STATUS_VAR);
+}
+
+INLINE float32 uint16_to_float32(uint16_t v STATUS_PARAM)
+{
+    return uint32_to_float32(v STATUS_VAR);
+}
+
+INLINE float64 int16_to_float64(int16_t v STATUS_PARAM)
+{
+    return int32_to_float64(v STATUS_VAR);
+}
+
+INLINE float64 uint16_to_float64(uint16_t v STATUS_PARAM)
+{
+    return uint32_to_float64(v STATUS_VAR);
+}
 
 /*----------------------------------------------------------------------------
 | Software half-precision conversion routines.
 *----------------------------------------------------------------------------*/
 float16 float32_to_float16( float32, flag STATUS_PARAM );
 float32 float16_to_float32( float16, flag STATUS_PARAM );
+float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM);
+float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM);
 
 /*----------------------------------------------------------------------------
 | Software half-precision operations.
@@ -265,6 +322,8 @@ extern const float16 float16_default_nan;
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE single-precision conversion routines.
 *----------------------------------------------------------------------------*/
+int_fast16_t float32_to_int16(float32 STATUS_PARAM);
+uint_fast16_t float32_to_uint16(float32 STATUS_PARAM);
 int_fast16_t float32_to_int16_round_to_zero(float32 STATUS_PARAM);
 uint_fast16_t float32_to_uint16_round_to_zero(float32 STATUS_PARAM);
 int32 float32_to_int32( float32 STATUS_PARAM );
@@ -272,6 +331,7 @@ int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM );
 uint32 float32_to_uint32( float32 STATUS_PARAM );
 uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
 int64 float32_to_int64( float32 STATUS_PARAM );
+uint64 float32_to_uint64(float32 STATUS_PARAM);
 int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
 float64 float32_to_float64( float32 STATUS_PARAM );
 floatx80 float32_to_floatx80( float32 STATUS_PARAM );
@@ -371,6 +431,8 @@ extern const float32 float32_default_nan;
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE double-precision conversion routines.
 *----------------------------------------------------------------------------*/
+int_fast16_t float64_to_int16(float64 STATUS_PARAM);
+uint_fast16_t float64_to_uint16(float64 STATUS_PARAM);
 int_fast16_t float64_to_int16_round_to_zero(float64 STATUS_PARAM);
 uint_fast16_t float64_to_uint16_round_to_zero(float64 STATUS_PARAM);
 int32 float64_to_int32( float64 STATUS_PARAM );
diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h
index 6bbcb1750d..3e53297a99 100644
--- a/include/hw/acpi/acpi.h
+++ b/include/hw/acpi/acpi.h
@@ -69,6 +69,12 @@
 #define ACPI_BITMASK_RT_CLOCK_ENABLE            0x0400
 #define ACPI_BITMASK_PCIEXP_WAKE_DISABLE        0x4000	/* ACPI 3.0 */
 
+#define ACPI_BITMASK_PM1_COMMON_ENABLED         ( \
+        ACPI_BITMASK_RT_CLOCK_ENABLE        | \
+        ACPI_BITMASK_POWER_BUTTON_ENABLE    | \
+        ACPI_BITMASK_GLOBAL_LOCK_ENABLE     | \
+        ACPI_BITMASK_TIMER_ENABLE)
+
 /* PM1x_CNT */
 #define ACPI_BITMASK_SCI_ENABLE                 0x0001
 #define ACPI_BITMASK_BUS_MASTER_RLD             0x0002
@@ -160,6 +166,8 @@ void acpi_gpe_reset(ACPIREGS *ar);
 void acpi_gpe_ioport_writeb(ACPIREGS *ar, uint32_t addr, uint32_t val);
 uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr);
 
+void acpi_update_sci(ACPIREGS *acpi_regs, qemu_irq irq);
+
 /* acpi.c */
 extern int acpi_enabled;
 extern char unsigned *acpi_tables;
diff --git a/include/hw/arm/pxa.h b/include/hw/arm/pxa.h
index a4e1a66264..7ca330a61f 100644
--- a/include/hw/arm/pxa.h
+++ b/include/hw/arm/pxa.h
@@ -102,15 +102,15 @@ void pxa2xx_pcmcia_set_irq_cb(void *opaque, qemu_irq irq, qemu_irq cd_irq);
 
 /* pxa2xx_keypad.c */
 struct  keymap {
-    int column;
-    int row;
+    int8_t column;
+    int8_t row;
 };
 typedef struct PXA2xxKeyPadState PXA2xxKeyPadState;
 PXA2xxKeyPadState *pxa27x_keypad_init(MemoryRegion *sysmem,
                                       hwaddr base,
                                       qemu_irq irq);
-void pxa27x_register_keypad(PXA2xxKeyPadState *kp, struct keymap *map,
-                int size);
+void pxa27x_register_keypad(PXA2xxKeyPadState *kp,
+                            const struct keymap *map, int size);
 
 /* pxa2xx.c */
 typedef struct PXA2xxI2CState PXA2xxI2CState;
diff --git a/include/hw/cpu/icc_bus.h b/include/hw/cpu/icc_bus.h
index b5500708dc..98a979fa1c 100644
--- a/include/hw/cpu/icc_bus.h
+++ b/include/hw/cpu/icc_bus.h
@@ -66,7 +66,7 @@ typedef struct ICCDeviceClass {
     DeviceClass parent_class;
     /*< public >*/
 
-    int (*init)(ICCDevice *dev); /* TODO replace with QOM realize */
+    DeviceRealize realize;
 } ICCDeviceClass;
 
 #define TYPE_ICC_DEVICE "icc-device"
diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h
index 1b0a7fbfad..70542a6f43 100644
--- a/include/hw/i386/apic_internal.h
+++ b/include/hw/i386/apic_internal.h
@@ -80,7 +80,7 @@ typedef struct APICCommonClass
 {
     ICCDeviceClass parent_class;
 
-    void (*init)(APICCommonState *s);
+    DeviceRealize realize;
     void (*set_base)(APICCommonState *s, uint64_t val);
     void (*set_tpr)(APICCommonState *s, uint8_t val);
     uint8_t (*get_tpr)(APICCommonState *s);
diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h
index 25576c819e..3be3352185 100644
--- a/include/hw/i386/ioapic_internal.h
+++ b/include/hw/i386/ioapic_internal.h
@@ -83,7 +83,8 @@ typedef struct IOAPICCommonState IOAPICCommonState;
 
 typedef struct IOAPICCommonClass {
     SysBusDeviceClass parent_class;
-    void (*init)(IOAPICCommonState *s, int instance_no);
+
+    DeviceRealize realize;
     void (*pre_save)(IOAPICCommonState *s);
     void (*post_load)(IOAPICCommonState *s);
 } IOAPICCommonClass;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 24eb3de310..eb3da964f0 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -182,6 +182,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
                     MemoryRegion *address_space_mem,
                     MemoryRegion *address_space_io,
                     ram_addr_t ram_size,
+                    ram_addr_t below_4g_mem_size,
                     ram_addr_t above_4g_mem_size,
                     MemoryRegion *pci_memory,
                     MemoryRegion *ram_memory);
diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h
index 4f381bdce7..0d232dfb67 100644
--- a/include/hw/intc/arm_gic_common.h
+++ b/include/hw/intc/arm_gic_common.h
@@ -37,7 +37,7 @@ typedef struct gic_irq_state {
     uint8_t active;
     uint8_t level;
     bool model; /* 0 = N:N, 1 = 1:N */
-    bool trigger; /* nonzero = edge triggered.  */
+    bool edge_trigger; /* true: edge-triggered, false: level-triggered  */
 } gic_irq_state;
 
 typedef struct GICState {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b783e68d08..754b82de81 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -373,7 +373,6 @@ void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
 void pci_device_set_intx_routing_notifier(PCIDevice *dev,
                                           PCIINTxRoutingNotifier notifier);
 void pci_device_reset(PCIDevice *dev);
-void pci_bus_reset(PCIBus *bus);
 
 PCIDevice *pci_nic_init(NICInfo *nd, PCIBus *rootbus,
                         const char *default_model,
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index f2043a69c2..2c4f140b9c 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -97,7 +97,18 @@ typedef struct DeviceClass {
     const char *fw_name;
     const char *desc;
     Property *props;
-    int no_user;
+
+    /*
+     * Shall we hide this device model from -device / device_add?
+     * All devices should support instantiation with device_add, and
+     * this flag should not exist.  But we're not there, yet.  Some
+     * devices fail to instantiate with cryptic error messages.
+     * Others instantiate, but don't work.  Exposing users to such
+     * behavior would be cruel; this flag serves to protect them.  It
+     * should never be set without a comment explaining why it is set.
+     * TODO remove once we're there
+     */
+    bool cannot_instantiate_with_device_add_yet;
 
     /* callbacks */
     void (*reset)(DeviceState *dev);
@@ -158,7 +169,7 @@ struct BusClass {
      * bindings can be found at http://playground.sun.com/1275/bindings/.
      */
     char *(*get_fw_dev_path)(DeviceState *dev);
-    int (*reset)(BusState *bus);
+    void (*reset)(BusState *bus);
     /* maximum devices allowed on the bus, 0: no limit. */
     int max_dev;
 };
@@ -253,10 +264,15 @@ BusState *qbus_create(const char *typename, DeviceState *parent, const char *nam
 /* Returns > 0 if either devfn or busfn skip walk somewhere in cursion,
  *         < 0 if either devfn or busfn terminate walk somewhere in cursion,
  *           0 otherwise. */
-int qbus_walk_children(BusState *bus, qdev_walkerfn *devfn,
-                       qbus_walkerfn *busfn, void *opaque);
-int qdev_walk_children(DeviceState *dev, qdev_walkerfn *devfn,
-                       qbus_walkerfn *busfn, void *opaque);
+int qbus_walk_children(BusState *bus,
+                       qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn,
+                       qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn,
+                       void *opaque);
+int qdev_walk_children(DeviceState *dev,
+                       qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn,
+                       qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn,
+                       void *opaque);
+
 void qdev_reset_all(DeviceState *dev);
 
 /**
@@ -272,8 +288,6 @@ void qdev_reset_all(DeviceState *dev);
 void qbus_reset_all(BusState *bus);
 void qbus_reset_all_fn(void *opaque);
 
-void qbus_free(BusState *bus);
-
 /* This should go away once we get rid of the NULL bus hack */
 BusState *sysbus_get_default(void);
 
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 692f82e935..77c6f7c037 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -122,8 +122,25 @@ extern PropertyInfo qdev_prop_arraylen;
 #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d)                   \
     DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t)
 
+/*
+ * Please avoid pointer properties.  If you must use them, you must
+ * cover them in their device's class init function as follows:
+ *
+ * - If the property must be set, the device cannot be used with
+ *   device_add, so add code like this:
+ *   |* Reason: pointer property "NAME-OF-YOUR-PROP" *|
+ *   DeviceClass *dc = DEVICE_CLASS(class);
+ *   dc->cannot_instantiate_with_device_add_yet = true;
+ *
+ * - If the property may safely remain null, document it like this:
+ *   |*
+ *    * Note: pointer property "interrupt_vector" may remain null, thus
+ *    * no need for dc->cannot_instantiate_with_device_add_yet = true;
+ *    *|
+ */
 #define DEFINE_PROP_PTR(_n, _s, _f)             \
     DEFINE_PROP(_n, _s, _f, qdev_prop_ptr, void*)
+
 #define DEFINE_PROP_CHR(_n, _s, _f)             \
     DEFINE_PROP(_n, _s, _f, qdev_prop_chr, CharDriverState*)
 #define DEFINE_PROP_STRING(_n, _s, _f)             \
diff --git a/include/hw/virtio/dataplane/hostmem.h b/include/hw/virtio/dataplane/hostmem.h
deleted file mode 100644
index 2810f4b44e..0000000000
--- a/include/hw/virtio/dataplane/hostmem.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Thread-safe guest to host memory mapping
- *
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef HOSTMEM_H
-#define HOSTMEM_H
-
-#include "exec/memory.h"
-#include "qemu/thread.h"
-
-typedef struct {
-    MemoryRegion *mr;
-    void *host_addr;
-    hwaddr guest_addr;
-    uint64_t size;
-    bool readonly;
-} HostMemRegion;
-
-typedef struct {
-    /* The listener is invoked when regions change and a new list of regions is
-     * built up completely before they are installed.
-     */
-    MemoryListener listener;
-    HostMemRegion *new_regions;
-    size_t num_new_regions;
-
-    /* Current regions are accessed from multiple threads either to lookup
-     * addresses or to install a new list of regions.  The lock protects the
-     * pointer and the regions.
-     */
-    QemuMutex current_regions_lock;
-    HostMemRegion *current_regions;
-    size_t num_current_regions;
-} HostMem;
-
-void hostmem_init(HostMem *hostmem);
-void hostmem_finalize(HostMem *hostmem);
-
-/**
- * Map a guest physical address to a pointer
- *
- * Note that there is map/unmap mechanism here.  The caller must ensure that
- * mapped memory is no longer used across events like hot memory unplug.  This
- * can be done with other mechanisms like bdrv_drain_all() that quiesce
- * in-flight I/O.
- */
-void *hostmem_lookup(HostMem *hostmem, hwaddr phys, hwaddr len, bool is_write);
-
-#endif /* HOSTMEM_H */
diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h
index c0b69ff18f..63e7bf4256 100644
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@@ -19,11 +19,10 @@
 
 #include <linux/virtio_ring.h>
 #include "qemu-common.h"
-#include "hostmem.h"
 #include "hw/virtio/virtio.h"
 
 typedef struct {
-    HostMem hostmem;                /* guest memory mapper */
+    MemoryRegion *mr;               /* memory region containing the vring */
     struct vring vr;                /* virtqueue vring mapped to host memory */
     uint16_t last_avail_idx;        /* last processed avail ring index */
     uint16_t last_used_idx;         /* last processed used ring index */
@@ -54,9 +53,8 @@ void vring_teardown(Vring *vring, VirtIODevice *vdev, int n);
 void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
 bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
 bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
-int vring_pop(VirtIODevice *vdev, Vring *vring,
-              struct iovec iov[], struct iovec *iov_end,
-              unsigned int *out_num, unsigned int *in_num);
-void vring_push(Vring *vring, unsigned int head, int len);
+int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement **elem);
+void vring_push(Vring *vring, VirtQueueElement *elem, int len);
+void vring_free_element(VirtQueueElement *elem);
 
 #endif /* VRING_H */
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 140e6b471c..bfa3951a61 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -23,6 +23,17 @@
 #include "qapi-types.h"
 #include "exec/cpu-common.h"
 
+#define QEMU_VM_FILE_MAGIC           0x5145564d
+#define QEMU_VM_FILE_VERSION_COMPAT  0x00000002
+#define QEMU_VM_FILE_VERSION         0x00000003
+
+#define QEMU_VM_EOF                  0x00
+#define QEMU_VM_SECTION_START        0x01
+#define QEMU_VM_SECTION_PART         0x02
+#define QEMU_VM_SECTION_END          0x03
+#define QEMU_VM_SECTION_FULL         0x04
+#define QEMU_VM_SUBSECTION           0x05
+
 struct MigrationParams {
     bool blk;
     bool shared;
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 0f757fbeb6..a191fb6d8d 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -121,8 +121,11 @@ static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
 void qemu_put_be16(QEMUFile *f, unsigned int v);
 void qemu_put_be32(QEMUFile *f, unsigned int v);
 void qemu_put_be64(QEMUFile *f, uint64_t v);
+int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset);
 int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
+int qemu_peek_byte(QEMUFile *f, int offset);
 int qemu_get_byte(QEMUFile *f);
+void qemu_file_skip(QEMUFile *f, int size);
 void qemu_update_position(QEMUFile *f, size_t size);
 
 static inline unsigned int qemu_get_ubyte(QEMUFile *f)
@@ -141,6 +144,7 @@ void qemu_file_reset_rate_limit(QEMUFile *f);
 void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
 int64_t qemu_file_get_rate_limit(QEMUFile *f);
 int qemu_file_get_error(QEMUFile *f);
+void qemu_file_set_error(QEMUFile *f, int ret);
 void qemu_fflush(QEMUFile *f);
 
 static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 308bbb71e9..1babd5d812 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -31,7 +31,7 @@
  * bitmap_andnot(dst, src1, src2, nbits)	*dst = *src1 & ~(*src2)
  * bitmap_complement(dst, src, nbits)		*dst = ~(*src)
  * bitmap_equal(src1, src2, nbits)		Are *src1 and *src2 equal?
- * bitmap_intersects(src1, src2, nbits) 	Do *src1 and *src2 overlap?
+ * bitmap_intersects(src1, src2, nbits)         Do *src1 and *src2 overlap?
  * bitmap_empty(src, nbits)			Are all bits zero in *src?
  * bitmap_full(src, nbits)			Are all bits set in *src?
  * bitmap_set(dst, pos, nbits)			Set specified bit area
@@ -62,71 +62,71 @@
         )
 
 #define DECLARE_BITMAP(name,bits)                  \
-	unsigned long name[BITS_TO_LONGS(bits)]
+        unsigned long name[BITS_TO_LONGS(bits)]
 
 #define small_nbits(nbits)                      \
-	((nbits) <= BITS_PER_LONG)
+        ((nbits) <= BITS_PER_LONG)
 
-int slow_bitmap_empty(const unsigned long *bitmap, int bits);
-int slow_bitmap_full(const unsigned long *bitmap, int bits);
+int slow_bitmap_empty(const unsigned long *bitmap, long bits);
+int slow_bitmap_full(const unsigned long *bitmap, long bits);
 int slow_bitmap_equal(const unsigned long *bitmap1,
-                   const unsigned long *bitmap2, int bits);
+                      const unsigned long *bitmap2, long bits);
 void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
-                         int bits);
+                            long bits);
 void slow_bitmap_shift_right(unsigned long *dst,
-                          const unsigned long *src, int shift, int bits);
+                             const unsigned long *src, int shift, long bits);
 void slow_bitmap_shift_left(unsigned long *dst,
-                         const unsigned long *src, int shift, int bits);
+                            const unsigned long *src, int shift, long bits);
 int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-                 const unsigned long *bitmap2, int bits);
+                    const unsigned long *bitmap2, long bits);
 void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-                 const unsigned long *bitmap2, int bits);
+                    const unsigned long *bitmap2, long bits);
 void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-                  const unsigned long *bitmap2, int bits);
+                     const unsigned long *bitmap2, long bits);
 int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-                    const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, long bits);
 int slow_bitmap_intersects(const unsigned long *bitmap1,
-			const unsigned long *bitmap2, int bits);
+                           const unsigned long *bitmap2, long bits);
 
-static inline unsigned long *bitmap_new(int nbits)
+static inline unsigned long *bitmap_new(long nbits)
 {
-    int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+    long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
     return g_malloc0(len);
 }
 
-static inline void bitmap_zero(unsigned long *dst, int nbits)
+static inline void bitmap_zero(unsigned long *dst, long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = 0UL;
     } else {
-        int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+        long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
         memset(dst, 0, len);
     }
 }
 
-static inline void bitmap_fill(unsigned long *dst, int nbits)
+static inline void bitmap_fill(unsigned long *dst, long nbits)
 {
     size_t nlongs = BITS_TO_LONGS(nbits);
     if (!small_nbits(nbits)) {
-        int len = (nlongs - 1) * sizeof(unsigned long);
+        long len = (nlongs - 1) * sizeof(unsigned long);
         memset(dst, 0xff,  len);
     }
     dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
 }
 
 static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
-                               int nbits)
+                               long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = *src;
     } else {
-        int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+        long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
         memcpy(dst, src, len);
     }
 }
 
 static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
-                             const unsigned long *src2, int nbits)
+                             const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         return (*dst = *src1 & *src2) != 0;
@@ -135,7 +135,7 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                             const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = *src1 | *src2;
@@ -145,7 +145,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                              const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = *src1 ^ *src2;
@@ -155,7 +155,7 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                                const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         return (*dst = *src1 & ~(*src2)) != 0;
@@ -163,8 +163,9 @@ static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
     return slow_bitmap_andnot(dst, src1, src2, nbits);
 }
 
-static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
-			int nbits)
+static inline void bitmap_complement(unsigned long *dst,
+                                     const unsigned long *src,
+                                     long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
@@ -174,7 +175,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr
 }
 
 static inline int bitmap_equal(const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                               const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -183,7 +184,7 @@ static inline int bitmap_equal(const unsigned long *src1,
     }
 }
 
-static inline int bitmap_empty(const unsigned long *src, int nbits)
+static inline int bitmap_empty(const unsigned long *src, long nbits)
 {
     if (small_nbits(nbits)) {
         return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -192,7 +193,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
     }
 }
 
-static inline int bitmap_full(const unsigned long *src, int nbits)
+static inline int bitmap_full(const unsigned long *src, long nbits)
 {
     if (small_nbits(nbits)) {
         return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -202,7 +203,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
 }
 
 static inline int bitmap_intersects(const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                                    const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -211,12 +212,21 @@ static inline int bitmap_intersects(const unsigned long *src1,
     }
 }
 
-void bitmap_set(unsigned long *map, int i, int len);
-void bitmap_clear(unsigned long *map, int start, int nr);
+void bitmap_set(unsigned long *map, long i, long len);
+void bitmap_clear(unsigned long *map, long start, long nr);
 unsigned long bitmap_find_next_zero_area(unsigned long *map,
-					 unsigned long size,
-					 unsigned long start,
-					 unsigned int nr,
-					 unsigned long align_mask);
+                                         unsigned long size,
+                                         unsigned long start,
+                                         unsigned long nr,
+                                         unsigned long align_mask);
+
+static inline unsigned long *bitmap_zero_extend(unsigned long *old,
+                                                long old_nbits, long new_nbits)
+{
+    long new_len = BITS_TO_LONGS(new_nbits) * sizeof(unsigned long);
+    unsigned long *new = g_realloc(old, new_len);
+    bitmap_clear(new, old_nbits, new_nbits - old_nbits);
+    return new;
+}
 
 #endif /* BITMAP_H */
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 304c90c2b4..340b1e73bd 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -28,7 +28,7 @@
  * @nr: the bit to set
  * @addr: the address to start counting from
  */
-static inline void set_bit(int nr, unsigned long *addr)
+static inline void set_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -41,7 +41,7 @@ static inline void set_bit(int nr, unsigned long *addr)
  * @nr: Bit to clear
  * @addr: Address to start counting from
  */
-static inline void clear_bit(int nr, unsigned long *addr)
+static inline void clear_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -54,7 +54,7 @@ static inline void clear_bit(int nr, unsigned long *addr)
  * @nr: Bit to change
  * @addr: Address to start counting from
  */
-static inline void change_bit(int nr, unsigned long *addr)
+static inline void change_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -67,7 +67,7 @@ static inline void change_bit(int nr, unsigned long *addr)
  * @nr: Bit to set
  * @addr: Address to count from
  */
-static inline int test_and_set_bit(int nr, unsigned long *addr)
+static inline int test_and_set_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -82,7 +82,7 @@ static inline int test_and_set_bit(int nr, unsigned long *addr)
  * @nr: Bit to clear
  * @addr: Address to count from
  */
-static inline int test_and_clear_bit(int nr, unsigned long *addr)
+static inline int test_and_clear_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -97,7 +97,7 @@ static inline int test_and_clear_bit(int nr, unsigned long *addr)
  * @nr: Bit to change
  * @addr: Address to count from
  */
-static inline int test_and_change_bit(int nr, unsigned long *addr)
+static inline int test_and_change_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -112,7 +112,7 @@ static inline int test_and_change_bit(int nr, unsigned long *addr)
  * @nr: bit number to test
  * @addr: Address to start counting from
  */
-static inline int test_bit(int nr, const unsigned long *addr)
+static inline int test_bit(long nr, const unsigned long *addr)
 {
 	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
 }
diff --git a/include/qom/object.h b/include/qom/object.h
index a275db2092..e0ff212cb6 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -358,7 +358,8 @@ struct ObjectClass
     Type type;
     GSList *interfaces;
 
-    const char *cast_cache[OBJECT_CLASS_CAST_CACHE];
+    const char *object_cast_cache[OBJECT_CLASS_CAST_CACHE];
+    const char *class_cast_cache[OBJECT_CLASS_CAST_CACHE];
 
     ObjectUnparent *unparent;
 };
@@ -535,6 +536,7 @@ struct InterfaceClass
     ObjectClass parent_class;
     /*< private >*/
     ObjectClass *concrete_class;
+    Type interface_type;
 };
 
 #define TYPE_INTERFACE "interface"
diff --git a/kvm-all.c b/kvm-all.c
index 393775459d..0bfb060fa7 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -31,6 +31,7 @@
 #include "sysemu/kvm.h"
 #include "qemu/bswap.h"
 #include "exec/memory.h"
+#include "exec/ram_addr.h"
 #include "exec/address-spaces.h"
 #include "qemu/event_notifier.h"
 #include "trace.h"
@@ -379,31 +380,10 @@ static int kvm_set_migration_log(int enable)
 static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
                                          unsigned long *bitmap)
 {
-    unsigned int i, j;
-    unsigned long page_number, c;
-    hwaddr addr, addr1;
-    unsigned int pages = int128_get64(section->size) / getpagesize();
-    unsigned int len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
-    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
+    ram_addr_t start = section->offset_within_region + section->mr->ram_addr;
+    ram_addr_t pages = int128_get64(section->size) / getpagesize();
 
-    /*
-     * bitmap-traveling is faster than memory-traveling (for addr...)
-     * especially when most of the memory is not dirty.
-     */
-    for (i = 0; i < len; i++) {
-        if (bitmap[i] != 0) {
-            c = leul_to_cpu(bitmap[i]);
-            do {
-                j = ffsl(c) - 1;
-                c &= ~(1ul << j);
-                page_number = (i * HOST_LONG_BITS + j) * hpratio;
-                addr1 = page_number * TARGET_PAGE_SIZE;
-                addr = section->offset_within_region + addr1;
-                memory_region_set_dirty(section->mr, addr,
-                                        TARGET_PAGE_SIZE * hpratio);
-            } while (c != 0);
-        }
-    }
+    cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages);
     return 0;
 }
 
diff --git a/linux-user/aarch64/syscall.h b/linux-user/aarch64/syscall.h
index aef419efeb..18f44a8a40 100644
--- a/linux-user/aarch64/syscall.h
+++ b/linux-user/aarch64/syscall.h
@@ -7,3 +7,4 @@ struct target_pt_regs {
 
 #define UNAME_MACHINE "aarch64"
 #define UNAME_MINIMUM_RELEASE "3.8.0"
+#define TARGET_CLONE_BACKWARDS
diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
index 6f5539b50f..21560ef832 100644
--- a/linux-user/aarch64/target_cpu.h
+++ b/linux-user/aarch64/target_cpu.h
@@ -29,7 +29,10 @@ static inline void cpu_clone_regs(CPUARMState *env, target_ulong newsp)
 
 static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
 {
-    env->sr.tpidr_el0 = newtls;
+    /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
+     * different from AArch32 Linux, which uses TPIDRRO.
+     */
+    env->cp15.tpidr_el0 = newtls;
 }
 
 #endif
diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h
index ed323c079d..39d65b692b 100644
--- a/linux-user/arm/target_cpu.h
+++ b/linux-user/arm/target_cpu.h
@@ -29,7 +29,7 @@ static inline void cpu_clone_regs(CPUARMState *env, target_ulong newsp)
 
 static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
 {
-    env->cp15.c13_tls2 = newtls;
+    env->cp15.tpidrro_el0 = newtls;
 }
 
 #endif
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 8dd424dadd..5902f162b4 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1998,8 +1998,7 @@ give_up:
     free(syms);
 }
 
-int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
-                    struct image_info * info)
+int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
 {
     struct image_info interp_info;
     struct elfhdr elf_ex;
diff --git a/linux-user/flatload.c b/linux-user/flatload.c
index ceb89bb6ea..566a7a87a3 100644
--- a/linux-user/flatload.c
+++ b/linux-user/flatload.c
@@ -704,8 +704,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
 
 #endif /* CONFIG_BINFMT_SHARED_FLAT */
 
-int load_flt_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
-                    struct image_info * info)
+int load_flt_binary(struct linux_binprm *bprm, struct image_info *info)
 {
     struct lib_info libinfo[MAX_SHARED_LIBS];
     abi_ulong p = bprm->p;
diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c
index a1fe5ed9ae..f2997c2f4b 100644
--- a/linux-user/linuxload.c
+++ b/linux-user/linuxload.c
@@ -154,13 +154,13 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp,
                 && bprm->buf[1] == 'E'
                 && bprm->buf[2] == 'L'
                 && bprm->buf[3] == 'F') {
-            retval = load_elf_binary(bprm, regs, infop);
+            retval = load_elf_binary(bprm, infop);
 #if defined(TARGET_HAS_BFLT)
         } else if (bprm->buf[0] == 'b'
                 && bprm->buf[1] == 'F'
                 && bprm->buf[2] == 'L'
                 && bprm->buf[3] == 'T') {
-            retval = load_flt_binary(bprm,regs,infop);
+            retval = load_flt_binary(bprm, infop);
 #endif
         } else {
             return -ENOEXEC;
diff --git a/linux-user/main.c b/linux-user/main.c
index 54f71fe8f6..cabc9e1a0e 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -566,7 +566,7 @@ do_kernel_trap(CPUARMState *env)
         end_exclusive();
         break;
     case 0xffff0fe0: /* __kernel_get_tls */
-        env->regs[0] = env->cp15.c13_tls2;
+        env->regs[0] = env->cp15.tpidrro_el0;
         break;
     case 0xffff0f60: /* __kernel_cmpxchg64 */
         arm_kernel_cmpxchg64_helper(env);
@@ -585,20 +585,25 @@ do_kernel_trap(CPUARMState *env)
 
     return 0;
 }
-#endif
 
+/* Store exclusive handling for AArch32 */
 static int do_strex(CPUARMState *env)
 {
-    uint32_t val;
+    uint64_t val;
     int size;
     int rc = 1;
     int segv = 0;
     uint32_t addr;
     start_exclusive();
-    addr = env->exclusive_addr;
-    if (addr != env->exclusive_test) {
+    if (env->exclusive_addr != env->exclusive_test) {
         goto fail;
     }
+    /* We know we're always AArch32 so the address is in uint32_t range
+     * unless it was the -1 exclusive-monitor-lost value (which won't
+     * match exclusive_test above).
+     */
+    assert(extract64(env->exclusive_addr, 32, 32) == 0);
+    addr = env->exclusive_addr;
     size = env->exclusive_info & 0xf;
     switch (size) {
     case 0:
@@ -618,19 +623,19 @@ static int do_strex(CPUARMState *env)
         env->cp15.c6_data = addr;
         goto done;
     }
-    if (val != env->exclusive_val) {
-        goto fail;
-    }
     if (size == 3) {
-        segv = get_user_u32(val, addr + 4);
+        uint32_t valhi;
+        segv = get_user_u32(valhi, addr + 4);
         if (segv) {
             env->cp15.c6_data = addr + 4;
             goto done;
         }
-        if (val != env->exclusive_high) {
-            goto fail;
-        }
+        val = deposit64(val, 32, 32, valhi);
     }
+    if (val != env->exclusive_val) {
+        goto fail;
+    }
+
     val = env->regs[(env->exclusive_info >> 8) & 0xf];
     switch (size) {
     case 0:
@@ -665,7 +670,6 @@ done:
     return segv;
 }
 
-#ifdef TARGET_ABI32
 void cpu_loop(CPUARMState *env)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
@@ -880,6 +884,122 @@ void cpu_loop(CPUARMState *env)
 
 #else
 
+/*
+ * Handle AArch64 store exclusive (EXCP_STREX).
+ *
+ * rs receives the status result, rt is the register stored and rt2 is
+ * the second register stored by a pair (STXP).
+ * Returns nonzero on a fault, with the address in env->cp15.c6_data.
+ */
+static int do_strex_a64(CPUARMState *env)
+{
+    uint64_t val;
+    int size;
+    bool is_pair;
+    int rc = 1;
+    int segv = 0;
+    uint64_t addr;
+    int rs, rt, rt2;
+
+    start_exclusive();
+    /* exclusive_info: size | is_pair << 2 | rs << 4 | rt << 9 | rt2 << 14 */
+    size = extract32(env->exclusive_info, 0, 2);
+    is_pair = extract32(env->exclusive_info, 2, 1);
+    rs = extract32(env->exclusive_info, 4, 5);
+    rt = extract32(env->exclusive_info, 9, 5);
+    rt2 = extract32(env->exclusive_info, 14, 5);
+
+    addr = env->exclusive_addr;
+
+    if (addr != env->exclusive_test) {
+        goto finish;
+    }
+
+    switch (size) {
+    case 0:
+        segv = get_user_u8(val, addr);
+        break;
+    case 1:
+        segv = get_user_u16(val, addr);
+        break;
+    case 2:
+        segv = get_user_u32(val, addr);
+        break;
+    case 3:
+        segv = get_user_u64(val, addr);
+        break;
+    default:
+        abort();
+    }
+    if (segv) {
+        env->cp15.c6_data = addr;
+        goto error;
+    }
+    if (val != env->exclusive_val) {
+        goto finish;
+    }
+    if (is_pair) {
+        if (size == 2) {
+            segv = get_user_u32(val, addr + 4);
+        } else {
+            segv = get_user_u64(val, addr + 8);
+        }
+        if (segv) {
+            env->cp15.c6_data = addr + (size == 2 ? 4 : 8);
+            goto error;
+        }
+        if (val != env->exclusive_high) {
+            goto finish;
+        }
+    }
+    val = env->xregs[rt];
+    switch (size) {
+    case 0:
+        segv = put_user_u8(val, addr);
+        break;
+    case 1:
+        segv = put_user_u16(val, addr);
+        break;
+    case 2:
+        segv = put_user_u32(val, addr);
+        break;
+    case 3:
+        segv = put_user_u64(val, addr);
+        break;
+    }
+    if (segv) {
+        env->cp15.c6_data = addr; /* report the store fault address */
+        goto error;
+    }
+    if (is_pair) {
+        val = env->xregs[rt2];
+        if (size == 2) {
+            segv = put_user_u32(val, addr + 4);
+        } else {
+            segv = put_user_u64(val, addr + 8);
+        }
+        if (segv) {
+            env->cp15.c6_data = addr + (size == 2 ? 4 : 8);
+            goto error;
+        }
+    }
+    rc = 0;
+finish:
+    env->pc += 4;
+    /* rs == 31 encodes a write to the ZR, thus throwing away
+     * the status return. This is rather silly but valid.
+     */
+    if (rs < 31) {
+        env->xregs[rs] = rc;
+    }
+error:
+    /* instruction faulted, PC does not advance */
+    /* either way a strex releases any exclusive lock we have */
+    env->exclusive_addr = -1;
+    end_exclusive();
+    return segv;
+}
+
 /* AArch64 main loop */
 void cpu_loop(CPUARMState *env)
 {
@@ -939,7 +1059,7 @@ void cpu_loop(CPUARMState *env)
             }
             break;
         case EXCP_STREX:
-            if (do_strex(env)) {
+            if (do_strex_a64(env)) {
                 addr = env->cp15.c6_data;
                 goto do_segv;
             }
@@ -951,6 +1071,12 @@ void cpu_loop(CPUARMState *env)
             abort();
         }
         process_pending_signals(env);
+        /* Exception return on AArch64 always clears the exclusive monitor,
+         * so any return to running guest code implies this.
+         * A strex (successful or otherwise) also clears the monitor, so
+         * we don't need to special-case EXCP_STREX.
+         */
+        env->exclusive_addr = -1;
     }
 }
 #endif /* ndef TARGET_ABI32 */
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index e2717e0775..c2f74f33d6 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -178,10 +178,8 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp,
              struct target_pt_regs * regs, struct image_info *infop,
              struct linux_binprm *);
 
-int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
-                    struct image_info * info);
-int load_flt_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
-                    struct image_info * info);
+int load_elf_binary(struct linux_binprm *bprm, struct image_info *info);
+int load_flt_binary(struct linux_binprm *bprm, struct image_info *info);
 
 abi_long memcpy_to_target(abi_ulong dest, const void *src,
                           unsigned long len);
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 4e7148a2d6..01d7c393df 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -1189,8 +1189,8 @@ static int target_setup_sigframe(struct target_rt_sigframe *sf,
         __put_user(env->vfp.regs[i * 2 + 1], &aux->fpsimd.vregs[i * 2 + 1]);
 #endif
     }
-    __put_user(/*env->fpsr*/0, &aux->fpsimd.fpsr);
-    __put_user(/*env->fpcr*/0, &aux->fpsimd.fpcr);
+    __put_user(vfp_get_fpsr(env), &aux->fpsimd.fpsr);
+    __put_user(vfp_get_fpcr(env), &aux->fpsimd.fpcr);
     __put_user(TARGET_FPSIMD_MAGIC, &aux->fpsimd.head.magic);
     __put_user(sizeof(struct target_fpsimd_context),
             &aux->fpsimd.head.size);
@@ -1209,7 +1209,7 @@ static int target_restore_sigframe(CPUARMState *env,
     int i;
     struct target_aux_context *aux =
         (struct target_aux_context *)sf->uc.tuc_mcontext.__reserved;
-    uint32_t magic, size;
+    uint32_t magic, size, fpsr, fpcr;
     uint64_t pstate;
 
     target_to_host_sigset(&set, &sf->uc.tuc_sigmask);
@@ -1235,6 +1235,10 @@ static int target_restore_sigframe(CPUARMState *env,
     for (i = 0; i < 32 * 2; i++) {
         __get_user(env->vfp.regs[i], &aux->fpsimd.vregs[i]);
     }
+    __get_user(fpsr, &aux->fpsimd.fpsr);
+    vfp_set_fpsr(env, fpsr);
+    __get_user(fpcr, &aux->fpsimd.fpcr);
+    vfp_set_fpcr(env, fpcr);
 
     return 0;
 }
@@ -2539,9 +2543,9 @@ void sparc64_set_context(CPUSPARCState *env)
             abi_ulong *src, *dst;
             src = ucp->tuc_sigmask.sig;
             dst = target_set.sig;
-            for (i = 0; i < sizeof(target_sigset_t) / sizeof(abi_ulong);
-                 i++, dst++, src++)
+            for (i = 0; i < TARGET_NSIG_WORDS; i++, dst++, src++) {
                 err |= __get_user(*dst, src);
+            }
             if (err)
                 goto do_sigsegv;
         }
@@ -2644,9 +2648,9 @@ void sparc64_get_context(CPUSPARCState *env)
         abi_ulong *src, *dst;
         src = target_set.sig;
         dst = ucp->tuc_sigmask.sig;
-        for (i = 0; i < sizeof(target_sigset_t) / sizeof(abi_ulong);
-             i++, dst++, src++)
+        for (i = 0; i < TARGET_NSIG_WORDS; i++, dst++, src++) {
             err |= __put_user(*src, dst);
+        }
         if (err)
             goto do_sigsegv;
     }
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index efd1453987..0ac05b85f2 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -2245,6 +2245,22 @@ static abi_long do_socketcall(int num, abi_ulong vptr)
             ret = do_accept4(sockfd, target_addr, target_addrlen, 0);
         }
         break;
+    case SOCKOP_accept4:
+        {
+            abi_ulong sockfd;
+            abi_ulong target_addr, target_addrlen;
+            abi_ulong flags;
+
+            if (get_user_ual(sockfd, vptr)
+                || get_user_ual(target_addr, vptr + n)
+                || get_user_ual(target_addrlen, vptr + 2 * n)
+                || get_user_ual(flags, vptr + 3 * n)) {
+                return -TARGET_EFAULT;
+            }
+
+            ret = do_accept4(sockfd, target_addr, target_addrlen, flags);
+        }
+        break;
     case SOCKOP_getsockname:
         {
             abi_ulong sockfd;
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index cf08db5a23..ae30476217 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -27,6 +27,7 @@
 #define SOCKOP_getsockopt       15
 #define SOCKOP_sendmsg          16
 #define SOCKOP_recvmsg          17
+#define SOCKOP_accept4          18
 
 #define IPCOP_semop		1
 #define IPCOP_semget		2
diff --git a/memory.c b/memory.c
index 776431416f..59ecc28401 100644
--- a/memory.c
+++ b/memory.c
@@ -22,6 +22,7 @@
 #include <assert.h>
 
 #include "exec/memory-internal.h"
+#include "exec/ram_addr.h"
 
 //#define DEBUG_UNASSIGNED
 
@@ -1174,15 +1175,14 @@ bool memory_region_get_dirty(MemoryRegion *mr, hwaddr addr,
                              hwaddr size, unsigned client)
 {
     assert(mr->terminates);
-    return cpu_physical_memory_get_dirty(mr->ram_addr + addr, size,
-                                         1 << client);
+    return cpu_physical_memory_get_dirty(mr->ram_addr + addr, size, client);
 }
 
 void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
                              hwaddr size)
 {
     assert(mr->terminates);
-    return cpu_physical_memory_set_dirty_range(mr->ram_addr + addr, size, -1);
+    cpu_physical_memory_set_dirty_range(mr->ram_addr + addr, size);
 }
 
 bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr,
@@ -1190,12 +1190,9 @@ bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr,
 {
     bool ret;
     assert(mr->terminates);
-    ret = cpu_physical_memory_get_dirty(mr->ram_addr + addr, size,
-                                        1 << client);
+    ret = cpu_physical_memory_get_dirty(mr->ram_addr + addr, size, client);
     if (ret) {
-        cpu_physical_memory_reset_dirty(mr->ram_addr + addr,
-                                        mr->ram_addr + addr + size,
-                                        1 << client);
+        cpu_physical_memory_reset_dirty(mr->ram_addr + addr, size, client);
     }
     return ret;
 }
@@ -1241,9 +1238,7 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
                                hwaddr size, unsigned client)
 {
     assert(mr->terminates);
-    cpu_physical_memory_reset_dirty(mr->ram_addr + addr,
-                                    mr->ram_addr + addr + size,
-                                    1 << client);
+    cpu_physical_memory_reset_dirty(mr->ram_addr + addr, size, client);
 }
 
 void *memory_region_get_ram_ptr(MemoryRegion *mr)
diff --git a/migration.c b/migration.c
index 557195a9d2..7235c23ffe 100644
--- a/migration.c
+++ b/migration.c
@@ -40,6 +40,7 @@ enum {
     MIG_STATE_ERROR = -1,
     MIG_STATE_NONE,
     MIG_STATE_SETUP,
+    MIG_STATE_CANCELLING,
     MIG_STATE_CANCELLED,
     MIG_STATE_ACTIVE,
     MIG_STATE_COMPLETED,
@@ -196,6 +197,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
         info->has_total_time = false;
         break;
     case MIG_STATE_ACTIVE:
+    case MIG_STATE_CANCELLING:
         info->has_status = true;
         info->status = g_strdup("active");
         info->has_total_time = true;
@@ -282,6 +284,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 
 /* shared migration helpers */
 
+static void migrate_set_state(MigrationState *s, int old_state, int new_state)
+{
+    if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
+        trace_migrate_set_state(new_state); /* only when the swap happened */
+    }
+}
+
 static void migrate_fd_cleanup(void *opaque)
 {
     MigrationState *s = opaque;
@@ -303,18 +312,14 @@ static void migrate_fd_cleanup(void *opaque)
 
     if (s->state != MIG_STATE_COMPLETED) {
         qemu_savevm_state_cancel();
+        if (s->state == MIG_STATE_CANCELLING) {
+            migrate_set_state(s, MIG_STATE_CANCELLING, MIG_STATE_CANCELLED);
+        }
     }
 
     notifier_list_notify(&migration_state_notifiers, s);
 }
 
-static void migrate_set_state(MigrationState *s, int old_state, int new_state)
-{
-    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
-        trace_migrate_set_state(new_state);
-    }
-}
-
 void migrate_fd_error(MigrationState *s)
 {
     DPRINTF("setting error state\n");
@@ -326,9 +331,16 @@ void migrate_fd_error(MigrationState *s)
 
 static void migrate_fd_cancel(MigrationState *s)
 {
+    int old_state ;
     DPRINTF("cancelling migration\n");
 
-    migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
+    do {
+        old_state = s->state;
+        if (old_state != MIG_STATE_SETUP && old_state != MIG_STATE_ACTIVE) {
+            break;
+        }
+        migrate_set_state(s, old_state, MIG_STATE_CANCELLING);
+    } while (s->state != MIG_STATE_CANCELLING);
 }
 
 void add_migration_state_change_notifier(Notifier *notify)
@@ -405,7 +417,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     params.blk = has_blk && blk;
     params.shared = has_inc && inc;
 
-    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
+    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
+        s->state == MIG_STATE_CANCELLING) {
         error_set(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
@@ -584,7 +597,7 @@ static void *migration_thread(void *opaque)
 
                 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
                 if (ret >= 0) {
-                    qemu_file_set_rate_limit(s->file, INT_MAX);
+                    qemu_file_set_rate_limit(s->file, INT64_MAX);
                     qemu_savevm_state_complete(s->file);
                 }
                 qemu_mutex_unlock_iothread();
diff --git a/net/net.c b/net/net.c
index 9db88cc0ee..f8db85f30b 100644
--- a/net/net.c
+++ b/net/net.c
@@ -856,7 +856,7 @@ static int net_host_check_device(const char *device)
                                        ,"vde"
 #endif
     };
-    for (i = 0; i < sizeof(valid_param_list) / sizeof(char *); i++) {
+    for (i = 0; i < ARRAY_SIZE(valid_param_list); i++) {
         if (!strncmp(valid_param_list[i], device,
                      strlen(valid_param_list[i])))
             return 1;
diff --git a/qapi-schema.json b/qapi-schema.json
index 4abbf36535..f27c48a285 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1967,9 +1967,11 @@
 #
 # @top:              The file name of the backing image within the image chain,
 #                    which contains the topmost data to be committed down.
-#                    Note, the active layer as 'top' is currently unsupported.
 #
 #                    If top == base, that is an error.
+#                    If top == active, the job will not complete by itself;
+#                    the user needs to complete it with the block-job-complete
+#                    command after receiving the ready event. (Since 2.0)
 #
 #
 # @speed:  #optional the maximum speed, in bytes per second
@@ -1979,7 +1981,6 @@
 #          If @device does not exist, DeviceNotFound
 #          If image commit is not supported by this device, NotSupported
 #          If @base or @top is invalid, a generic error is returned
-#          If @top is the active layer, or omitted, a generic error is returned
 #          If @speed is invalid, InvalidParameter
 #
 # Since: 1.3
@@ -3056,7 +3057,7 @@
 #
 # @devname: #optional path of the netmap device (default: '/dev/netmap').
 #
-# Since 1.8
+# Since 2.0
 ##
 { 'type': 'NetdevNetmapOptions',
   'data': {
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 62807718df..1d3b68d40a 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -87,7 +87,7 @@ static void qdev_print_devinfo(DeviceClass *dc)
     if (dc->desc) {
         error_printf(", desc \"%s\"", dc->desc);
     }
-    if (dc->no_user) {
+    if (dc->cannot_instantiate_with_device_add_yet) {
         error_printf(", no-user");
     }
     error_printf("\n");
@@ -127,7 +127,8 @@ static void qdev_print_devinfos(bool show_no_user)
             if ((i < DEVICE_CATEGORY_MAX
                  ? !test_bit(i, dc->categories)
                  : !bitmap_empty(dc->categories, DEVICE_CATEGORY_MAX))
-                || (!show_no_user && dc->no_user)) {
+                || (!show_no_user
+                    && dc->cannot_instantiate_with_device_add_yet)) {
                 continue;
             }
             if (!cat_printed) {
@@ -477,8 +478,9 @@ DeviceState *qdev_device_add(QemuOpts *opts)
         }
     }
 
-    if (!oc) {
-        qerror_report(QERR_INVALID_PARAMETER_VALUE, "driver", "device type");
+    if (!object_class_dynamic_cast(oc, TYPE_DEVICE)) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "'%s' is not a valid device model name", driver);
         return NULL;
     }
 
@@ -489,6 +491,11 @@ DeviceState *qdev_device_add(QemuOpts *opts)
     }
 
     dc = DEVICE_CLASS(oc);
+    if (dc->cannot_instantiate_with_device_add_yet) {
+        qerror_report(QERR_INVALID_PARAMETER_VALUE, "driver",
+                      "pluggable device type");
+        return NULL;
+    }
 
     /* find bus */
     path = qemu_opt_get(opts, "bus");
diff --git a/qemu-char.c b/qemu-char.c
index 418dc69d39..30c5a6afd0 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -1975,8 +1975,7 @@ static void win_stdio_wait_func(void *opaque)
     DWORD              dwSize;
     int                i;
 
-    ret = ReadConsoleInput(stdio->hStdIn, buf, sizeof(buf) / sizeof(*buf),
-                           &dwSize);
+    ret = ReadConsoleInput(stdio->hStdIn, buf, ARRAY_SIZE(buf), &dwSize);
 
     if (!ret) {
         /* Avoid error storm */
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 185dd47a03..4e9c6e9b6e 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -654,6 +654,21 @@ Supported options:
 Specifies which VHD subformat to use. Valid options are
 @code{dynamic} (default) and @code{fixed}.
 @end table
+
+@item VHDX
+Hyper-V compatible image format (VHDX).
+Supported options:
+@table @code
+@item subformat
+Specifies which VHDX subformat to use. Valid options are
+@code{dynamic} (default) and @code{fixed}.
+@item block_state_zero
+Force use of payload blocks of type 'ZERO'.
+@item block_size
+Block size; min 1 MB, max 256 MB.  0 means auto-calculate based on image size.
+@item log_size
+Log size; min 1 MB.
+@end table
 @end table
 
 @subsubsection Read-only formats
diff --git a/qemu-file.c b/qemu-file.c
new file mode 100644
index 0000000000..9473b674ba
--- /dev/null
+++ b/qemu-file.c
@@ -0,0 +1,826 @@
+#include "qemu-common.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+#include "block/coroutine.h"
+#include "migration/migration.h"
+#include "migration/qemu-file.h"
+
+#define IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN(IOV_MAX, 64)
+
+struct QEMUFile {
+    const QEMUFileOps *ops;
+    void *opaque;
+
+    int64_t bytes_xfer;
+    int64_t xfer_limit;
+
+    int64_t pos; /* start of buffer when writing, end of buffer
+                    when reading */
+    int buf_index;
+    int buf_size; /* 0 when writing */
+    uint8_t buf[IO_BUF_SIZE];
+
+    struct iovec iov[MAX_IOV_SIZE];
+    unsigned int iovcnt;
+
+    int last_error;
+};
+
+typedef struct QEMUFileStdio {
+    FILE *stdio_file;
+    QEMUFile *file;
+} QEMUFileStdio;
+
+typedef struct QEMUFileSocket {
+    int fd;
+    QEMUFile *file;
+} QEMUFileSocket;
+
+static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
+                                    int64_t pos)
+{
+    QEMUFileSocket *s = opaque;
+    ssize_t len;
+    ssize_t size = iov_size(iov, iovcnt);
+
+    len = iov_send(s->fd, iov, iovcnt, 0, size);
+    if (len < size) {
+        len = -socket_error();
+    }
+    return len;
+}
+
+static int socket_get_fd(void *opaque)
+{
+    QEMUFileSocket *s = opaque;
+
+    return s->fd;
+}
+
+static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
+{
+    QEMUFileSocket *s = opaque;
+    ssize_t len;
+
+    for (;;) {
+        len = qemu_recv(s->fd, buf, size, 0);
+        if (len != -1) {
+            break;
+        }
+        if (socket_error() == EAGAIN) {
+            yield_until_fd_readable(s->fd);
+        } else if (socket_error() != EINTR) {
+            break;
+        }
+    }
+
+    if (len == -1) {
+        len = -socket_error();
+    }
+    return len;
+}
+
+static int socket_close(void *opaque)
+{
+    QEMUFileSocket *s = opaque;
+    closesocket(s->fd);
+    g_free(s);
+    return 0;
+}
+
+static int stdio_get_fd(void *opaque)
+{
+    QEMUFileStdio *s = opaque;
+
+    return fileno(s->stdio_file);
+}
+
+static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos,
+                            int size)
+{
+    QEMUFileStdio *s = opaque;
+    return fwrite(buf, 1, size, s->stdio_file);
+}
+
+static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
+{
+    QEMUFileStdio *s = opaque;
+    FILE *fp = s->stdio_file;
+    int bytes;
+
+    for (;;) {
+        clearerr(fp);
+        bytes = fread(buf, 1, size, fp);
+        if (bytes != 0 || !ferror(fp)) {
+            break;
+        }
+        if (errno == EAGAIN) {
+            yield_until_fd_readable(fileno(fp));
+        } else if (errno != EINTR) {
+            break;
+        }
+    }
+    return bytes;
+}
+
+static int stdio_pclose(void *opaque)
+{
+    QEMUFileStdio *s = opaque;
+    int ret;
+    ret = pclose(s->stdio_file);
+    if (ret == -1) {
+        ret = -errno;
+    } else if (!WIFEXITED(ret) || WEXITSTATUS(ret) != 0) {
+        /* close succeeded, but non-zero exit code: */
+        ret = -EIO; /* fake errno value */
+    }
+    g_free(s);
+    return ret;
+}
+
+/* Close the stdio FILE and free its wrapper; negative return on failure. */
+static int stdio_fclose(void *opaque)
+{
+    QEMUFileStdio *s = opaque;
+    int ret = 0;
+
+    if (s->file->ops->put_buffer || s->file->ops->writev_buffer) {
+        int fd = fileno(s->stdio_file);
+        struct stat st;
+
+        ret = fstat(fd, &st);
+        if (ret == 0 && S_ISREG(st.st_mode)) {
+            /* For a regular file, flush the data to disk before
+             * signaling success.
+             */
+            ret = fsync(fd);
+            if (ret != 0) {
+                /* Keep going: the stream and s must still be released. */
+                ret = -errno;
+            }
+        }
+    }
+    if (fclose(s->stdio_file) == EOF) {
+        ret = -errno;
+    }
+    g_free(s);
+    return ret;
+}
+
+static const QEMUFileOps stdio_pipe_read_ops = {
+    .get_fd =     stdio_get_fd,
+    .get_buffer = stdio_get_buffer,
+    .close =      stdio_pclose
+};
+
+static const QEMUFileOps stdio_pipe_write_ops = {
+    .get_fd =     stdio_get_fd,
+    .put_buffer = stdio_put_buffer,
+    .close =      stdio_pclose
+};
+
+QEMUFile *qemu_popen_cmd(const char *command, const char *mode)
+{
+    FILE *stdio_file;
+    QEMUFileStdio *s;
+
+    if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') || mode[1] != 0) {
+        fprintf(stderr, "qemu_popen: Argument validity check failed\n");
+        return NULL;
+    }
+
+    stdio_file = popen(command, mode);
+    if (stdio_file == NULL) {
+        return NULL;
+    }
+
+    s = g_malloc0(sizeof(QEMUFileStdio));
+
+    s->stdio_file = stdio_file;
+
+    if (mode[0] == 'r') {
+        s->file = qemu_fopen_ops(s, &stdio_pipe_read_ops);
+    } else {
+        s->file = qemu_fopen_ops(s, &stdio_pipe_write_ops);
+    }
+    return s->file;
+}
+
+static const QEMUFileOps stdio_file_read_ops = {
+    .get_fd =     stdio_get_fd,
+    .get_buffer = stdio_get_buffer,
+    .close =      stdio_fclose
+};
+
+static const QEMUFileOps stdio_file_write_ops = {
+    .get_fd =     stdio_get_fd,
+    .put_buffer = stdio_put_buffer,
+    .close =      stdio_fclose
+};
+
+/* Write the whole iovec to s->fd; returns bytes written or -errno. */
+static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
+                                  int64_t pos)
+{
+    QEMUFileSocket *s = opaque;
+    ssize_t len, offset = 0;
+    ssize_t size = iov_size(iov, iovcnt);
+    ssize_t total = 0;
+
+    assert(iovcnt > 0);
+    while (size > 0) {
+        /* Find the next start position; skip all full-sized vector elements  */
+        while (offset >= iov[0].iov_len) {
+            offset -= iov[0].iov_len;
+            iov++, iovcnt--;
+        }
+
+        /* skip `offset' bytes from the (now) first element, undo it on exit */
+        assert(iovcnt > 0);
+        iov[0].iov_base += offset;
+        iov[0].iov_len -= offset;
+
+        do {
+            len = writev(s->fd, iov, iovcnt);
+        } while (len == -1 && errno == EINTR);
+
+        /* Undo the changes above, on the error path too */
+        iov[0].iov_base -= offset;
+        iov[0].iov_len += offset;
+        if (len == -1) {
+            return -errno;
+        }
+
+        /* Prepare for the next iteration */
+        offset += len;
+        total += len;
+        size -= len;
+    }
+
+    return total;
+}
+
+static int unix_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
+{
+    QEMUFileSocket *s = opaque;
+    ssize_t len;
+
+    for (;;) {
+        len = read(s->fd, buf, size);
+        if (len != -1) {
+            break;
+        }
+        if (errno == EAGAIN) {
+            yield_until_fd_readable(s->fd);
+        } else if (errno != EINTR) {
+            break;
+        }
+    }
+
+    if (len == -1) {
+        len = -errno;
+    }
+    return len;
+}
+
+static int unix_close(void *opaque)
+{
+    QEMUFileSocket *s = opaque;
+    close(s->fd);
+    g_free(s);
+    return 0;
+}
+
+static const QEMUFileOps unix_read_ops = {
+    .get_fd =     socket_get_fd,
+    .get_buffer = unix_get_buffer,
+    .close =      unix_close
+};
+
+static const QEMUFileOps unix_write_ops = {
+    .get_fd =     socket_get_fd,
+    .writev_buffer = unix_writev_buffer,
+    .close =      unix_close
+};
+
+QEMUFile *qemu_fdopen(int fd, const char *mode)
+{
+    QEMUFileSocket *s;
+
+    if (mode == NULL ||
+        (mode[0] != 'r' && mode[0] != 'w') ||
+        mode[1] != 'b' || mode[2] != 0) {
+        fprintf(stderr, "qemu_fdopen: Argument validity check failed\n");
+        return NULL;
+    }
+
+    s = g_malloc0(sizeof(QEMUFileSocket));
+    s->fd = fd;
+
+    if (mode[0] == 'r') {
+        s->file = qemu_fopen_ops(s, &unix_read_ops);
+    } else {
+        s->file = qemu_fopen_ops(s, &unix_write_ops);
+    }
+    return s->file;
+}
+
+static const QEMUFileOps socket_read_ops = {
+    .get_fd =     socket_get_fd,
+    .get_buffer = socket_get_buffer,
+    .close =      socket_close
+};
+
+static const QEMUFileOps socket_write_ops = {
+    .get_fd =     socket_get_fd,
+    .writev_buffer = socket_writev_buffer,
+    .close =      socket_close
+};
+
+bool qemu_file_mode_is_not_valid(const char *mode)
+{
+    if (mode == NULL ||
+        (mode[0] != 'r' && mode[0] != 'w') ||
+        mode[1] != 'b' || mode[2] != 0) {
+        fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
+        return true;
+    }
+
+    return false;
+}
+
+QEMUFile *qemu_fopen_socket(int fd, const char *mode)
+{
+    QEMUFileSocket *s;
+
+    if (qemu_file_mode_is_not_valid(mode)) {
+        return NULL;
+    }
+
+    s = g_malloc0(sizeof(QEMUFileSocket));
+    s->fd = fd;
+    if (mode[0] == 'w') {
+        qemu_set_block(s->fd);
+        s->file = qemu_fopen_ops(s, &socket_write_ops);
+    } else {
+        s->file = qemu_fopen_ops(s, &socket_read_ops);
+    }
+    return s->file;
+}
+
+QEMUFile *qemu_fopen(const char *filename, const char *mode)
+{
+    QEMUFileStdio *s;
+
+    if (qemu_file_mode_is_not_valid(mode)) {
+        return NULL;
+    }
+
+    s = g_malloc0(sizeof(QEMUFileStdio));
+
+    s->stdio_file = fopen(filename, mode);
+    if (!s->stdio_file) {
+        goto fail;
+    }
+
+    if (mode[0] == 'w') {
+        s->file = qemu_fopen_ops(s, &stdio_file_write_ops);
+    } else {
+        s->file = qemu_fopen_ops(s, &stdio_file_read_ops);
+    }
+    return s->file;
+fail:
+    g_free(s);
+    return NULL;
+}
+
+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
+{
+    QEMUFile *f;
+
+    f = g_malloc0(sizeof(QEMUFile));
+
+    f->opaque = opaque;
+    f->ops = ops;
+    return f;
+}
+
+/*
+ * Get last error for stream f
+ *
+ * Return negative error value if there has been an error on previous
+ * operations, return 0 if no error happened.
+ *
+ */
+int qemu_file_get_error(QEMUFile *f)
+{
+    return f->last_error;
+}
+
+void qemu_file_set_error(QEMUFile *f, int ret)
+{
+    if (f->last_error == 0) {
+        f->last_error = ret;
+    }
+}
+
+static inline bool qemu_file_is_writable(QEMUFile *f)
+{
+    return f->ops->writev_buffer || f->ops->put_buffer;
+}
+
+/**
+ * Flushes QEMUFile buffer
+ *
+ * If there is writev_buffer QEMUFileOps it uses it otherwise uses
+ * put_buffer ops.
+ */
+void qemu_fflush(QEMUFile *f)
+{
+    ssize_t ret = 0;
+
+    if (!qemu_file_is_writable(f)) {
+        return;
+    }
+
+    if (f->ops->writev_buffer) {
+        if (f->iovcnt > 0) {
+            ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
+        }
+    } else {
+        if (f->buf_index > 0) {
+            ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index);
+        }
+    }
+    if (ret >= 0) {
+        f->pos += ret;
+    }
+    f->buf_index = 0;
+    f->iovcnt = 0;
+    if (ret < 0) {
+        qemu_file_set_error(f, ret);
+    }
+}
+
+void ram_control_before_iterate(QEMUFile *f, uint64_t flags)
+{
+    int ret = 0;
+
+    if (f->ops->before_ram_iterate) {
+        ret = f->ops->before_ram_iterate(f, f->opaque, flags);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
+}
+
+void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
+{
+    int ret = 0;
+
+    if (f->ops->after_ram_iterate) {
+        ret = f->ops->after_ram_iterate(f, f->opaque, flags);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
+}
+
+void ram_control_load_hook(QEMUFile *f, uint64_t flags)
+{
+    int ret = -EINVAL;
+
+    if (f->ops->hook_ram_load) {
+        ret = f->ops->hook_ram_load(f, f->opaque, flags);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    } else {
+        qemu_file_set_error(f, ret);
+    }
+}
+
+size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                         ram_addr_t offset, size_t size, int *bytes_sent)
+{
+    if (f->ops->save_page) {
+        int ret = f->ops->save_page(f, f->opaque, block_offset,
+                                    offset, size, bytes_sent);
+
+        if (ret != RAM_SAVE_CONTROL_DELAYED) {
+            if (bytes_sent && *bytes_sent > 0) {
+                qemu_update_position(f, *bytes_sent);
+            } else if (ret < 0) {
+                qemu_file_set_error(f, ret);
+            }
+        }
+
+        return ret;
+    }
+
+    return RAM_SAVE_CONTROL_NOT_SUPP;
+}
+
+static void qemu_fill_buffer(QEMUFile *f)
+{
+    int len;
+    int pending;
+
+    assert(!qemu_file_is_writable(f));
+
+    pending = f->buf_size - f->buf_index;
+    if (pending > 0) {
+        memmove(f->buf, f->buf + f->buf_index, pending);
+    }
+    f->buf_index = 0;
+    f->buf_size = pending;
+
+    len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
+                        IO_BUF_SIZE - pending);
+    if (len > 0) {
+        f->buf_size += len;
+        f->pos += len;
+    } else if (len == 0) {
+        qemu_file_set_error(f, -EIO);
+    } else if (len != -EAGAIN) {
+        qemu_file_set_error(f, len);
+    }
+}
+
+int qemu_get_fd(QEMUFile *f)
+{
+    if (f->ops->get_fd) {
+        return f->ops->get_fd(f->opaque);
+    }
+    return -1;
+}
+
+void qemu_update_position(QEMUFile *f, size_t size)
+{
+    f->pos += size;
+}
+
+/**
+ * Flush pending output, call the backend's close hook (if it has one) and
+ * free the QEMUFile.
+ *
+ * Returns a negative error value if an error was recorded on an earlier
+ * operation or reported while closing.  Otherwise returns 0 or a positive
+ * number whose meaning depends on the specific backend being used.
+ */
+int qemu_fclose(QEMUFile *f)
+{
+    int result;
+
+    qemu_fflush(f);
+    result = qemu_file_get_error(f);
+
+    if (f->ops->close) {
+        int close_result = f->ops->close(f->opaque);
+
+        /* Only a so-far clean stream adopts the backend's close status. */
+        result = (result < 0) ? result : close_result;
+    }
+
+    /* An error recorded before closing takes precedence over the
+     * close() return value.
+     */
+    if (f->last_error) {
+        result = f->last_error;
+    }
+
+    g_free(f);
+    return result;
+}
+
+/*
+ * Queue @size bytes at @buf in the iovec array of @f.
+ *
+ * A range that starts exactly where the last queued element ends is merged
+ * into that element; anything else takes a new slot.  Once MAX_IOV_SIZE
+ * slots are in use the file is flushed.
+ */
+static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size)
+{
+    struct iovec *next = &f->iov[f->iovcnt];    /* first unused slot */
+
+    if (f->iovcnt > 0 &&
+        buf == (uint8_t *)next[-1].iov_base + next[-1].iov_len) {
+        /* Adjacent to the previous element: just extend it. */
+        next[-1].iov_len += size;
+    } else {
+        next->iov_base = (uint8_t *)buf;
+        next->iov_len = size;
+        f->iovcnt++;
+    }
+
+    if (f->iovcnt >= MAX_IOV_SIZE) {
+        qemu_fflush(f);
+    }
+}
+
+/*
+ * Queue @buf for output without copying it when the backend provides
+ * writev_buffer; otherwise fall back to the copying qemu_put_buffer().
+ *
+ * In the non-copying case only the pointer is stored in the iovec array,
+ * and nothing is queued once an error has been recorded on @f.
+ */
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size)
+{
+    if (f->ops->writev_buffer) {
+        if (!f->last_error) {
+            f->bytes_xfer += size;
+            add_to_iovec(f, buf, size);
+        }
+        return;
+    }
+
+    qemu_put_buffer(f, buf, size);
+}
+
+/*
+ * Copy @size bytes from @buf into the internal buffer of @f, flushing
+ * whenever the buffer becomes full.
+ *
+ * Does nothing if an error is already recorded on @f, and stops early as
+ * soon as one is recorded while writing.
+ */
+void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
+{
+    if (f->last_error) {
+        return;
+    }
+
+    while (size > 0) {
+        /* Take as much as still fits in the internal buffer. */
+        int room = IO_BUF_SIZE - f->buf_index;
+        int chunk = size < room ? size : room;
+
+        memcpy(f->buf + f->buf_index, buf, chunk);
+        f->bytes_xfer += chunk;
+        if (f->ops->writev_buffer) {
+            add_to_iovec(f, f->buf + f->buf_index, chunk);
+        }
+        f->buf_index += chunk;
+        if (f->buf_index == IO_BUF_SIZE) {
+            qemu_fflush(f);
+        }
+        if (qemu_file_get_error(f)) {
+            break;
+        }
+        buf += chunk;
+        size -= chunk;
+    }
+}
+
+/*
+ * Append the single byte @v to the internal buffer of @f, flushing when
+ * the buffer becomes full.  Does nothing if an error is already recorded.
+ */
+void qemu_put_byte(QEMUFile *f, int v)
+{
+    uint8_t *slot;
+
+    if (f->last_error) {
+        return;
+    }
+
+    slot = f->buf + f->buf_index;
+    *slot = v;
+    f->bytes_xfer++;
+    if (f->ops->writev_buffer) {
+        add_to_iovec(f, slot, 1);
+    }
+    f->buf_index++;
+    if (f->buf_index == IO_BUF_SIZE) {
+        qemu_fflush(f);
+    }
+}
+
+/*
+ * Consume @size bytes of already-buffered data.  A skip that would move
+ * past the end of the buffered data is silently ignored.
+ */
+void qemu_file_skip(QEMUFile *f, int size)
+{
+    int target = f->buf_index + size;
+
+    if (target <= f->buf_size) {
+        f->buf_index = target;
+    }
+}
+
+/*
+ * Copy up to @size bytes, starting @offset bytes past the current read
+ * position, into @buf without consuming them.
+ *
+ * The buffer is refilled once if it does not already hold enough data.
+ * Returns the number of bytes copied, which may be less than @size, or 0
+ * if nothing is available at that offset.
+ */
+int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
+{
+    int start;
+    int avail;
+
+    assert(!qemu_file_is_writable(f));
+
+    start = f->buf_index + offset;
+    avail = f->buf_size - start;
+    if (avail < size) {
+        /* The refill resets buf_index, so both values are recomputed. */
+        qemu_fill_buffer(f);
+        start = f->buf_index + offset;
+        avail = f->buf_size - start;
+    }
+
+    if (avail <= 0) {
+        return 0;
+    }
+    if (avail < size) {
+        size = avail;
+    }
+
+    memcpy(buf, f->buf + start, size);
+    return size;
+}
+
+/*
+ * Read up to @size bytes from @f into @buf, consuming them.
+ *
+ * Returns the number of bytes actually read; this is less than @size when
+ * qemu_peek_buffer() reports that no more data is available.
+ */
+int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size)
+{
+    int done = 0;
+
+    while (done < size) {
+        int got = qemu_peek_buffer(f, buf + done, size - done, 0);
+
+        if (got == 0) {
+            break;
+        }
+        qemu_file_skip(f, got);
+        done += got;
+    }
+    return done;
+}
+
+/*
+ * Return the byte @offset bytes past the current read position without
+ * consuming it.  The buffer is refilled once if needed; 0 is returned when
+ * the byte is still not available afterwards.
+ */
+int qemu_peek_byte(QEMUFile *f, int offset)
+{
+    int at = f->buf_index + offset;
+
+    assert(!qemu_file_is_writable(f));
+
+    if (at < f->buf_size) {
+        return f->buf[at];
+    }
+
+    /* The refill resets buf_index, so the index is recomputed. */
+    qemu_fill_buffer(f);
+    at = f->buf_index + offset;
+    return at < f->buf_size ? f->buf[at] : 0;
+}
+
+/* Read one byte from @f: peek at the current position, then step past it. */
+int qemu_get_byte(QEMUFile *f)
+{
+    int byte = qemu_peek_byte(f, 0);
+
+    qemu_file_skip(f, 1);
+    return byte;
+}
+
+/* Flush @f, then return its stream position. */
+int64_t qemu_ftell(QEMUFile *f)
+{
+    qemu_fflush(f);
+    return f->pos;
+}
+
+/*
+ * Return 1 if the caller should stop sending on @f for now, 0 otherwise.
+ *
+ * That is the case when an error is recorded on the stream, or when a
+ * positive transfer limit is set and bytes_xfer has gone beyond it.
+ */
+int qemu_file_rate_limit(QEMUFile *f)
+{
+    return qemu_file_get_error(f) ||
+           (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit);
+}
+
+/* Return the transfer limit currently set on @f. */
+int64_t qemu_file_get_rate_limit(QEMUFile *f)
+{
+    return f->xfer_limit;
+}
+
+/*
+ * Set the transfer limit of @f to @limit.  qemu_file_rate_limit() only
+ * compares bytes_xfer against the limit when the limit is positive.
+ */
+void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit)
+{
+    f->xfer_limit = limit;
+}
+
+/*
+ * Zero the transferred-bytes counter that qemu_file_rate_limit() compares
+ * against the transfer limit.
+ */
+void qemu_file_reset_rate_limit(QEMUFile *f)
+{
+    f->bytes_xfer = 0;
+}
+
+/* Write the low 16 bits of @v to @f, most significant byte first. */
+void qemu_put_be16(QEMUFile *f, unsigned int v)
+{
+    int shift;
+
+    for (shift = 8; shift >= 0; shift -= 8) {
+        qemu_put_byte(f, v >> shift);
+    }
+}
+
+/* Write the low 32 bits of @v to @f, most significant byte first. */
+void qemu_put_be32(QEMUFile *f, unsigned int v)
+{
+    int shift;
+
+    for (shift = 24; shift >= 0; shift -= 8) {
+        qemu_put_byte(f, v >> shift);
+    }
+}
+
+/* Write @v to @f as two big-endian 32-bit words, upper word first. */
+void qemu_put_be64(QEMUFile *f, uint64_t v)
+{
+    unsigned int upper = v >> 32;
+    unsigned int lower = v;
+
+    qemu_put_be32(f, upper);
+    qemu_put_be32(f, lower);
+}
+
+/* Read two bytes from @f and combine them, most significant byte first. */
+unsigned int qemu_get_be16(QEMUFile *f)
+{
+    unsigned int high = qemu_get_byte(f);
+    unsigned int low = qemu_get_byte(f);
+
+    return (high << 8) | low;
+}
+
+/*
+ * Read four bytes from @f and combine them, most significant byte first.
+ *
+ * qemu_get_byte() returns an int.  Each byte is converted to unsigned int
+ * before shifting, because shifting a value of 0x80 or more left by 24 as
+ * a signed int overflows, which is undefined behaviour in C.
+ */
+unsigned int qemu_get_be32(QEMUFile *f)
+{
+    unsigned int v;
+    v = (unsigned int)qemu_get_byte(f) << 24;
+    v |= (unsigned int)qemu_get_byte(f) << 16;
+    v |= (unsigned int)qemu_get_byte(f) << 8;
+    v |= (unsigned int)qemu_get_byte(f);
+    return v;
+}
+
+/* Read two big-endian 32-bit words from @f, upper word first. */
+uint64_t qemu_get_be64(QEMUFile *f)
+{
+    uint64_t upper = qemu_get_be32(f);
+    uint64_t lower = qemu_get_be32(f);
+
+    return (upper << 32) | lower;
+}
diff --git a/qemu-img.texi b/qemu-img.texi
index be31191e43..1bba91efde 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -431,8 +431,8 @@ This option can only be enabled if @code{compat=1.1} is specified.
 
 @item Other
 QEMU also supports various other image file formats for compatibility with
-older QEMU versions or other hypervisors, including VMDK, VDI, VHD (vpc), qcow1
-and QED. For a full list of supported formats see @code{qemu-img --help}.
+older QEMU versions or other hypervisors, including VMDK, VDI, VHD (vpc), VHDX,
+qcow1 and QED. For a full list of supported formats see @code{qemu-img --help}.
 For a more detailed description of these formats, see the QEMU Emulation User
 Documentation.
 
diff --git a/qemu-options.hx b/qemu-options.hx
index bcfe9eaa3e..56e5fdf1e0 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2419,6 +2419,8 @@ vc:80Cx24C
 No device is allocated.
 @item null
 void device
+@item chardev:@var{id}
+Use a named character device defined with the @code{-chardev} option.
 @item /dev/XXX
 [Linux only] Use host tty, e.g. @file{/dev/ttyS0}. The host serial port
 parameters are set according to the emulated ones.
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index cf07869599..b7c125364c 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -231,6 +231,7 @@ int seccomp_start(void)
 
     ctx = seccomp_init(SCMP_ACT_KILL);
     if (ctx == NULL) {
+        rc = -1;
         goto seccomp_return;
     }
 
diff --git a/qom/cpu.c b/qom/cpu.c
index 818fb26dd4..9d62479546 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -254,7 +254,11 @@ static void cpu_class_init(ObjectClass *klass, void *data)
     k->gdb_read_register = cpu_common_gdb_read_register;
     k->gdb_write_register = cpu_common_gdb_write_register;
     dc->realize = cpu_common_realizefn;
-    dc->no_user = 1;
+    /*
+     * Reason: CPUs still need special care by board code: wiring up
+     * IRQs, adding reset handlers, halting non-first CPUs, ...
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo cpu_type_info = {
diff --git a/qom/object.c b/qom/object.c
index 4dee02b6e2..62e7e415d9 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -78,8 +78,11 @@ static GHashTable *type_table_get(void)
     return type_table;
 }
 
+static bool enumerating_types;
+
 static void type_table_add(TypeImpl *ti)
 {
+    assert(!enumerating_types);
     g_hash_table_insert(type_table_get(), (void *)ti->name, ti);
 }
 
@@ -88,7 +91,7 @@ static TypeImpl *type_table_lookup(const char *name)
     return g_hash_table_lookup(type_table_get(), name);
 }
 
-static TypeImpl *type_register_internal(const TypeInfo *info)
+static TypeImpl *type_new(const TypeInfo *info)
 {
     TypeImpl *ti = g_malloc0(sizeof(*ti));
     int i;
@@ -122,8 +125,15 @@ static TypeImpl *type_register_internal(const TypeInfo *info)
     }
     ti->num_interfaces = i;
 
-    type_table_add(ti);
+    return ti;
+}
+
+static TypeImpl *type_register_internal(const TypeInfo *info)
+{
+    TypeImpl *ti;
+    ti = type_new(info);
 
+    type_table_add(ti);
     return ti;
 }
 
@@ -206,22 +216,25 @@ static bool type_is_ancestor(TypeImpl *type, TypeImpl *target_type)
 
 static void type_initialize(TypeImpl *ti);
 
-static void type_initialize_interface(TypeImpl *ti, const char *parent)
+static void type_initialize_interface(TypeImpl *ti, TypeImpl *interface_type,
+                                      TypeImpl *parent_type)
 {
     InterfaceClass *new_iface;
     TypeInfo info = { };
     TypeImpl *iface_impl;
 
-    info.parent = parent;
-    info.name = g_strdup_printf("%s::%s", ti->name, info.parent);
+    info.parent = parent_type->name;
+    info.name = g_strdup_printf("%s::%s", ti->name, interface_type->name);
     info.abstract = true;
 
-    iface_impl = type_register(&info);
+    iface_impl = type_new(&info);
+    iface_impl->parent_type = parent_type;
     type_initialize(iface_impl);
     g_free((char *)info.name);
 
     new_iface = (InterfaceClass *)iface_impl->class;
     new_iface->concrete_class = ti->class;
+    new_iface->interface_type = interface_type;
 
     ti->class->interfaces = g_slist_append(ti->class->interfaces,
                                            iface_impl->class);
@@ -251,8 +264,10 @@ static void type_initialize(TypeImpl *ti)
         ti->class->interfaces = NULL;
 
         for (e = parent->class->interfaces; e; e = e->next) {
-            ObjectClass *iface = e->data;
-            type_initialize_interface(ti, object_class_get_name(iface));
+            InterfaceClass *iface = e->data;
+            ObjectClass *klass = OBJECT_CLASS(iface);
+
+            type_initialize_interface(ti, iface->interface_type, klass->type);
         }
 
         for (i = 0; i < ti->num_interfaces; i++) {
@@ -269,7 +284,7 @@ static void type_initialize(TypeImpl *ti)
                 continue;
             }
 
-            type_initialize_interface(ti, ti->interfaces[i].typename);
+            type_initialize_interface(ti, t, t);
         }
     }
 
@@ -285,8 +300,6 @@ static void type_initialize(TypeImpl *ti)
     if (ti->class_init) {
         ti->class_init(ti->class, ti->class_data);
     }
-
-
 }
 
 static void object_init_with_type(Object *obj, TypeImpl *ti)
@@ -458,7 +471,7 @@ Object *object_dynamic_cast_assert(Object *obj, const char *typename,
     Object *inst;
 
     for (i = 0; obj && i < OBJECT_CLASS_CAST_CACHE; i++) {
-        if (obj->class->cast_cache[i] == typename) {
+        if (obj->class->object_cast_cache[i] == typename) {
             goto out;
         }
     }
@@ -475,9 +488,10 @@ Object *object_dynamic_cast_assert(Object *obj, const char *typename,
 
     if (obj && obj == inst) {
         for (i = 1; i < OBJECT_CLASS_CAST_CACHE; i++) {
-            obj->class->cast_cache[i - 1] = obj->class->cast_cache[i];
+            obj->class->object_cast_cache[i - 1] =
+                    obj->class->object_cast_cache[i];
         }
-        obj->class->cast_cache[i - 1] = typename;
+        obj->class->object_cast_cache[i - 1] = typename;
     }
 
 out:
@@ -547,7 +561,7 @@ ObjectClass *object_class_dynamic_cast_assert(ObjectClass *class,
     int i;
 
     for (i = 0; class && i < OBJECT_CLASS_CAST_CACHE; i++) {
-        if (class->cast_cache[i] == typename) {
+        if (class->class_cast_cache[i] == typename) {
             ret = class;
             goto out;
         }
@@ -568,9 +582,9 @@ ObjectClass *object_class_dynamic_cast_assert(ObjectClass *class,
 #ifdef CONFIG_QOM_CAST_DEBUG
     if (class && ret == class) {
         for (i = 1; i < OBJECT_CLASS_CAST_CACHE; i++) {
-            class->cast_cache[i - 1] = class->cast_cache[i];
+            class->class_cast_cache[i - 1] = class->class_cast_cache[i];
         }
-        class->cast_cache[i - 1] = typename;
+        class->class_cast_cache[i - 1] = typename;
     }
 out:
 #endif
@@ -659,7 +673,9 @@ void object_class_foreach(void (*fn)(ObjectClass *klass, void *opaque),
 {
     OCFData data = { fn, implements_type, include_abstract, opaque };
 
+    enumerating_types = true;
     g_hash_table_foreach(type_table_get(), object_class_foreach_tramp, &data);
+    enumerating_types = false;
 }
 
 int object_child_foreach(Object *obj, int (*fn)(Object *child, void *opaque),
diff --git a/savevm.c b/savevm.c
index 3f912ddcf9..a7dbe18a67 100644
--- a/savevm.c
+++ b/savevm.c
@@ -38,7 +38,6 @@
 #include "exec/memory.h"
 #include "qmp-commands.h"
 #include "trace.h"
-#include "qemu/bitops.h"
 #include "qemu/iov.h"
 #include "block/snapshot.h"
 #include "block/qapi.h"
@@ -53,7 +52,7 @@
 #define ARP_OP_REQUEST_REV 0x3
 
 static int announce_self_create(uint8_t *buf,
-				uint8_t *mac_addr)
+                                uint8_t *mac_addr)
 {
     /* Ethernet header. */
     memset(buf, 0xff, 6);         /* destination MAC addr */
@@ -100,411 +99,21 @@ static void qemu_announce_self_once(void *opaque)
         timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
                        50 + (SELF_ANNOUNCE_ROUNDS - count - 1) * 100);
     } else {
-	    timer_del(timer);
-	    timer_free(timer);
+            timer_del(timer);
+            timer_free(timer);
     }
 }
 
 void qemu_announce_self(void)
 {
-	static QEMUTimer *timer;
-	timer = timer_new_ms(QEMU_CLOCK_REALTIME, qemu_announce_self_once, &timer);
-	qemu_announce_self_once(&timer);
+    static QEMUTimer *timer;
+    timer = timer_new_ms(QEMU_CLOCK_REALTIME, qemu_announce_self_once, &timer);
+    qemu_announce_self_once(&timer);
 }
 
 /***********************************************************/
 /* savevm/loadvm support */
 
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN(IOV_MAX, 64)
-
-struct QEMUFile {
-    const QEMUFileOps *ops;
-    void *opaque;
-
-    int64_t bytes_xfer;
-    int64_t xfer_limit;
-
-    int64_t pos; /* start of buffer when writing, end of buffer
-                    when reading */
-    int buf_index;
-    int buf_size; /* 0 when writing */
-    uint8_t buf[IO_BUF_SIZE];
-
-    struct iovec iov[MAX_IOV_SIZE];
-    unsigned int iovcnt;
-
-    int last_error;
-};
-
-typedef struct QEMUFileStdio
-{
-    FILE *stdio_file;
-    QEMUFile *file;
-} QEMUFileStdio;
-
-typedef struct QEMUFileSocket
-{
-    int fd;
-    QEMUFile *file;
-} QEMUFileSocket;
-
-static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
-                                    int64_t pos)
-{
-    QEMUFileSocket *s = opaque;
-    ssize_t len;
-    ssize_t size = iov_size(iov, iovcnt);
-
-    len = iov_send(s->fd, iov, iovcnt, 0, size);
-    if (len < size) {
-        len = -socket_error();
-    }
-    return len;
-}
-
-static int socket_get_fd(void *opaque)
-{
-    QEMUFileSocket *s = opaque;
-
-    return s->fd;
-}
-
-static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
-{
-    QEMUFileSocket *s = opaque;
-    ssize_t len;
-
-    for (;;) {
-        len = qemu_recv(s->fd, buf, size, 0);
-        if (len != -1) {
-            break;
-        }
-        if (socket_error() == EAGAIN) {
-            yield_until_fd_readable(s->fd);
-        } else if (socket_error() != EINTR) {
-            break;
-        }
-    }
-
-    if (len == -1) {
-        len = -socket_error();
-    }
-    return len;
-}
-
-static int socket_close(void *opaque)
-{
-    QEMUFileSocket *s = opaque;
-    closesocket(s->fd);
-    g_free(s);
-    return 0;
-}
-
-static int stdio_get_fd(void *opaque)
-{
-    QEMUFileStdio *s = opaque;
-
-    return fileno(s->stdio_file);
-}
-
-static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
-{
-    QEMUFileStdio *s = opaque;
-    return fwrite(buf, 1, size, s->stdio_file);
-}
-
-static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
-{
-    QEMUFileStdio *s = opaque;
-    FILE *fp = s->stdio_file;
-    int bytes;
-
-    for (;;) {
-        clearerr(fp);
-        bytes = fread(buf, 1, size, fp);
-        if (bytes != 0 || !ferror(fp)) {
-            break;
-        }
-        if (errno == EAGAIN) {
-            yield_until_fd_readable(fileno(fp));
-        } else if (errno != EINTR) {
-            break;
-        }
-    }
-    return bytes;
-}
-
-static int stdio_pclose(void *opaque)
-{
-    QEMUFileStdio *s = opaque;
-    int ret;
-    ret = pclose(s->stdio_file);
-    if (ret == -1) {
-        ret = -errno;
-    } else if (!WIFEXITED(ret) || WEXITSTATUS(ret) != 0) {
-        /* close succeeded, but non-zero exit code: */
-        ret = -EIO; /* fake errno value */
-    }
-    g_free(s);
-    return ret;
-}
-
-static int stdio_fclose(void *opaque)
-{
-    QEMUFileStdio *s = opaque;
-    int ret = 0;
-
-    if (s->file->ops->put_buffer || s->file->ops->writev_buffer) {
-        int fd = fileno(s->stdio_file);
-        struct stat st;
-
-        ret = fstat(fd, &st);
-        if (ret == 0 && S_ISREG(st.st_mode)) {
-            /*
-             * If the file handle is a regular file make sure the
-             * data is flushed to disk before signaling success.
-             */
-            ret = fsync(fd);
-            if (ret != 0) {
-                ret = -errno;
-                return ret;
-            }
-        }
-    }
-    if (fclose(s->stdio_file) == EOF) {
-        ret = -errno;
-    }
-    g_free(s);
-    return ret;
-}
-
-static const QEMUFileOps stdio_pipe_read_ops = {
-    .get_fd =     stdio_get_fd,
-    .get_buffer = stdio_get_buffer,
-    .close =      stdio_pclose
-};
-
-static const QEMUFileOps stdio_pipe_write_ops = {
-    .get_fd =     stdio_get_fd,
-    .put_buffer = stdio_put_buffer,
-    .close =      stdio_pclose
-};
-
-QEMUFile *qemu_popen_cmd(const char *command, const char *mode)
-{
-    FILE *stdio_file;
-    QEMUFileStdio *s;
-
-    if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') || mode[1] != 0) {
-        fprintf(stderr, "qemu_popen: Argument validity check failed\n");
-        return NULL;
-    }
-
-    stdio_file = popen(command, mode);
-    if (stdio_file == NULL) {
-        return NULL;
-    }
-
-    s = g_malloc0(sizeof(QEMUFileStdio));
-
-    s->stdio_file = stdio_file;
-
-    if(mode[0] == 'r') {
-        s->file = qemu_fopen_ops(s, &stdio_pipe_read_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &stdio_pipe_write_ops);
-    }
-    return s->file;
-}
-
-static const QEMUFileOps stdio_file_read_ops = {
-    .get_fd =     stdio_get_fd,
-    .get_buffer = stdio_get_buffer,
-    .close =      stdio_fclose
-};
-
-static const QEMUFileOps stdio_file_write_ops = {
-    .get_fd =     stdio_get_fd,
-    .put_buffer = stdio_put_buffer,
-    .close =      stdio_fclose
-};
-
-static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
-                                  int64_t pos)
-{
-    QEMUFileSocket *s = opaque;
-    ssize_t len, offset;
-    ssize_t size = iov_size(iov, iovcnt);
-    ssize_t total = 0;
-
-    assert(iovcnt > 0);
-    offset = 0;
-    while (size > 0) {
-        /* Find the next start position; skip all full-sized vector elements  */
-        while (offset >= iov[0].iov_len) {
-            offset -= iov[0].iov_len;
-            iov++, iovcnt--;
-        }
-
-        /* skip `offset' bytes from the (now) first element, undo it on exit */
-        assert(iovcnt > 0);
-        iov[0].iov_base += offset;
-        iov[0].iov_len -= offset;
-
-        do {
-            len = writev(s->fd, iov, iovcnt);
-        } while (len == -1 && errno == EINTR);
-        if (len == -1) {
-            return -errno;
-        }
-
-        /* Undo the changes above */
-        iov[0].iov_base -= offset;
-        iov[0].iov_len += offset;
-
-        /* Prepare for the next iteration */
-        offset += len;
-        total += len;
-        size -= len;
-    }
-
-    return total;
-}
-
-static int unix_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
-{
-    QEMUFileSocket *s = opaque;
-    ssize_t len;
-
-    for (;;) {
-        len = read(s->fd, buf, size);
-        if (len != -1) {
-            break;
-        }
-        if (errno == EAGAIN) {
-            yield_until_fd_readable(s->fd);
-        } else if (errno != EINTR) {
-            break;
-        }
-    }
-
-    if (len == -1) {
-        len = -errno;
-    }
-    return len;
-}
-
-static int unix_close(void *opaque)
-{
-    QEMUFileSocket *s = opaque;
-    close(s->fd);
-    g_free(s);
-    return 0;
-}
-
-static const QEMUFileOps unix_read_ops = {
-    .get_fd =     socket_get_fd,
-    .get_buffer = unix_get_buffer,
-    .close =      unix_close
-};
-
-static const QEMUFileOps unix_write_ops = {
-    .get_fd =     socket_get_fd,
-    .writev_buffer = unix_writev_buffer,
-    .close =      unix_close
-};
-
-QEMUFile *qemu_fdopen(int fd, const char *mode)
-{
-    QEMUFileSocket *s;
-
-    if (mode == NULL ||
-	(mode[0] != 'r' && mode[0] != 'w') ||
-	mode[1] != 'b' || mode[2] != 0) {
-        fprintf(stderr, "qemu_fdopen: Argument validity check failed\n");
-        return NULL;
-    }
-
-    s = g_malloc0(sizeof(QEMUFileSocket));
-    s->fd = fd;
-
-    if(mode[0] == 'r') {
-        s->file = qemu_fopen_ops(s, &unix_read_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &unix_write_ops);
-    }
-    return s->file;
-}
-
-static const QEMUFileOps socket_read_ops = {
-    .get_fd =     socket_get_fd,
-    .get_buffer = socket_get_buffer,
-    .close =      socket_close
-};
-
-static const QEMUFileOps socket_write_ops = {
-    .get_fd =     socket_get_fd,
-    .writev_buffer = socket_writev_buffer,
-    .close =      socket_close
-};
-
-bool qemu_file_mode_is_not_valid(const char *mode)
-{
-    if (mode == NULL ||
-        (mode[0] != 'r' && mode[0] != 'w') ||
-        mode[1] != 'b' || mode[2] != 0) {
-        fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
-        return true;
-    }
-
-    return false;
-}
-
-QEMUFile *qemu_fopen_socket(int fd, const char *mode)
-{
-    QEMUFileSocket *s;
-
-    if (qemu_file_mode_is_not_valid(mode)) {
-        return NULL;
-    }
-
-    s = g_malloc0(sizeof(QEMUFileSocket));
-    s->fd = fd;
-    if (mode[0] == 'w') {
-        qemu_set_block(s->fd);
-        s->file = qemu_fopen_ops(s, &socket_write_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &socket_read_ops);
-    }
-    return s->file;
-}
-
-QEMUFile *qemu_fopen(const char *filename, const char *mode)
-{
-    QEMUFileStdio *s;
-
-    if (qemu_file_mode_is_not_valid(mode)) {
-        return NULL;
-    }
-
-    s = g_malloc0(sizeof(QEMUFileStdio));
-
-    s->stdio_file = fopen(filename, mode);
-    if (!s->stdio_file)
-        goto fail;
-    
-    if(mode[0] == 'w') {
-        s->file = qemu_fopen_ops(s, &stdio_file_write_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &stdio_file_read_ops);
-    }
-    return s->file;
-fail:
-    g_free(s);
-    return NULL;
-}
-
 static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
                                    int64_t pos)
 {
@@ -550,441 +159,16 @@ static const QEMUFileOps bdrv_write_ops = {
 
 static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
 {
-    if (is_writable)
+    if (is_writable) {
         return qemu_fopen_ops(bs, &bdrv_write_ops);
-    return qemu_fopen_ops(bs, &bdrv_read_ops);
-}
-
-QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
-{
-    QEMUFile *f;
-
-    f = g_malloc0(sizeof(QEMUFile));
-
-    f->opaque = opaque;
-    f->ops = ops;
-    return f;
-}
-
-/*
- * Get last error for stream f
- *
- * Return negative error value if there has been an error on previous
- * operations, return 0 if no error happened.
- *
- */
-int qemu_file_get_error(QEMUFile *f)
-{
-    return f->last_error;
-}
-
-static void qemu_file_set_error(QEMUFile *f, int ret)
-{
-    if (f->last_error == 0) {
-        f->last_error = ret;
     }
+    return qemu_fopen_ops(bs, &bdrv_read_ops);
 }
 
-static inline bool qemu_file_is_writable(QEMUFile *f)
-{
-    return f->ops->writev_buffer || f->ops->put_buffer;
-}
 
-/**
- * Flushes QEMUFile buffer
- *
- * If there is writev_buffer QEMUFileOps it uses it otherwise uses
- * put_buffer ops.
+/* QEMUFile timer support.
+ * Kept out of qemu-file.c so that qemu-file.c does not depend on qemu-timer.c
  */
-void qemu_fflush(QEMUFile *f)
-{
-    ssize_t ret = 0;
-
-    if (!qemu_file_is_writable(f)) {
-        return;
-    }
-
-    if (f->ops->writev_buffer) {
-        if (f->iovcnt > 0) {
-            ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
-        }
-    } else {
-        if (f->buf_index > 0) {
-            ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index);
-        }
-    }
-    if (ret >= 0) {
-        f->pos += ret;
-    }
-    f->buf_index = 0;
-    f->iovcnt = 0;
-    if (ret < 0) {
-        qemu_file_set_error(f, ret);
-    }
-}
-
-void ram_control_before_iterate(QEMUFile *f, uint64_t flags)
-{
-    int ret = 0;
-
-    if (f->ops->before_ram_iterate) {
-        ret = f->ops->before_ram_iterate(f, f->opaque, flags);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-    }
-}
-
-void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
-{
-    int ret = 0;
-
-    if (f->ops->after_ram_iterate) {
-        ret = f->ops->after_ram_iterate(f, f->opaque, flags);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-    }
-}
-
-void ram_control_load_hook(QEMUFile *f, uint64_t flags)
-{
-    int ret = -EINVAL;
-
-    if (f->ops->hook_ram_load) {
-        ret = f->ops->hook_ram_load(f, f->opaque, flags);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-    } else {
-        qemu_file_set_error(f, ret);
-    }
-}
-
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                         ram_addr_t offset, size_t size, int *bytes_sent)
-{
-    if (f->ops->save_page) {
-        int ret = f->ops->save_page(f, f->opaque, block_offset,
-                                    offset, size, bytes_sent);
-
-        if (ret != RAM_SAVE_CONTROL_DELAYED) {
-            if (bytes_sent && *bytes_sent > 0) {
-                qemu_update_position(f, *bytes_sent);
-            } else if (ret < 0) {
-                qemu_file_set_error(f, ret);
-            }
-        }
-
-        return ret;
-    }
-
-    return RAM_SAVE_CONTROL_NOT_SUPP;
-}
-
-static void qemu_fill_buffer(QEMUFile *f)
-{
-    int len;
-    int pending;
-
-    assert(!qemu_file_is_writable(f));
-
-    pending = f->buf_size - f->buf_index;
-    if (pending > 0) {
-        memmove(f->buf, f->buf + f->buf_index, pending);
-    }
-    f->buf_index = 0;
-    f->buf_size = pending;
-
-    len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos,
-                        IO_BUF_SIZE - pending);
-    if (len > 0) {
-        f->buf_size += len;
-        f->pos += len;
-    } else if (len == 0) {
-        qemu_file_set_error(f, -EIO);
-    } else if (len != -EAGAIN)
-        qemu_file_set_error(f, len);
-}
-
-int qemu_get_fd(QEMUFile *f)
-{
-    if (f->ops->get_fd) {
-        return f->ops->get_fd(f->opaque);
-    }
-    return -1;
-}
-
-void qemu_update_position(QEMUFile *f, size_t size)
-{
-    f->pos += size;
-}
-
-/** Closes the file
- *
- * Returns negative error value if any error happened on previous operations or
- * while closing the file. Returns 0 or positive number on success.
- *
- * The meaning of return value on success depends on the specific backend
- * being used.
- */
-int qemu_fclose(QEMUFile *f)
-{
-    int ret;
-    qemu_fflush(f);
-    ret = qemu_file_get_error(f);
-
-    if (f->ops->close) {
-        int ret2 = f->ops->close(f->opaque);
-        if (ret >= 0) {
-            ret = ret2;
-        }
-    }
-    /* If any error was spotted before closing, we should report it
-     * instead of the close() return value.
-     */
-    if (f->last_error) {
-        ret = f->last_error;
-    }
-    g_free(f);
-    return ret;
-}
-
-static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size)
-{
-    /* check for adjacent buffer and coalesce them */
-    if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
-        f->iov[f->iovcnt - 1].iov_len) {
-        f->iov[f->iovcnt - 1].iov_len += size;
-    } else {
-        f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
-        f->iov[f->iovcnt++].iov_len = size;
-    }
-
-    if (f->iovcnt >= MAX_IOV_SIZE) {
-        qemu_fflush(f);
-    }
-}
-
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size)
-{
-    if (!f->ops->writev_buffer) {
-        qemu_put_buffer(f, buf, size);
-        return;
-    }
-
-    if (f->last_error) {
-        return;
-    }
-
-    f->bytes_xfer += size;
-    add_to_iovec(f, buf, size);
-}
-
-void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
-{
-    int l;
-
-    if (f->last_error) {
-        return;
-    }
-
-    while (size > 0) {
-        l = IO_BUF_SIZE - f->buf_index;
-        if (l > size)
-            l = size;
-        memcpy(f->buf + f->buf_index, buf, l);
-        f->bytes_xfer += l;
-        if (f->ops->writev_buffer) {
-            add_to_iovec(f, f->buf + f->buf_index, l);
-        }
-        f->buf_index += l;
-        if (f->buf_index == IO_BUF_SIZE) {
-            qemu_fflush(f);
-        }
-        if (qemu_file_get_error(f)) {
-            break;
-        }
-        buf += l;
-        size -= l;
-    }
-}
-
-void qemu_put_byte(QEMUFile *f, int v)
-{
-    if (f->last_error) {
-        return;
-    }
-
-    f->buf[f->buf_index] = v;
-    f->bytes_xfer++;
-    if (f->ops->writev_buffer) {
-        add_to_iovec(f, f->buf + f->buf_index, 1);
-    }
-    f->buf_index++;
-    if (f->buf_index == IO_BUF_SIZE) {
-        qemu_fflush(f);
-    }
-}
-
-static void qemu_file_skip(QEMUFile *f, int size)
-{
-    if (f->buf_index + size <= f->buf_size) {
-        f->buf_index += size;
-    }
-}
-
-static int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
-{
-    int pending;
-    int index;
-
-    assert(!qemu_file_is_writable(f));
-
-    index = f->buf_index + offset;
-    pending = f->buf_size - index;
-    if (pending < size) {
-        qemu_fill_buffer(f);
-        index = f->buf_index + offset;
-        pending = f->buf_size - index;
-    }
-
-    if (pending <= 0) {
-        return 0;
-    }
-    if (size > pending) {
-        size = pending;
-    }
-
-    memcpy(buf, f->buf + index, size);
-    return size;
-}
-
-int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size)
-{
-    int pending = size;
-    int done = 0;
-
-    while (pending > 0) {
-        int res;
-
-        res = qemu_peek_buffer(f, buf, pending, 0);
-        if (res == 0) {
-            return done;
-        }
-        qemu_file_skip(f, res);
-        buf += res;
-        pending -= res;
-        done += res;
-    }
-    return done;
-}
-
-static int qemu_peek_byte(QEMUFile *f, int offset)
-{
-    int index = f->buf_index + offset;
-
-    assert(!qemu_file_is_writable(f));
-
-    if (index >= f->buf_size) {
-        qemu_fill_buffer(f);
-        index = f->buf_index + offset;
-        if (index >= f->buf_size) {
-            return 0;
-        }
-    }
-    return f->buf[index];
-}
-
-int qemu_get_byte(QEMUFile *f)
-{
-    int result;
-
-    result = qemu_peek_byte(f, 0);
-    qemu_file_skip(f, 1);
-    return result;
-}
-
-int64_t qemu_ftell(QEMUFile *f)
-{
-    qemu_fflush(f);
-    return f->pos;
-}
-
-int qemu_file_rate_limit(QEMUFile *f)
-{
-    if (qemu_file_get_error(f)) {
-        return 1;
-    }
-    if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) {
-        return 1;
-    }
-    return 0;
-}
-
-int64_t qemu_file_get_rate_limit(QEMUFile *f)
-{
-    return f->xfer_limit;
-}
-
-void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit)
-{
-    f->xfer_limit = limit;
-}
-
-void qemu_file_reset_rate_limit(QEMUFile *f)
-{
-    f->bytes_xfer = 0;
-}
-
-void qemu_put_be16(QEMUFile *f, unsigned int v)
-{
-    qemu_put_byte(f, v >> 8);
-    qemu_put_byte(f, v);
-}
-
-void qemu_put_be32(QEMUFile *f, unsigned int v)
-{
-    qemu_put_byte(f, v >> 24);
-    qemu_put_byte(f, v >> 16);
-    qemu_put_byte(f, v >> 8);
-    qemu_put_byte(f, v);
-}
-
-void qemu_put_be64(QEMUFile *f, uint64_t v)
-{
-    qemu_put_be32(f, v >> 32);
-    qemu_put_be32(f, v);
-}
-
-unsigned int qemu_get_be16(QEMUFile *f)
-{
-    unsigned int v;
-    v = qemu_get_byte(f) << 8;
-    v |= qemu_get_byte(f);
-    return v;
-}
-
-unsigned int qemu_get_be32(QEMUFile *f)
-{
-    unsigned int v;
-    v = qemu_get_byte(f) << 24;
-    v |= qemu_get_byte(f) << 16;
-    v |= qemu_get_byte(f) << 8;
-    v |= qemu_get_byte(f);
-    return v;
-}
-
-uint64_t qemu_get_be64(QEMUFile *f)
-{
-    uint64_t v;
-    v = (uint64_t)qemu_get_be32(f) << 32;
-    v |= qemu_get_be32(f);
-    return v;
-}
-
-
-/* timer */
 
 void timer_put(QEMUFile *f, QEMUTimer *ts)
 {
@@ -1007,341 +191,9 @@ void timer_get(QEMUFile *f, QEMUTimer *ts)
 }
 
 
-/* bool */
-
-static int get_bool(QEMUFile *f, void *pv, size_t size)
-{
-    bool *v = pv;
-    *v = qemu_get_byte(f);
-    return 0;
-}
-
-static void put_bool(QEMUFile *f, void *pv, size_t size)
-{
-    bool *v = pv;
-    qemu_put_byte(f, *v);
-}
-
-const VMStateInfo vmstate_info_bool = {
-    .name = "bool",
-    .get  = get_bool,
-    .put  = put_bool,
-};
-
-/* 8 bit int */
-
-static int get_int8(QEMUFile *f, void *pv, size_t size)
-{
-    int8_t *v = pv;
-    qemu_get_s8s(f, v);
-    return 0;
-}
-
-static void put_int8(QEMUFile *f, void *pv, size_t size)
-{
-    int8_t *v = pv;
-    qemu_put_s8s(f, v);
-}
-
-const VMStateInfo vmstate_info_int8 = {
-    .name = "int8",
-    .get  = get_int8,
-    .put  = put_int8,
-};
-
-/* 16 bit int */
-
-static int get_int16(QEMUFile *f, void *pv, size_t size)
-{
-    int16_t *v = pv;
-    qemu_get_sbe16s(f, v);
-    return 0;
-}
-
-static void put_int16(QEMUFile *f, void *pv, size_t size)
-{
-    int16_t *v = pv;
-    qemu_put_sbe16s(f, v);
-}
-
-const VMStateInfo vmstate_info_int16 = {
-    .name = "int16",
-    .get  = get_int16,
-    .put  = put_int16,
-};
-
-/* 32 bit int */
-
-static int get_int32(QEMUFile *f, void *pv, size_t size)
-{
-    int32_t *v = pv;
-    qemu_get_sbe32s(f, v);
-    return 0;
-}
-
-static void put_int32(QEMUFile *f, void *pv, size_t size)
-{
-    int32_t *v = pv;
-    qemu_put_sbe32s(f, v);
-}
-
-const VMStateInfo vmstate_info_int32 = {
-    .name = "int32",
-    .get  = get_int32,
-    .put  = put_int32,
-};
-
-/* 32 bit int. See that the received value is the same than the one
-   in the field */
-
-static int get_int32_equal(QEMUFile *f, void *pv, size_t size)
-{
-    int32_t *v = pv;
-    int32_t v2;
-    qemu_get_sbe32s(f, &v2);
-
-    if (*v == v2)
-        return 0;
-    return -EINVAL;
-}
-
-const VMStateInfo vmstate_info_int32_equal = {
-    .name = "int32 equal",
-    .get  = get_int32_equal,
-    .put  = put_int32,
-};
-
-/* 32 bit int. See that the received value is the less or the same
-   than the one in the field */
-
-static int get_int32_le(QEMUFile *f, void *pv, size_t size)
-{
-    int32_t *old = pv;
-    int32_t new;
-    qemu_get_sbe32s(f, &new);
-
-    if (*old <= new)
-        return 0;
-    return -EINVAL;
-}
-
-const VMStateInfo vmstate_info_int32_le = {
-    .name = "int32 equal",
-    .get  = get_int32_le,
-    .put  = put_int32,
-};
-
-/* 64 bit int */
-
-static int get_int64(QEMUFile *f, void *pv, size_t size)
-{
-    int64_t *v = pv;
-    qemu_get_sbe64s(f, v);
-    return 0;
-}
-
-static void put_int64(QEMUFile *f, void *pv, size_t size)
-{
-    int64_t *v = pv;
-    qemu_put_sbe64s(f, v);
-}
-
-const VMStateInfo vmstate_info_int64 = {
-    .name = "int64",
-    .get  = get_int64,
-    .put  = put_int64,
-};
-
-/* 8 bit unsigned int */
-
-static int get_uint8(QEMUFile *f, void *pv, size_t size)
-{
-    uint8_t *v = pv;
-    qemu_get_8s(f, v);
-    return 0;
-}
-
-static void put_uint8(QEMUFile *f, void *pv, size_t size)
-{
-    uint8_t *v = pv;
-    qemu_put_8s(f, v);
-}
-
-const VMStateInfo vmstate_info_uint8 = {
-    .name = "uint8",
-    .get  = get_uint8,
-    .put  = put_uint8,
-};
-
-/* 16 bit unsigned int */
-
-static int get_uint16(QEMUFile *f, void *pv, size_t size)
-{
-    uint16_t *v = pv;
-    qemu_get_be16s(f, v);
-    return 0;
-}
-
-static void put_uint16(QEMUFile *f, void *pv, size_t size)
-{
-    uint16_t *v = pv;
-    qemu_put_be16s(f, v);
-}
-
-const VMStateInfo vmstate_info_uint16 = {
-    .name = "uint16",
-    .get  = get_uint16,
-    .put  = put_uint16,
-};
-
-/* 32 bit unsigned int */
-
-static int get_uint32(QEMUFile *f, void *pv, size_t size)
-{
-    uint32_t *v = pv;
-    qemu_get_be32s(f, v);
-    return 0;
-}
-
-static void put_uint32(QEMUFile *f, void *pv, size_t size)
-{
-    uint32_t *v = pv;
-    qemu_put_be32s(f, v);
-}
-
-const VMStateInfo vmstate_info_uint32 = {
-    .name = "uint32",
-    .get  = get_uint32,
-    .put  = put_uint32,
-};
-
-/* 32 bit uint. See that the received value is the same than the one
-   in the field */
-
-static int get_uint32_equal(QEMUFile *f, void *pv, size_t size)
-{
-    uint32_t *v = pv;
-    uint32_t v2;
-    qemu_get_be32s(f, &v2);
-
-    if (*v == v2) {
-        return 0;
-    }
-    return -EINVAL;
-}
-
-const VMStateInfo vmstate_info_uint32_equal = {
-    .name = "uint32 equal",
-    .get  = get_uint32_equal,
-    .put  = put_uint32,
-};
-
-/* 64 bit unsigned int */
-
-static int get_uint64(QEMUFile *f, void *pv, size_t size)
-{
-    uint64_t *v = pv;
-    qemu_get_be64s(f, v);
-    return 0;
-}
-
-static void put_uint64(QEMUFile *f, void *pv, size_t size)
-{
-    uint64_t *v = pv;
-    qemu_put_be64s(f, v);
-}
-
-const VMStateInfo vmstate_info_uint64 = {
-    .name = "uint64",
-    .get  = get_uint64,
-    .put  = put_uint64,
-};
-
-/* 64 bit unsigned int. See that the received value is the same than the one
-   in the field */
-
-static int get_uint64_equal(QEMUFile *f, void *pv, size_t size)
-{
-    uint64_t *v = pv;
-    uint64_t v2;
-    qemu_get_be64s(f, &v2);
-
-    if (*v == v2) {
-        return 0;
-    }
-    return -EINVAL;
-}
-
-const VMStateInfo vmstate_info_uint64_equal = {
-    .name = "int64 equal",
-    .get  = get_uint64_equal,
-    .put  = put_uint64,
-};
-
-/* 8 bit int. See that the received value is the same than the one
-   in the field */
-
-static int get_uint8_equal(QEMUFile *f, void *pv, size_t size)
-{
-    uint8_t *v = pv;
-    uint8_t v2;
-    qemu_get_8s(f, &v2);
-
-    if (*v == v2)
-        return 0;
-    return -EINVAL;
-}
-
-const VMStateInfo vmstate_info_uint8_equal = {
-    .name = "uint8 equal",
-    .get  = get_uint8_equal,
-    .put  = put_uint8,
-};
-
-/* 16 bit unsigned int int. See that the received value is the same than the one
-   in the field */
-
-static int get_uint16_equal(QEMUFile *f, void *pv, size_t size)
-{
-    uint16_t *v = pv;
-    uint16_t v2;
-    qemu_get_be16s(f, &v2);
-
-    if (*v == v2)
-        return 0;
-    return -EINVAL;
-}
-
-const VMStateInfo vmstate_info_uint16_equal = {
-    .name = "uint16 equal",
-    .get  = get_uint16_equal,
-    .put  = put_uint16,
-};
-
-/* floating point */
-
-static int get_float64(QEMUFile *f, void *pv, size_t size)
-{
-    float64 *v = pv;
-
-    *v = make_float64(qemu_get_be64(f));
-    return 0;
-}
-
-static void put_float64(QEMUFile *f, void *pv, size_t size)
-{
-    uint64_t *v = pv;
-
-    qemu_put_be64(f, float64_val(*v));
-}
-
-const VMStateInfo vmstate_info_float64 = {
-    .name = "float64",
-    .get  = get_float64,
-    .put  = put_float64,
-};
-
-/* timers  */
+/* VMState timer support.
+ * Kept out of vmstate.c so that vmstate.c does not depend on qemu-timer.c.
+ */
 
 static int get_timer(QEMUFile *f, void *pv, size_t size)
 {
@@ -1362,100 +214,6 @@ const VMStateInfo vmstate_info_timer = {
     .put  = put_timer,
 };
 
-/* uint8_t buffers */
-
-static int get_buffer(QEMUFile *f, void *pv, size_t size)
-{
-    uint8_t *v = pv;
-    qemu_get_buffer(f, v, size);
-    return 0;
-}
-
-static void put_buffer(QEMUFile *f, void *pv, size_t size)
-{
-    uint8_t *v = pv;
-    qemu_put_buffer(f, v, size);
-}
-
-const VMStateInfo vmstate_info_buffer = {
-    .name = "buffer",
-    .get  = get_buffer,
-    .put  = put_buffer,
-};
-
-/* unused buffers: space that was used for some fields that are
-   not useful anymore */
-
-static int get_unused_buffer(QEMUFile *f, void *pv, size_t size)
-{
-    uint8_t buf[1024];
-    int block_len;
-
-    while (size > 0) {
-        block_len = MIN(sizeof(buf), size);
-        size -= block_len;
-        qemu_get_buffer(f, buf, block_len);
-    }
-   return 0;
-}
-
-static void put_unused_buffer(QEMUFile *f, void *pv, size_t size)
-{
-    static const uint8_t buf[1024];
-    int block_len;
-
-    while (size > 0) {
-        block_len = MIN(sizeof(buf), size);
-        size -= block_len;
-        qemu_put_buffer(f, buf, block_len);
-    }
-}
-
-const VMStateInfo vmstate_info_unused_buffer = {
-    .name = "unused_buffer",
-    .get  = get_unused_buffer,
-    .put  = put_unused_buffer,
-};
-
-/* bitmaps (as defined by bitmap.h). Note that size here is the size
- * of the bitmap in bits. The on-the-wire format of a bitmap is 64
- * bit words with the bits in big endian order. The in-memory format
- * is an array of 'unsigned long', which may be either 32 or 64 bits.
- */
-/* This is the number of 64 bit words sent over the wire */
-#define BITS_TO_U64S(nr) DIV_ROUND_UP(nr, 64)
-static int get_bitmap(QEMUFile *f, void *pv, size_t size)
-{
-    unsigned long *bmp = pv;
-    int i, idx = 0;
-    for (i = 0; i < BITS_TO_U64S(size); i++) {
-        uint64_t w = qemu_get_be64(f);
-        bmp[idx++] = w;
-        if (sizeof(unsigned long) == 4 && idx < BITS_TO_LONGS(size)) {
-            bmp[idx++] = w >> 32;
-        }
-    }
-    return 0;
-}
-
-static void put_bitmap(QEMUFile *f, void *pv, size_t size)
-{
-    unsigned long *bmp = pv;
-    int i, idx = 0;
-    for (i = 0; i < BITS_TO_U64S(size); i++) {
-        uint64_t w = bmp[idx++];
-        if (sizeof(unsigned long) == 4 && idx < BITS_TO_LONGS(size)) {
-            w |= ((uint64_t)bmp[idx++]) << 32;
-        }
-        qemu_put_be64(f, w);
-    }
-}
-
-const VMStateInfo vmstate_info_bitmap = {
-    .name = "bitmap",
-    .get = get_bitmap,
-    .put = put_bitmap,
-};
 
 typedef struct CompatEntry {
     char idstr[256];
@@ -1502,8 +260,9 @@ static int calculate_compat_instance_id(const char *idstr)
     int instance_id = 0;
 
     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
-        if (!se->compat)
+        if (!se->compat) {
             continue;
+        }
 
         if (strcmp(idstr, se->compat->idstr) == 0
             && instance_id <= se->compat->instance_id) {
@@ -1668,142 +427,6 @@ void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
     }
 }
 
-static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
-                                    void *opaque);
-static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
-                                   void *opaque);
-
-int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
-                       void *opaque, int version_id)
-{
-    VMStateField *field = vmsd->fields;
-    int ret;
-
-    if (version_id > vmsd->version_id) {
-        return -EINVAL;
-    }
-    if (version_id < vmsd->minimum_version_id_old) {
-        return -EINVAL;
-    }
-    if  (version_id < vmsd->minimum_version_id) {
-        return vmsd->load_state_old(f, opaque, version_id);
-    }
-    if (vmsd->pre_load) {
-        int ret = vmsd->pre_load(opaque);
-        if (ret)
-            return ret;
-    }
-    while(field->name) {
-        if ((field->field_exists &&
-             field->field_exists(opaque, version_id)) ||
-            (!field->field_exists &&
-             field->version_id <= version_id)) {
-            void *base_addr = opaque + field->offset;
-            int i, n_elems = 1;
-            int size = field->size;
-
-            if (field->flags & VMS_VBUFFER) {
-                size = *(int32_t *)(opaque+field->size_offset);
-                if (field->flags & VMS_MULTIPLY) {
-                    size *= field->size;
-                }
-            }
-            if (field->flags & VMS_ARRAY) {
-                n_elems = field->num;
-            } else if (field->flags & VMS_VARRAY_INT32) {
-                n_elems = *(int32_t *)(opaque+field->num_offset);
-            } else if (field->flags & VMS_VARRAY_UINT32) {
-                n_elems = *(uint32_t *)(opaque+field->num_offset);
-            } else if (field->flags & VMS_VARRAY_UINT16) {
-                n_elems = *(uint16_t *)(opaque+field->num_offset);
-            } else if (field->flags & VMS_VARRAY_UINT8) {
-                n_elems = *(uint8_t *)(opaque+field->num_offset);
-            }
-            if (field->flags & VMS_POINTER) {
-                base_addr = *(void **)base_addr + field->start;
-            }
-            for (i = 0; i < n_elems; i++) {
-                void *addr = base_addr + size * i;
-
-                if (field->flags & VMS_ARRAY_OF_POINTER) {
-                    addr = *(void **)addr;
-                }
-                if (field->flags & VMS_STRUCT) {
-                    ret = vmstate_load_state(f, field->vmsd, addr, field->vmsd->version_id);
-                } else {
-                    ret = field->info->get(f, addr, size);
-
-                }
-                if (ret < 0) {
-                    return ret;
-                }
-            }
-        }
-        field++;
-    }
-    ret = vmstate_subsection_load(f, vmsd, opaque);
-    if (ret != 0) {
-        return ret;
-    }
-    if (vmsd->post_load) {
-        return vmsd->post_load(opaque, version_id);
-    }
-    return 0;
-}
-
-void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
-                        void *opaque)
-{
-    VMStateField *field = vmsd->fields;
-
-    if (vmsd->pre_save) {
-        vmsd->pre_save(opaque);
-    }
-    while(field->name) {
-        if (!field->field_exists ||
-            field->field_exists(opaque, vmsd->version_id)) {
-            void *base_addr = opaque + field->offset;
-            int i, n_elems = 1;
-            int size = field->size;
-
-            if (field->flags & VMS_VBUFFER) {
-                size = *(int32_t *)(opaque+field->size_offset);
-                if (field->flags & VMS_MULTIPLY) {
-                    size *= field->size;
-                }
-            }
-            if (field->flags & VMS_ARRAY) {
-                n_elems = field->num;
-            } else if (field->flags & VMS_VARRAY_INT32) {
-                n_elems = *(int32_t *)(opaque+field->num_offset);
-            } else if (field->flags & VMS_VARRAY_UINT32) {
-                n_elems = *(uint32_t *)(opaque+field->num_offset);
-            } else if (field->flags & VMS_VARRAY_UINT16) {
-                n_elems = *(uint16_t *)(opaque+field->num_offset);
-            } else if (field->flags & VMS_VARRAY_UINT8) {
-                n_elems = *(uint8_t *)(opaque+field->num_offset);
-            }
-            if (field->flags & VMS_POINTER) {
-                base_addr = *(void **)base_addr + field->start;
-            }
-            for (i = 0; i < n_elems; i++) {
-                void *addr = base_addr + size * i;
-
-                if (field->flags & VMS_ARRAY_OF_POINTER) {
-                    addr = *(void **)addr;
-                }
-                if (field->flags & VMS_STRUCT) {
-                    vmstate_save_state(f, field->vmsd, addr);
-                } else {
-                    field->info->put(f, addr, size);
-                }
-            }
-        }
-        field++;
-    }
-    vmstate_subsection_save(f, vmsd, opaque);
-}
-
 static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id)
 {
     if (!se->vmsd) {         /* Old style */
@@ -1818,20 +441,9 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se)
         se->ops->save_state(f, se->opaque);
         return;
     }
-    vmstate_save_state(f,se->vmsd, se->opaque);
+    vmstate_save_state(f, se->vmsd, se->opaque);
 }
 
-#define QEMU_VM_FILE_MAGIC           0x5145564d
-#define QEMU_VM_FILE_VERSION_COMPAT  0x00000002
-#define QEMU_VM_FILE_VERSION         0x00000003
-
-#define QEMU_VM_EOF                  0x00
-#define QEMU_VM_SECTION_START        0x01
-#define QEMU_VM_SECTION_PART         0x02
-#define QEMU_VM_SECTION_END          0x03
-#define QEMU_VM_SECTION_FULL         0x04
-#define QEMU_VM_SUBSECTION           0x05
-
 bool qemu_savevm_state_blocked(Error **errp)
 {
     SaveStateEntry *se;
@@ -1857,7 +469,7 @@ void qemu_savevm_state_begin(QEMUFile *f,
         }
         se->ops->set_params(params, se->opaque);
     }
-    
+
     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
 
@@ -1970,7 +582,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
         int len;
 
         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
-	    continue;
+            continue;
         }
         trace_savevm_section_start();
         /* Section type */
@@ -2115,79 +727,6 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
     return NULL;
 }
 
-static const VMStateDescription *vmstate_get_subsection(const VMStateSubsection *sub, char *idstr)
-{
-    while(sub && sub->needed) {
-        if (strcmp(idstr, sub->vmsd->name) == 0) {
-            return sub->vmsd;
-        }
-        sub++;
-    }
-    return NULL;
-}
-
-static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
-                                   void *opaque)
-{
-    while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) {
-        char idstr[256];
-        int ret;
-        uint8_t version_id, len, size;
-        const VMStateDescription *sub_vmsd;
-
-        len = qemu_peek_byte(f, 1);
-        if (len < strlen(vmsd->name) + 1) {
-            /* subsection name has be be "section_name/a" */
-            return 0;
-        }
-        size = qemu_peek_buffer(f, (uint8_t *)idstr, len, 2);
-        if (size != len) {
-            return 0;
-        }
-        idstr[size] = 0;
-
-        if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) {
-            /* it don't have a valid subsection name */
-            return 0;
-        }
-        sub_vmsd = vmstate_get_subsection(vmsd->subsections, idstr);
-        if (sub_vmsd == NULL) {
-            return -ENOENT;
-        }
-        qemu_file_skip(f, 1); /* subsection */
-        qemu_file_skip(f, 1); /* len */
-        qemu_file_skip(f, len); /* idstr */
-        version_id = qemu_get_be32(f);
-
-        ret = vmstate_load_state(f, sub_vmsd, opaque, version_id);
-        if (ret) {
-            return ret;
-        }
-    }
-    return 0;
-}
-
-static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
-                                    void *opaque)
-{
-    const VMStateSubsection *sub = vmsd->subsections;
-
-    while (sub && sub->needed) {
-        if (sub->needed(opaque)) {
-            const VMStateDescription *vmsd = sub->vmsd;
-            uint8_t len;
-
-            qemu_put_byte(f, QEMU_VM_SUBSECTION);
-            len = strlen(vmsd->name);
-            qemu_put_byte(f, len);
-            qemu_put_buffer(f, (uint8_t *)vmsd->name, len);
-            qemu_put_be32(f, vmsd->version_id);
-            vmstate_save_state(f, vmsd, opaque);
-        }
-        sub++;
-    }
-}
-
 typedef struct LoadStateEntry {
     QLIST_ENTRY(LoadStateEntry) entry;
     SaveStateEntry *se;
@@ -2209,16 +748,18 @@ int qemu_loadvm_state(QEMUFile *f)
     }
 
     v = qemu_get_be32(f);
-    if (v != QEMU_VM_FILE_MAGIC)
+    if (v != QEMU_VM_FILE_MAGIC) {
         return -EINVAL;
+    }
 
     v = qemu_get_be32(f);
     if (v == QEMU_VM_FILE_VERSION_COMPAT) {
         fprintf(stderr, "SaveVM v2 format is obsolete and don't work anymore\n");
         return -ENOTSUP;
     }
-    if (v != QEMU_VM_FILE_VERSION)
+    if (v != QEMU_VM_FILE_VERSION) {
         return -ENOTSUP;
+    }
 
     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
         uint32_t instance_id, version_id, section_id;
@@ -2337,8 +878,7 @@ static int del_existing_snapshots(Monitor *mon, const char *name)
     bs = NULL;
     while ((bs = bdrv_next(bs))) {
         if (bdrv_can_snapshot(bs) &&
-            bdrv_snapshot_find(bs, snapshot, name) >= 0)
-        {
+            bdrv_snapshot_find(bs, snapshot, name) >= 0) {
             bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
             if (error_is_set(&err)) {
                 monitor_printf(mon,
@@ -2448,8 +988,9 @@ void do_savevm(Monitor *mon, const QDict *qdict)
     }
 
  the_end:
-    if (saved_vm_running)
+    if (saved_vm_running) {
         vm_start();
+    }
 }
 
 void qmp_xen_save_devices_state(const char *filename, Error **errp)
@@ -2473,8 +1014,9 @@ void qmp_xen_save_devices_state(const char *filename, Error **errp)
     }
 
  the_end:
-    if (saved_vm_running)
+    if (saved_vm_running) {
         vm_start();
+    }
 }
 
 int load_vmstate(const char *name)
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index e4801a8844..52efd5d66f 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -977,6 +977,7 @@ static const ARMCPUInfo arm_cpus[] = {
     { .name = "any",         .initfn = arm_any_initfn },
 #endif
 #endif
+    { .name = NULL }
 };
 
 static Property arm_cpu_properties[] = {
@@ -1040,11 +1041,13 @@ static const TypeInfo arm_cpu_type_info = {
 
 static void arm_cpu_register_types(void)
 {
-    int i;
+    const ARMCPUInfo *info = arm_cpus;
 
     type_register_static(&arm_cpu_type_info);
-    for (i = 0; i < ARRAY_SIZE(arm_cpus); i++) {
-        cpu_register(&arm_cpus[i]);
+
+    while (info->name) {
+        cpu_register(info);
+        info++;
     }
 }
 
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 56ed591164..198b6b8d4e 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -66,6 +66,18 @@
 /* ARM-specific interrupt pending bits.  */
 #define CPU_INTERRUPT_FIQ   CPU_INTERRUPT_TGT_EXT_1
 
+/* The usual mapping for an AArch64 system register to its AArch32
+ * counterpart is for the 32 bit world to have access to the lower
+ * half only (with writes leaving the upper half untouched). It's
+ * therefore useful to be able to pass TCG the offset of the least
+ * significant half of a uint64_t struct member.
+ */
+#ifdef HOST_WORDS_BIGENDIAN
+#define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t))
+#else
+#define offsetoflow32(S, M) offsetof(S, M)
+#endif
+
 /* Meanings of the ARMCPU object's two inbound GPIO lines */
 #define ARM_CPU_IRQ 0
 #define ARM_CPU_FIQ 1
@@ -188,9 +200,9 @@ typedef struct CPUARMState {
         uint32_t c12_vbar; /* vector base address register */
         uint32_t c13_fcse; /* FCSE PID.  */
         uint32_t c13_context; /* Context ID.  */
-        uint32_t c13_tls1; /* User RW Thread register.  */
-        uint32_t c13_tls2; /* User RO Thread register.  */
-        uint32_t c13_tls3; /* Privileged Thread register.  */
+        uint64_t tpidr_el0; /* User RW Thread register.  */
+        uint64_t tpidrro_el0; /* User RO Thread register.  */
+        uint64_t tpidr_el1; /* Privileged Thread register.  */
         uint32_t c14_cntfrq; /* Counter Frequency register */
         uint32_t c14_cntkctl; /* Timer Control register */
         ARMGenericTimer c14_timer[NUM_GTIMERS];
@@ -266,11 +278,11 @@ typedef struct CPUARMState {
         float_status fp_status;
         float_status standard_fp_status;
     } vfp;
-    uint32_t exclusive_addr;
-    uint32_t exclusive_val;
-    uint32_t exclusive_high;
+    uint64_t exclusive_addr;
+    uint64_t exclusive_val;
+    uint64_t exclusive_high;
 #if defined(CONFIG_USER_ONLY)
-    uint32_t exclusive_test;
+    uint64_t exclusive_test;
     uint32_t exclusive_info;
 #endif
 
@@ -475,6 +487,15 @@ static inline void vfp_set_fpcr(CPUARMState *env, uint32_t val)
     vfp_set_fpscr(env, new_fpscr);
 }
 
+enum arm_fprounding {
+    FPROUNDING_TIEEVEN,
+    FPROUNDING_POSINF,
+    FPROUNDING_NEGINF,
+    FPROUNDING_ZERO,
+    FPROUNDING_TIEAWAY,
+    FPROUNDING_ODD
+};
+
 enum arm_cpu_mode {
   ARM_CPU_MODE_USR = 0x10,
   ARM_CPU_MODE_FIQ = 0x11,
@@ -572,18 +593,43 @@ void armv7m_nvic_complete_irq(void *opaque, int irq);
  *    or via MRRC/MCRR?)
  * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field.
  * (In this case crn and opc2 should be zero.)
+ * For AArch64, there is no 32/64 bit size distinction;
+ * instead all registers have a 2 bit op0, 3 bit op1 and op2,
+ * and 4 bit CRn and CRm. The encoding patterns are chosen
+ * to be easy to convert to and from the KVM encodings, and also
+ * so that the hashtable can contain both AArch32 and AArch64
+ * registers (to allow for interprocessing where we might run
+ * 32 bit code on a 64 bit core).
+ */
+/* This bit is private to the cpreg IDs used as our hashtable keys; in
+ * KVM register IDs the AArch64/32 distinction is instead carried by
+ * KVM_REG_ARM/ARM64 in the upper bits of the 64 bit ID.
  */
+#define CP_REG_AA64_SHIFT 28
+#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT)
+
 #define ENCODE_CP_REG(cp, is64, crn, crm, opc1, opc2)   \
     (((cp) << 16) | ((is64) << 15) | ((crn) << 11) |    \
      ((crm) << 7) | ((opc1) << 3) | (opc2))
 
+#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
+    (CP_REG_AA64_MASK |                                 \
+     ((cp) << CP_REG_ARM_COPROC_SHIFT) |                \
+     ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) |         \
+     ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) |         \
+     ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) |         \
+     ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) |         \
+     ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
+
 /* Convert a full 64 bit KVM register ID to the truncated 32 bit
  * version used as a key for the coprocessor register hashtable
  */
 static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
 {
     uint32_t cpregid = kvmid;
-    if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
+    if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
+        cpregid |= CP_REG_AA64_MASK;
+    } else if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
         cpregid |= (1 << 15);
     }
     return cpregid;
@@ -594,11 +640,18 @@ static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
  */
 static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 {
-    uint64_t kvmid = cpregid & ~(1 << 15);
-    if (cpregid & (1 << 15)) {
-        kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
+    uint64_t kvmid;
+
+    if (cpregid & CP_REG_AA64_MASK) {
+        kvmid = cpregid & ~CP_REG_AA64_MASK;
+        kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64;
     } else {
-        kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
+        kvmid = cpregid & ~(1 << 15);
+        if (cpregid & (1 << 15)) {
+            kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
+        } else {
+            kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
+        }
     }
     return kvmid;
 }
@@ -628,12 +681,28 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 #define ARM_CP_IO 64
 #define ARM_CP_NOP (ARM_CP_SPECIAL | (1 << 8))
 #define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
-#define ARM_LAST_SPECIAL ARM_CP_WFI
+#define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
+#define ARM_LAST_SPECIAL ARM_CP_NZCV
 /* Used only as a terminator for ARMCPRegInfo lists */
 #define ARM_CP_SENTINEL 0xffff
 /* Mask of only the flag bits in a type field */
 #define ARM_CP_FLAG_MASK 0x7f
 
+/* Valid values for ARMCPRegInfo state field, indicating which of
+ * the AArch32 and AArch64 execution states this register is visible in.
+ * If the reginfo doesn't explicitly specify then it is AArch32 only.
+ * If the reginfo is declared to be visible in both states then a second
+ * reginfo is synthesised for the AArch32 view of the AArch64 register,
+ * such that the AArch32 view is the lower 32 bits of the AArch64 one.
+ * Note that we rely on the values of these enums as we iterate through
+ * the various states in some places.
+ */
+enum {
+    ARM_CP_STATE_AA32 = 0,
+    ARM_CP_STATE_AA64 = 1,
+    ARM_CP_STATE_BOTH = 2,
+};
+
 /* Return true if cptype is a valid type field. This is used to try to
  * catch errors where the sentinel has been accidentally left off the end
  * of a list of registers.
@@ -655,6 +724,8 @@ static inline bool cptype_valid(int cptype)
  * (ie anything visible in PL2 is visible in S-PL1, some things are only
  * visible in S-PL1) but "Secure PL1" is a bit of a mouthful, we bend the
  * terminology a little and call this PL3.
+ * In AArch64 things are somewhat simpler as the PLx bits line up exactly
+ * with the ELx exception levels.
  *
  * If access permissions for a register are more complex than can be
  * described with these bits, then use a laxer set of restrictions, and
@@ -676,6 +747,10 @@ static inline bool cptype_valid(int cptype)
 
 static inline int arm_current_pl(CPUARMState *env)
 {
+    if (env->aarch64) {
+        return extract32(env->pstate, 2, 2);
+    }
+
     if ((env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_USR) {
         return 0;
     }
@@ -713,12 +788,22 @@ struct ARMCPRegInfo {
      * then behave differently on read/write if necessary.
      * For 64 bit registers, only crm and opc1 are relevant; crn and opc2
      * must both be zero.
+     * For AArch64-visible registers, opc0 is also used.
+     * Since there are no "coprocessors" in AArch64, cp is purely used as a
+     * way to distinguish (for KVM's benefit) guest-visible system registers
+     * from demuxed ones provided to preserve the "no side effects on
+     * KVM register read/write from QEMU" semantics. cp==0x13 is guest
+     * visible (to match KVM's encoding); cp==0 will be converted to
+     * cp==0x13 when the ARMCPRegInfo is registered, for convenience.
      */
     uint8_t cp;
     uint8_t crn;
     uint8_t crm;
+    uint8_t opc0;
     uint8_t opc1;
     uint8_t opc2;
+    /* Execution state in which this register is visible: ARM_CP_STATE_* */
+    int state;
     /* Register type: ARM_CP_* bits/values */
     int type;
     /* Access rights: PL*_[RW] */
@@ -790,7 +875,7 @@ static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
 {
     define_one_arm_cp_reg_with_opaque(cpu, regs, 0);
 }
-const ARMCPRegInfo *get_arm_cp_reginfo(ARMCPU *cpu, uint32_t encoded_cp);
+const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);
 
 /* CPWriteFn that can be used to implement writes-ignored behaviour */
 int arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -798,10 +883,15 @@ int arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
 /* CPReadFn that can be used for read-as-zero behaviour */
 int arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value);
 
-static inline bool cp_access_ok(CPUARMState *env,
+/* CPResetFn that does nothing, for use if no reset is required even
+ * if fieldoffset is non zero.
+ */
+void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque);
+
+static inline bool cp_access_ok(int current_pl,
                                 const ARMCPRegInfo *ri, int isread)
 {
-    return (ri->access >> ((arm_current_pl(env) * 2) + isread)) & 1;
+    return (ri->access >> ((current_pl * 2) + isread)) & 1;
 }
 
 /**
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 04ce87951c..a639c2e476 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -58,6 +58,7 @@ static const ARMCPUInfo aarch64_cpus[] = {
 #ifdef CONFIG_USER_ONLY
     { .name = "any",         .initfn = aarch64_any_initfn },
 #endif
+    { .name = NULL }
 };
 
 static void aarch64_cpu_initfn(Object *obj)
@@ -118,11 +119,13 @@ static const TypeInfo aarch64_cpu_type_info = {
 
 static void aarch64_cpu_register_types(void)
 {
-    int i;
+    const ARMCPUInfo *info = aarch64_cpus;
 
     type_register_static(&aarch64_cpu_type_info);
-    for (i = 0; i < ARRAY_SIZE(aarch64_cpus); i++) {
-        aarch64_cpu_register(&aarch64_cpus[i]);
+
+    while (info->name) {
+        aarch64_cpu_register(info);
+        info++;
     }
 }
 
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index d3f706754f..4ce0d01a85 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -77,3 +77,48 @@ uint64_t HELPER(rbit64)(uint64_t x)
 
     return x;
 }
+
+/* Map a softfloat float_relation_* result (as returned by the
+ * float*_compare functions) to the corresponding ARM NZCV flag
+ * bits; any unrecognised value is handled like "unordered".
+ */
+static inline uint32_t float_rel_to_flags(int res)
+{
+    uint32_t flags; /* matches the return type: no implicit narrowing */
+    switch (res) {
+    case float_relation_equal:
+        flags = PSTATE_Z | PSTATE_C;
+        break;
+    case float_relation_less:
+        flags = PSTATE_N;
+        break;
+    case float_relation_greater:
+        flags = PSTATE_C;
+        break;
+    case float_relation_unordered:
+    default:
+        flags = PSTATE_C | PSTATE_V;
+        break;
+    }
+    return flags;
+}
+
+uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
+{
+    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
+}
+
+uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
+{
+    return float_rel_to_flags(float32_compare(x, y, fp_status));
+}
+
+uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
+{
+    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
+}
+
+uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
+{
+    return float_rel_to_flags(float64_compare(x, y, fp_status));
+}
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index a163a94322..bca19f3dea 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -22,3 +22,7 @@ DEF_HELPER_FLAGS_1(clz64, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(cls64, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(cls32, TCG_CALL_NO_RWG_SE, i32, i32)
 DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
+DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
+DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
+DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 6ebd7dc7bc..c708f15e27 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -142,11 +142,7 @@ static bool read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
     } else if (ri->readfn) {
         return (ri->readfn(env, ri, v) == 0);
     } else {
-        if (ri->type & ARM_CP_64BIT) {
-            *v = CPREG_FIELD64(env, ri);
-        } else {
-            *v = CPREG_FIELD32(env, ri);
-        }
+        raw_read(env, ri, v);
     }
     return true;
 }
@@ -167,11 +163,7 @@ static bool write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
     } else if (ri->writefn) {
         return (ri->writefn(env, ri, v) == 0);
     } else {
-        if (ri->type & ARM_CP_64BIT) {
-            CPREG_FIELD64(env, ri) = v;
-        } else {
-            CPREG_FIELD32(env, ri) = v;
-        }
+        raw_write(env, ri, v);
     }
     return true;
 }
@@ -186,7 +178,7 @@ bool write_cpustate_to_list(ARMCPU *cpu)
         uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
         const ARMCPRegInfo *ri;
         uint64_t v;
-        ri = get_arm_cp_reginfo(cpu, regidx);
+        ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
         if (!ri) {
             ok = false;
             continue;
@@ -214,7 +206,7 @@ bool write_list_to_cpustate(ARMCPU *cpu)
         uint64_t readback;
         const ARMCPRegInfo *ri;
 
-        ri = get_arm_cp_reginfo(cpu, regidx);
+        ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
         if (!ri) {
             ok = false;
             continue;
@@ -242,7 +234,7 @@ static void add_cpreg_to_list(gpointer key, gpointer opaque)
     const ARMCPRegInfo *ri;
 
     regidx = *(uint32_t *)key;
-    ri = get_arm_cp_reginfo(cpu, regidx);
+    ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
 
     if (!(ri->type & ARM_CP_NO_MIGRATE)) {
         cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx);
@@ -258,7 +250,7 @@ static void count_cpreg(gpointer key, gpointer opaque)
     const ARMCPRegInfo *ri;
 
     regidx = *(uint32_t *)key;
-    ri = get_arm_cp_reginfo(cpu, regidx);
+    ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
 
     if (!(ri->type & ARM_CP_NO_MIGRATE)) {
         cpu->cpreg_array_len++;
@@ -397,7 +389,7 @@ static const ARMCPRegInfo cp_reginfo[] = {
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_fcse),
       .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
     { .name = "CONTEXTIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 1,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_fcse),
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_context),
       .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
     /* ??? This covers not just the impdef TLB lockdown registers but also
      * some v7VMSA registers relating to TEX remap, so it is overly broad.
@@ -740,18 +732,26 @@ static const ARMCPRegInfo t2ee_cp_reginfo[] = {
 };
 
 static const ARMCPRegInfo v6k_cp_reginfo[] = {
+    { .name = "TPIDR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .opc2 = 2, .crn = 13, .crm = 0,
+      .access = PL0_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el0), .resetvalue = 0 },
     { .name = "TPIDRURW", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL0_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c13_tls1),
-      .resetvalue = 0 },
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.tpidr_el0),
+      .resetfn = arm_cp_reset_ignore },
+    { .name = "TPIDRRO_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .opc2 = 3, .crn = 13, .crm = 0,
+      .access = PL0_R|PL1_W,
+      .fieldoffset = offsetof(CPUARMState, cp15.tpidrro_el0), .resetvalue = 0 },
     { .name = "TPIDRURO", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 3,
       .access = PL0_R|PL1_W,
-      .fieldoffset = offsetof(CPUARMState, cp15.c13_tls2),
-      .resetvalue = 0 },
-    { .name = "TPIDRPRW", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 4,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.tpidrro_el0),
+      .resetfn = arm_cp_reset_ignore },
+    { .name = "TPIDR_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .opc2 = 4, .crn = 13, .crm = 0,
       .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c13_tls3),
-      .resetvalue = 0 },
+      .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el1), .resetvalue = 0 },
     REGINFO_SENTINEL
 };
 
@@ -1560,6 +1560,64 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
+static int aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t *value)
+{
+    *value = vfp_get_fpcr(env);
+    return 0;
+}
+
+static int aa64_fpcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                           uint64_t value)
+{
+    vfp_set_fpcr(env, value);
+    return 0;
+}
+
+static int aa64_fpsr_read(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t *value)
+{
+    *value = vfp_get_fpsr(env);
+    return 0;
+}
+
+static int aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                           uint64_t value)
+{
+    vfp_set_fpsr(env, value);
+    return 0;
+}
+
+static const ARMCPRegInfo v8_cp_reginfo[] = {
+    /* Minimal set of EL0-visible registers. This will need to be expanded
+     * significantly for system emulation of AArch64 CPUs.
+     */
+    { .name = "NZCV", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 2,
+      .access = PL0_RW, .type = ARM_CP_NZCV },
+    { .name = "FPCR", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
+      .access = PL0_RW, .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
+    { .name = "FPSR", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
+      .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
+    /* This claims a 32 byte cacheline size for icache and dcache, VIPT icache.
+     * It will eventually need to have a CPU-specified reset value.
+     */
+    { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
+      .access = PL0_R, .type = ARM_CP_CONST,
+      .resetvalue = 0x80030003 },
+    /* Prohibit use of DC ZVA. OPTME: implement DC ZVA and allow its use.
+     * For system mode the DZP bit here will need to be computed, not constant.
+     */
+    { .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
+      .access = PL0_R, .type = ARM_CP_CONST,
+      .resetvalue = 0x10 },
+    REGINFO_SENTINEL
+};
+
 static int sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     env->cp15.c1_sys = value;
@@ -1662,6 +1720,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     } else {
         define_arm_cp_regs(cpu, not_v7_cp_reginfo);
     }
+    if (arm_feature(env, ARM_FEATURE_V8)) {
+        define_arm_cp_regs(cpu, v8_cp_reginfo);
+    }
     if (arm_feature(env, ARM_FEATURE_MPU)) {
         /* These are the MPU registers prior to PMSAv6. Any new
          * PMSA core later than the ARM946 will require that we
@@ -1937,6 +1998,85 @@ CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
     return cpu_list;
 }
 
+static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
+                                   void *opaque, int state,
+                                   int crm, int opc1, int opc2)
+{
+    /* Private utility function for define_one_arm_cp_reg_with_opaque():
+     * add a single reginfo struct to the hash table.
+     */
+    uint32_t *key = g_new(uint32_t, 1);
+    ARMCPRegInfo *r2 = g_memdup(r, sizeof(ARMCPRegInfo));
+    int is64 = (r->type & ARM_CP_64BIT) ? 1 : 0;
+    if (r->state == ARM_CP_STATE_BOTH && state == ARM_CP_STATE_AA32) {
+        /* The AArch32 view of a shared register sees the lower 32 bits
+         * of a 64 bit backing field. It is not migratable as the AArch64
+         * view handles that. AArch64 also handles reset.
+         * We assume it is a cp15 register.
+         */
+        r2->cp = 15;
+        r2->type |= ARM_CP_NO_MIGRATE;
+        r2->resetfn = arm_cp_reset_ignore;
+#ifdef HOST_WORDS_BIGENDIAN
+        if (r2->fieldoffset) {
+            r2->fieldoffset += sizeof(uint32_t);
+        }
+#endif
+    }
+    if (state == ARM_CP_STATE_AA64) {
+        /* To allow abbreviation of ARMCPRegInfo
+         * definitions, we treat cp == 0 as equivalent to
+         * the value for "standard guest-visible sysreg".
+         */
+        if (r->cp == 0) {
+            r2->cp = CP_REG_ARM64_SYSREG_CP;
+        }
+        *key = ENCODE_AA64_CP_REG(r2->cp, r2->crn, crm,
+                                  r2->opc0, opc1, opc2);
+    } else {
+        *key = ENCODE_CP_REG(r2->cp, is64, r2->crn, crm, opc1, opc2);
+    }
+    if (opaque) {
+        r2->opaque = opaque;
+    }
+    /* Make sure reginfo passed to helpers for wildcarded regs
+     * has the correct crm/opc1/opc2 for this reg, not CP_ANY:
+     */
+    r2->crm = crm;
+    r2->opc1 = opc1;
+    r2->opc2 = opc2;
+    /* By convention, for wildcarded registers only the first
+     * entry is used for migration; the others are marked as
+     * NO_MIGRATE so we don't try to transfer the register
+     * multiple times. Special registers (ie NOP/WFI) are
+     * never migratable.
+     */
+    if ((r->type & ARM_CP_SPECIAL) ||
+        ((r->crm == CP_ANY) && crm != 0) ||
+        ((r->opc1 == CP_ANY) && opc1 != 0) ||
+        ((r->opc2 == CP_ANY) && opc2 != 0)) {
+        r2->type |= ARM_CP_NO_MIGRATE;
+    }
+
+    /* Overriding of an existing definition must be explicitly
+     * requested.
+     */
+    if (!(r->type & ARM_CP_OVERRIDE)) {
+        ARMCPRegInfo *oldreg;
+        oldreg = g_hash_table_lookup(cpu->cp_regs, key);
+        if (oldreg && !(oldreg->type & ARM_CP_OVERRIDE)) {
+            fprintf(stderr, "Register redefined: cp=%d %d bit "
+                    "crn=%d crm=%d opc1=%d opc2=%d, "
+                    "was %s, now %s\n", r2->cp, 32 + 32 * is64,
+                    r2->crn, r2->crm, r2->opc1, r2->opc2,
+                    oldreg->name, r2->name);
+            g_assert_not_reached();
+        }
+    }
+    g_hash_table_insert(cpu->cp_regs, key, r2);
+}
+
+
 void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
                                        const ARMCPRegInfo *r, void *opaque)
 {
@@ -1951,8 +2091,19 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
      * At least one of the original and the second definition should
      * include ARM_CP_OVERRIDE in its type bits -- this is just a guard
      * against accidental use.
+     *
+     * The state field defines whether the register is to be
+     * visible in the AArch32 or AArch64 execution state. If the
+     * state is set to ARM_CP_STATE_BOTH then we synthesise a
+     * reginfo structure for the AArch32 view, which sees the lower
+     * 32 bits of the 64 bit register.
+     *
+     * Only registers visible in AArch64 may set r->opc0; opc0 cannot
+     * be wildcarded. AArch64 registers are always considered to be 64
+     * bits; the ARM_CP_64BIT* flag applies only to the AArch32 view of
+     * the register, if any.
      */
-    int crm, opc1, opc2;
+    int crm, opc1, opc2, state;
     int crmmin = (r->crm == CP_ANY) ? 0 : r->crm;
     int crmmax = (r->crm == CP_ANY) ? 15 : r->crm;
     int opc1min = (r->opc1 == CP_ANY) ? 0 : r->opc1;
@@ -1961,6 +2112,52 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
     int opc2max = (r->opc2 == CP_ANY) ? 7 : r->opc2;
     /* 64 bit registers have only CRm and Opc1 fields */
     assert(!((r->type & ARM_CP_64BIT) && (r->opc2 || r->crn)));
+    /* op0 only exists in the AArch64 encodings */
+    assert((r->state != ARM_CP_STATE_AA32) || (r->opc0 == 0));
+    /* AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless */
+    assert((r->state != ARM_CP_STATE_AA64) || !(r->type & ARM_CP_64BIT));
+    /* The AArch64 pseudocode CheckSystemAccess() specifies that op1
+     * encodes a minimum access level for the register. We roll this
+     * runtime check into our general permission check code, so check
+     * here that the reginfo's specified permissions are strict enough
+     * to encompass the generic architectural permission check.
+     */
+    if (r->state != ARM_CP_STATE_AA32) {
+        int mask = 0;
+        switch (r->opc1) {
+        case 0: case 1: case 2:
+            /* min_EL EL1 */
+            mask = PL1_RW;
+            break;
+        case 3:
+            /* min_EL EL0 */
+            mask = PL0_RW;
+            break;
+        case 4:
+            /* min_EL EL2 */
+            mask = PL2_RW;
+            break;
+        case 5:
+            /* unallocated encoding, so not possible */
+            assert(false);
+            break;
+        case 6:
+            /* min_EL EL3 */
+            mask = PL3_RW;
+            break;
+        case 7:
+            /* min_EL EL1, secure mode only (we don't check the latter) */
+            mask = PL1_RW;
+            break;
+        default:
+            /* broken reginfo with out-of-range opc1 */
+            assert(false);
+            break;
+        }
+        /* assert our permissions are not too lax (stricter is fine) */
+        assert((r->access & ~mask) == 0);
+    }
+
     /* Check that the register definition has enough info to handle
      * reads and writes if they are permitted.
      */
@@ -1977,48 +2174,14 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
     for (crm = crmmin; crm <= crmmax; crm++) {
         for (opc1 = opc1min; opc1 <= opc1max; opc1++) {
             for (opc2 = opc2min; opc2 <= opc2max; opc2++) {
-                uint32_t *key = g_new(uint32_t, 1);
-                ARMCPRegInfo *r2 = g_memdup(r, sizeof(ARMCPRegInfo));
-                int is64 = (r->type & ARM_CP_64BIT) ? 1 : 0;
-                *key = ENCODE_CP_REG(r->cp, is64, r->crn, crm, opc1, opc2);
-                if (opaque) {
-                    r2->opaque = opaque;
-                }
-                /* Make sure reginfo passed to helpers for wildcarded regs
-                 * has the correct crm/opc1/opc2 for this reg, not CP_ANY:
-                 */
-                r2->crm = crm;
-                r2->opc1 = opc1;
-                r2->opc2 = opc2;
-                /* By convention, for wildcarded registers only the first
-                 * entry is used for migration; the others are marked as
-                 * NO_MIGRATE so we don't try to transfer the register
-                 * multiple times. Special registers (ie NOP/WFI) are
-                 * never migratable.
-                 */
-                if ((r->type & ARM_CP_SPECIAL) ||
-                    ((r->crm == CP_ANY) && crm != 0) ||
-                    ((r->opc1 == CP_ANY) && opc1 != 0) ||
-                    ((r->opc2 == CP_ANY) && opc2 != 0)) {
-                    r2->type |= ARM_CP_NO_MIGRATE;
-                }
-
-                /* Overriding of an existing definition must be explicitly
-                 * requested.
-                 */
-                if (!(r->type & ARM_CP_OVERRIDE)) {
-                    ARMCPRegInfo *oldreg;
-                    oldreg = g_hash_table_lookup(cpu->cp_regs, key);
-                    if (oldreg && !(oldreg->type & ARM_CP_OVERRIDE)) {
-                        fprintf(stderr, "Register redefined: cp=%d %d bit "
-                                "crn=%d crm=%d opc1=%d opc2=%d, "
-                                "was %s, now %s\n", r2->cp, 32 + 32 * is64,
-                                r2->crn, r2->crm, r2->opc1, r2->opc2,
-                                oldreg->name, r2->name);
-                        g_assert_not_reached();
+                for (state = ARM_CP_STATE_AA32;
+                     state <= ARM_CP_STATE_AA64; state++) {
+                    if (r->state != state && r->state != ARM_CP_STATE_BOTH) {
+                        continue;
                     }
+                    add_cpreg_to_hashtable(cpu, r, opaque, state,
+                                           crm, opc1, opc2);
                 }
-                g_hash_table_insert(cpu->cp_regs, key, r2);
             }
         }
     }
@@ -2034,9 +2197,9 @@ void define_arm_cp_regs_with_opaque(ARMCPU *cpu,
     }
 }
 
-const ARMCPRegInfo *get_arm_cp_reginfo(ARMCPU *cpu, uint32_t encoded_cp)
+const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp)
 {
-    return g_hash_table_lookup(cpu->cp_regs, &encoded_cp);
+    return g_hash_table_lookup(cpregs, &encoded_cp);
 }
 
 int arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2053,6 +2216,11 @@ int arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t *value)
     return 0;
 }
 
+void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque)
+{
+    /* Helper coprocessor reset function for do-nothing-on-reset registers */
+}
+
 static int bad_mode_switch(CPUARMState *env, int mode)
 {
     /* Return true if it is not valid for us to switch to
@@ -3639,16 +3807,16 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
     if (changed & (3 << 22)) {
         i = (val >> 22) & 3;
         switch (i) {
-        case 0:
+        case FPROUNDING_TIEEVEN:
             i = float_round_nearest_even;
             break;
-        case 1:
+        case FPROUNDING_POSINF:
             i = float_round_up;
             break;
-        case 2:
+        case FPROUNDING_NEGINF:
             i = float_round_down;
             break;
-        case 3:
+        case FPROUNDING_ZERO:
             i = float_round_to_zero;
             break;
         }
@@ -3688,6 +3856,10 @@ VFP_BINOP(add)
 VFP_BINOP(sub)
 VFP_BINOP(mul)
 VFP_BINOP(div)
+VFP_BINOP(min)
+VFP_BINOP(max)
+VFP_BINOP(minnum)
+VFP_BINOP(maxnum)
 #undef VFP_BINOP
 
 float32 VFP_HELPER(neg, s)(float32 a)
@@ -3804,37 +3976,77 @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
 }
 
 /* VFP3 fixed point conversion.  */
-#define VFP_CONV_FIX(name, p, fsz, itype, sign) \
-float##fsz HELPER(vfp_##name##to##p)(uint##fsz##_t  x, uint32_t shift, \
-                                    void *fpstp) \
+#define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
+float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t  x, uint32_t shift, \
+                                     void *fpstp) \
 { \
     float_status *fpst = fpstp; \
     float##fsz tmp; \
-    tmp = sign##int32_to_##float##fsz((itype##_t)x, fpst); \
+    tmp = itype##_to_##float##fsz(x, fpst); \
     return float##fsz##_scalbn(tmp, -(int)shift, fpst); \
-} \
-uint##fsz##_t HELPER(vfp_to##name##p)(float##fsz x, uint32_t shift, \
-                                       void *fpstp) \
+}
+
+/* Notice that we want only input-denormal exception flags from the
+ * scalbn operation: the other possible flags (overflow+inexact if
+ * we overflow to infinity, output-denormal) aren't correct for the
+ * complete scale-and-convert operation.
+ */
+#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, round) \
+uint##isz##_t HELPER(vfp_to##name##p##round)(float##fsz x, \
+                                             uint32_t shift, \
+                                             void *fpstp) \
 { \
     float_status *fpst = fpstp; \
+    int old_exc_flags = get_float_exception_flags(fpst); \
     float##fsz tmp; \
     if (float##fsz##_is_any_nan(x)) { \
         float_raise(float_flag_invalid, fpst); \
         return 0; \
     } \
     tmp = float##fsz##_scalbn(x, shift, fpst); \
-    return float##fsz##_to_##itype##_round_to_zero(tmp, fpst); \
-}
-
-VFP_CONV_FIX(sh, d, 64, int16, )
-VFP_CONV_FIX(sl, d, 64, int32, )
-VFP_CONV_FIX(uh, d, 64, uint16, u)
-VFP_CONV_FIX(ul, d, 64, uint32, u)
-VFP_CONV_FIX(sh, s, 32, int16, )
-VFP_CONV_FIX(sl, s, 32, int32, )
-VFP_CONV_FIX(uh, s, 32, uint16, u)
-VFP_CONV_FIX(ul, s, 32, uint32, u)
+    old_exc_flags |= get_float_exception_flags(fpst) \
+        & float_flag_input_denormal; \
+    set_float_exception_flags(old_exc_flags, fpst); \
+    return float##fsz##_to_##itype##round(tmp, fpst); \
+}
+
+#define VFP_CONV_FIX(name, p, fsz, isz, itype)                   \
+VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, _round_to_zero) \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, )
+
+#define VFP_CONV_FIX_A64(name, p, fsz, isz, itype)               \
+VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, )
+
+VFP_CONV_FIX(sh, d, 64, 64, int16)
+VFP_CONV_FIX(sl, d, 64, 64, int32)
+VFP_CONV_FIX_A64(sq, d, 64, 64, int64)
+VFP_CONV_FIX(uh, d, 64, 64, uint16)
+VFP_CONV_FIX(ul, d, 64, 64, uint32)
+VFP_CONV_FIX_A64(uq, d, 64, 64, uint64)
+VFP_CONV_FIX(sh, s, 32, 32, int16)
+VFP_CONV_FIX(sl, s, 32, 32, int32)
+VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
+VFP_CONV_FIX(uh, s, 32, 32, uint16)
+VFP_CONV_FIX(ul, s, 32, 32, uint32)
+VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
 #undef VFP_CONV_FIX
+#undef VFP_CONV_FIX_FLOAT
+#undef VFP_CONV_FLOAT_FIX_ROUND
+
+/* Set the current fp rounding mode and return the old one.
+ * The argument is a softfloat float_round_ value.
+ */
+uint32_t HELPER(set_rmode)(uint32_t rmode, CPUARMState *env)
+{
+    float_status *fp_status = &env->vfp.fp_status;
+
+    uint32_t prev_rmode = get_float_rounding_mode(fp_status);
+    set_float_rounding_mode(rmode, fp_status);
+
+    return prev_rmode;
+}
 
 /* Half precision conversions.  */
 static float32 do_fcvt_f16_to_f32(uint32_t a, CPUARMState *env, float_status *s)
@@ -3877,6 +4089,26 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUARMState *env)
     return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status);
 }
 
+float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, CPUARMState *env)
+{
+    /* Widen half precision to float64. When FPSCR bit 26 is clear (ieee)
+     * the result is passed through float64_maybe_silence_nan().
+     */
+    int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
+    float64 r = float16_to_float64(make_float16(a), ieee, &env->vfp.fp_status);
+    return ieee ? float64_maybe_silence_nan(r) : r;
+}
+
+uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, CPUARMState *env)
+{
+    /* Narrow float64 to half precision. When FPSCR bit 26 is clear (ieee)
+     * the result is passed through float16_maybe_silence_nan().
+     */
+    int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
+    float16 r = float64_to_float16(a, ieee, &env->vfp.fp_status);
+    return float16_val(ieee ? float16_maybe_silence_nan(r) : r);
+}
+
 #define float32_two make_float32(0x40000000)
 #define float32_three make_float32(0x40400000)
 #define float32_one_point_five make_float32(0x3fc00000)
@@ -4142,27 +4374,47 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
     return float64_muladd(a, b, c, 0, fpst);
 }
 
-/* ARMv8 VMAXNM/VMINNM */
-float32 VFP_HELPER(maxnm, s)(float32 a, float32 b, void *fpstp)
+/* ARMv8 round to integral */
+float32 HELPER(rints_exact)(float32 x, void *fp_status)
 {
-    float_status *fpst = fpstp;
-    return float32_maxnum(a, b, fpst);
+    return float32_round_to_int(x, fp_status);
 }
 
-float64 VFP_HELPER(maxnm, d)(float64 a, float64 b, void *fpstp)
+float64 HELPER(rintd_exact)(float64 x, void *fp_status)
 {
-    float_status *fpst = fpstp;
-    return float64_maxnum(a, b, fpst);
+    return float64_round_to_int(x, fp_status);
 }
 
-float32 VFP_HELPER(minnm, s)(float32 a, float32 b, void *fpstp)
+float32 HELPER(rints)(float32 x, void *fp_status)
 {
-    float_status *fpst = fpstp;
-    return float32_minnum(a, b, fpst);
+    int old_flags = get_float_exception_flags(fp_status), new_flags;
+    float32 ret;
+
+    ret = float32_round_to_int(x, fp_status);
+
+    /* Suppress any inexact exceptions the conversion produced */
+    if (!(old_flags & float_flag_inexact)) {
+        new_flags = get_float_exception_flags(fp_status);
+        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+    }
+
+    return ret;
 }
 
-float64 VFP_HELPER(minnm, d)(float64 a, float64 b, void *fpstp)
+float64 HELPER(rintd)(float64 x, void *fp_status)
 {
-    float_status *fpst = fpstp;
-    return float64_minnum(a, b, fpst);
+    int old_flags = get_float_exception_flags(fp_status), new_flags;
+    float64 ret;
+
+    ret = float64_round_to_int(x, fp_status);
+
+    /* Suppress any inexact exception the conversion produced; if the
+     * flag was already set on entry it is left untouched.
+     */
+    if (!(old_flags & float_flag_inexact)) {
+        new_flags = get_float_exception_flags(fp_status);
+        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+    }
+
+    return ret;
 }
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 73d67dcc17..70872dffc6 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -79,6 +79,14 @@ DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr)
 DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
 DEF_HELPER_1(vfp_negs, f32, f32)
 DEF_HELPER_1(vfp_negd, f64, f64)
 DEF_HELPER_1(vfp_abss, f32, f32)
@@ -107,36 +115,51 @@ DEF_HELPER_2(vfp_tosid, i32, f64, ptr)
 DEF_HELPER_2(vfp_tosizs, i32, f32, ptr)
 DEF_HELPER_2(vfp_tosizd, i32, f64, ptr)
 
+DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
 DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr)
 DEF_HELPER_3(vfp_touhs, i32, f32, i32, ptr)
 DEF_HELPER_3(vfp_touls, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_touqs, i64, f32, i32, ptr)
 DEF_HELPER_3(vfp_toshd, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_tosld, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_tosqd, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_touhd, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_tould, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_touqd, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_shtos, f32, i32, i32, ptr)
 DEF_HELPER_3(vfp_sltos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_sqtos, f32, i64, i32, ptr)
 DEF_HELPER_3(vfp_uhtos, f32, i32, i32, ptr)
 DEF_HELPER_3(vfp_ultos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_uqtos, f32, i64, i32, ptr)
 DEF_HELPER_3(vfp_shtod, f64, i64, i32, ptr)
 DEF_HELPER_3(vfp_sltod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_sqtod, f64, i64, i32, ptr)
 DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr)
 DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr)
+
+DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, env)
 
 DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
 DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
 DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env)
 DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
+DEF_HELPER_FLAGS_2(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, i32, env)
+DEF_HELPER_FLAGS_2(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, i32, f64, env)
 
 DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
 DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
 
-DEF_HELPER_3(vfp_maxnmd, f64, f64, f64, ptr)
-DEF_HELPER_3(vfp_maxnms, f32, f32, f32, ptr)
-DEF_HELPER_3(vfp_minnmd, f64, f64, f64, ptr)
-DEF_HELPER_3(vfp_minnms, f32, f32, f32, ptr)
-
 DEF_HELPER_3(recps_f32, f32, f32, f32, env)
 DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
 DEF_HELPER_2(recpe_f32, f32, f32, env)
@@ -150,6 +173,11 @@ DEF_HELPER_3(shr_cc, i32, env, i32, i32)
 DEF_HELPER_3(sar_cc, i32, env, i32, i32)
 DEF_HELPER_3(ror_cc, i32, env, i32, i32)
 
+DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
+
 /* neon_helper.c */
 DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32)
 DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32)
@@ -346,8 +374,6 @@ DEF_HELPER_2(neon_qneg_s8, i32, env, i32)
 DEF_HELPER_2(neon_qneg_s16, i32, env, i32)
 DEF_HELPER_2(neon_qneg_s32, i32, env, i32)
 
-DEF_HELPER_3(neon_min_f32, i32, i32, i32, ptr)
-DEF_HELPER_3(neon_max_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
diff --git a/target-arm/kvm-consts.h b/target-arm/kvm-consts.h
index 2bba0bd198..0e7f889cba 100644
--- a/target-arm/kvm-consts.h
+++ b/target-arm/kvm-consts.h
@@ -29,12 +29,14 @@
 #define CP_REG_SIZE_U32        0x0020000000000000ULL
 #define CP_REG_SIZE_U64        0x0030000000000000ULL
 #define CP_REG_ARM             0x4000000000000000ULL
+#define CP_REG_ARCH_MASK       0xff00000000000000ULL
 
 MISMATCH_CHECK(CP_REG_SIZE_SHIFT, KVM_REG_SIZE_SHIFT)
 MISMATCH_CHECK(CP_REG_SIZE_MASK, KVM_REG_SIZE_MASK)
 MISMATCH_CHECK(CP_REG_SIZE_U32, KVM_REG_SIZE_U32)
 MISMATCH_CHECK(CP_REG_SIZE_U64, KVM_REG_SIZE_U64)
 MISMATCH_CHECK(CP_REG_ARM, KVM_REG_ARM)
+MISMATCH_CHECK(CP_REG_ARCH_MASK, KVM_REG_ARCH_MASK)
 
 #define PSCI_FN_BASE 0x95c1ba5e
 #define PSCI_FN(n) (PSCI_FN_BASE + (n))
@@ -59,6 +61,41 @@ MISMATCH_CHECK(PSCI_FN_MIGRATE, KVM_PSCI_FN_MIGRATE)
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A15, KVM_ARM_TARGET_CORTEX_A15)
 #endif
 
+#define CP_REG_ARM64                   0x6000000000000000ULL
+#define CP_REG_ARM_COPROC_MASK         0x000000000FFF0000
+#define CP_REG_ARM_COPROC_SHIFT        16
+#define CP_REG_ARM64_SYSREG            (0x0013 << CP_REG_ARM_COPROC_SHIFT)
+#define CP_REG_ARM64_SYSREG_OP0_MASK   0x000000000000c000
+#define CP_REG_ARM64_SYSREG_OP0_SHIFT  14
+#define CP_REG_ARM64_SYSREG_OP1_MASK   0x0000000000003800
+#define CP_REG_ARM64_SYSREG_OP1_SHIFT  11
+#define CP_REG_ARM64_SYSREG_CRN_MASK   0x0000000000000780
+#define CP_REG_ARM64_SYSREG_CRN_SHIFT  7
+#define CP_REG_ARM64_SYSREG_CRM_MASK   0x0000000000000078
+#define CP_REG_ARM64_SYSREG_CRM_SHIFT  3
+#define CP_REG_ARM64_SYSREG_OP2_MASK   0x0000000000000007
+#define CP_REG_ARM64_SYSREG_OP2_SHIFT  0
+
+/* No kernel define but it's useful to QEMU */
+#define CP_REG_ARM64_SYSREG_CP (CP_REG_ARM64_SYSREG >> CP_REG_ARM_COPROC_SHIFT)
+
+#ifdef TARGET_AARCH64
+MISMATCH_CHECK(CP_REG_ARM64, KVM_REG_ARM64)
+MISMATCH_CHECK(CP_REG_ARM_COPROC_MASK, KVM_REG_ARM_COPROC_MASK)
+MISMATCH_CHECK(CP_REG_ARM_COPROC_SHIFT, KVM_REG_ARM_COPROC_SHIFT)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP0_MASK)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP0_SHIFT, KVM_REG_ARM64_SYSREG_OP0_SHIFT)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP1_SHIFT, KVM_REG_ARM64_SYSREG_OP1_SHIFT)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRN_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRN_SHIFT, KVM_REG_ARM64_SYSREG_CRN_SHIFT)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRM_MASK)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRM_SHIFT, KVM_REG_ARM64_SYSREG_CRM_SHIFT)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK)
+MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP2_SHIFT, KVM_REG_ARM64_SYSREG_OP2_SHIFT)
+#endif
+
 #undef MISMATCH_CHECK
 
 #endif
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 74f010f637..8f9e7d4d28 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -222,9 +222,9 @@ static int cpu_post_load(void *opaque, int version_id)
 
 const VMStateDescription vmstate_arm_cpu = {
     .name = "cpu",
-    .version_id = 13,
-    .minimum_version_id = 13,
-    .minimum_version_id_old = 13,
+    .version_id = 14,
+    .minimum_version_id = 14,
+    .minimum_version_id_old = 14,
     .pre_save = cpu_pre_save,
     .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
@@ -253,9 +253,9 @@ const VMStateDescription vmstate_arm_cpu = {
         VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU,
                              cpreg_vmstate_array_len,
                              0, vmstate_info_uint64, uint64_t),
-        VMSTATE_UINT32(env.exclusive_addr, ARMCPU),
-        VMSTATE_UINT32(env.exclusive_val, ARMCPU),
-        VMSTATE_UINT32(env.exclusive_high, ARMCPU),
+        VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
+        VMSTATE_UINT64(env.exclusive_val, ARMCPU),
+        VMSTATE_UINT64(env.exclusive_high, ARMCPU),
         VMSTATE_UINT64(env.features, ARMCPU),
         VMSTATE_TIMER(gt_timer[GTIMER_PHYS], ARMCPU),
         VMSTATE_TIMER(gt_timer[GTIMER_VIRT], ARMCPU),
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index b028cc2c93..be6fbd997e 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -1765,18 +1765,6 @@ uint32_t HELPER(neon_qneg_s32)(CPUARMState *env, uint32_t x)
 }
 
 /* NEON Float helpers.  */
-uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    return float32_val(float32_min(make_float32(a), make_float32(b), fpst));
-}
-
-uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    return float32_val(float32_max(make_float32(a), make_float32(b), fpst));
-}
-
 uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
 {
     float_status *fpst = fpstp;
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 0a76130bb2..cf80c46b90 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -38,6 +38,15 @@ static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
 static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
 
+/* Load/store exclusive handling */
+static TCGv_i64 cpu_exclusive_addr;
+static TCGv_i64 cpu_exclusive_val;
+static TCGv_i64 cpu_exclusive_high;
+#ifdef CONFIG_USER_ONLY
+static TCGv_i64 cpu_exclusive_test;
+static TCGv_i32 cpu_exclusive_info;
+#endif
+
 static const char *regnames[] = {
     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
@@ -70,6 +79,19 @@ void a64_translate_init(void)
     cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
     cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
     cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
+
+    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
+        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
+    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
+        offsetof(CPUARMState, exclusive_val), "exclusive_val");
+    cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
+        offsetof(CPUARMState, exclusive_high), "exclusive_high");
+#ifdef CONFIG_USER_ONLY
+    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
+        offsetof(CPUARMState, exclusive_test), "exclusive_test");
+    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
+        offsetof(CPUARMState, exclusive_info), "exclusive_info");
+#endif
 }
 
 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
@@ -97,6 +119,31 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                 psr & PSTATE_C ? 'C' : '-',
                 psr & PSTATE_V ? 'V' : '-');
     cpu_fprintf(f, "\n");
+
+    if (flags & CPU_DUMP_FPU) {
+        int numvfpregs = 32;
+        for (i = 0; i < numvfpregs; i += 2) {
+            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
+            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
+            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
+                        i, vhi, vlo);
+            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
+            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
+            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
+                        i + 1, vhi, vlo);
+        }
+        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
+                    vfp_get_fpcr(env), vfp_get_fpsr(env));
+    }
+}
+
+static int get_mem_index(DisasContext *s)
+{
+#ifndef CONFIG_USER_ONLY
+    return s->user; /* system emulation: index comes from the context */
+#else /* CONFIG_USER_ONLY */
+    return 1; /* user-only builds always use index 1 */
+#endif
 }
 
 void gen_a64_set_pc_im(uint64_t val)
@@ -250,6 +297,91 @@ static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
     return v;
 }
 
+static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
+{
+    TCGv_i64 tmp = new_tmp_a64(s); /* copy of X[reg], 32 bit view if !sf */
+    if (!sf) {
+        tcg_gen_ext32u_i64(tmp, cpu_X[reg]);
+    } else {
+        tcg_gen_mov_i64(tmp, cpu_X[reg]);
+    }
+    return tmp;
+}
+
+/* Return the offset into CPUARMState of the least significant slice
+ * (Dn, Sn, Hn or Bn, selected by 'size') of FP register Qn. On big-endian
+ * hosts the offset is advanced by (8 - slice bytes) within the low 64
+ * bits. (Note that this is not the same mapping as for A32; see cpu.h)
+ */
+static inline int fp_reg_offset(int regno, TCGMemOp size)
+{
+    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
+#ifdef HOST_WORDS_BIGENDIAN
+    offs += (8 - (1 << size));
+#endif
+    return offs;
+}
+
+/* Offset into CPUARMState of the high 64 bits of the 128 bit vector Qn */
+static inline int fp_reg_hi_offset(int regno)
+{
+    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
+}
+
+/* Convenience accessors for reading and writing single and double
+ * FP registers. Writing clears the upper parts of the associated
+ * 128 bit vector register, as required by the architecture.
+ * Note that unlike the GP register accessors, the values returned
+ * by the read functions must be manually freed.
+ */
+static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
+{
+    const int dreg_offs = fp_reg_offset(reg, MO_64);
+    TCGv_i64 dval = tcg_temp_new_i64(); /* caller must free this temp */
+    tcg_gen_ld_i64(dval, cpu_env, dreg_offs);
+    return dval;
+}
+
+static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
+{
+    const int sreg_offs = fp_reg_offset(reg, MO_32);
+    TCGv_i32 sval = tcg_temp_new_i32(); /* caller must free this temp */
+    tcg_gen_ld_i32(sval, cpu_env, sreg_offs);
+    return sval;
+}
+
+static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
+{
+    TCGv_i64 hi_zero = tcg_const_i64(0);
+    /* Low 64 bits of Qn take the value; the high 64 bits are cleared. */
+    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
+    tcg_gen_st_i64(hi_zero, cpu_env, fp_reg_hi_offset(reg));
+    tcg_temp_free_i64(hi_zero);
+}
+
+static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
+{
+    TCGv_i64 widened = tcg_temp_new_i64();
+    /* Zero-extend to 64 bits, then reuse the D register writer. */
+    tcg_gen_extu_i32_i64(widened, v);
+    write_fp_dreg(s, reg, widened);
+    tcg_temp_free_i64(widened);
+}
+
+static TCGv_ptr get_fpstatus_ptr(void)
+{
+    /* A64 has no equivalent of the A32 Neon "standard FPSCR value":
+     * all instructions (both FP and Neon) use the FPCR, so every
+     * operation uses vfp.fp_status.
+     * The pointer temporary returned here is newly allocated.
+     */
+    const int fpst_offs = offsetof(CPUARMState, vfp.fp_status);
+    TCGv_ptr fpst = tcg_temp_new_ptr();
+
+    tcg_gen_addi_ptr(fpst, cpu_env, fpst_offs);
+    return fpst;
+}
+
 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
  * than the 32 bit equivalent.
  */
@@ -277,6 +409,318 @@ static inline void gen_logic_CC(int sf, TCGv_i64 result)
     tcg_gen_movi_i32(cpu_VF, 0);
 }
 
+/* dest = T0 + T1; compute C, N, V and Z flags (64 bit if sf, else 32 bit) */
+static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+{
+    if (sf) {
+        TCGv_i64 result, flag, tmp;
+        result = tcg_temp_new_i64();
+        flag = tcg_temp_new_i64();
+        tmp = tcg_temp_new_i64();
+
+        tcg_gen_movi_i64(tmp, 0);
+        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); /* flag = carry */
+
+        tcg_gen_trunc_i64_i32(cpu_CF, flag);
+
+        gen_set_NZ64(result);
+
+        tcg_gen_xor_i64(flag, result, t0);
+        tcg_gen_xor_i64(tmp, t0, t1);
+        tcg_gen_andc_i64(flag, flag, tmp); /* (res ^ t0) & ~(t0 ^ t1) */
+        tcg_temp_free_i64(tmp);
+        tcg_gen_shri_i64(flag, flag, 32); /* VF keeps the top 32 bits */
+        tcg_gen_trunc_i64_i32(cpu_VF, flag);
+
+        tcg_gen_mov_i64(dest, result);
+        tcg_temp_free_i64(result);
+        tcg_temp_free_i64(flag);
+    } else {
+        /* 32 bit arithmetic: the sum lands in NF and the carry in CF */
+        TCGv_i32 t0_32 = tcg_temp_new_i32();
+        TCGv_i32 t1_32 = tcg_temp_new_i32();
+        TCGv_i32 tmp = tcg_temp_new_i32();
+
+        tcg_gen_movi_i32(tmp, 0);
+        tcg_gen_trunc_i64_i32(t0_32, t0);
+        tcg_gen_trunc_i64_i32(t1_32, t1);
+        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
+        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
+        tcg_gen_xor_i32(tmp, t0_32, t1_32);
+        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); /* (res ^ t0) & ~(t0 ^ t1) */
+        tcg_gen_extu_i32_i64(dest, cpu_NF); /* dest is zero-extended */
+
+        tcg_temp_free_i32(tmp);
+        tcg_temp_free_i32(t0_32);
+        tcg_temp_free_i32(t1_32);
+    }
+}
+
+/* dest = T0 - T1; compute C, N, V and Z flags (64 bit if sf, else 32 bit) */
+static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+{
+    if (sf) {
+        /* 64 bit arithmetic */
+        TCGv_i64 result, flag, tmp;
+
+        result = tcg_temp_new_i64();
+        flag = tcg_temp_new_i64();
+        tcg_gen_sub_i64(result, t0, t1);
+
+        gen_set_NZ64(result);
+
+        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); /* C = t0 >= t1 */
+        tcg_gen_trunc_i64_i32(cpu_CF, flag);
+
+        tcg_gen_xor_i64(flag, result, t0);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_xor_i64(tmp, t0, t1);
+        tcg_gen_and_i64(flag, flag, tmp); /* (res ^ t0) & (t0 ^ t1) */
+        tcg_temp_free_i64(tmp);
+        tcg_gen_shri_i64(flag, flag, 32); /* VF keeps the top 32 bits */
+        tcg_gen_trunc_i64_i32(cpu_VF, flag);
+        tcg_gen_mov_i64(dest, result);
+        tcg_temp_free_i64(flag);
+        tcg_temp_free_i64(result);
+    } else {
+        /* 32 bit arithmetic: the difference lands directly in NF */
+        TCGv_i32 t0_32 = tcg_temp_new_i32();
+        TCGv_i32 t1_32 = tcg_temp_new_i32();
+        TCGv_i32 tmp;
+
+        tcg_gen_trunc_i64_i32(t0_32, t0);
+        tcg_gen_trunc_i64_i32(t1_32, t1);
+        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
+        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
+        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, t0_32, t1_32);
+        tcg_temp_free_i32(t0_32);
+        tcg_temp_free_i32(t1_32);
+        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); /* (res ^ t0) & (t0 ^ t1) */
+        tcg_temp_free_i32(tmp);
+        tcg_gen_extu_i32_i64(dest, cpu_NF); /* dest is zero-extended */
+    }
+}
+
+/* dest = T0 + T1 + CF, zero-extended from 32 bits if !sf; no flags set */
+static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+{
+    TCGv_i64 carry_in = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(carry_in, cpu_CF);
+    tcg_gen_add_i64(dest, t0, t1);
+    tcg_gen_add_i64(dest, dest, carry_in);
+    tcg_temp_free_i64(carry_in);
+
+    if (sf == 0) {
+        tcg_gen_ext32u_i64(dest, dest);
+    }
+}
+
+/* dest = T0 + T1 + CF; compute C, N, V and Z flags (64 bit if sf, else 32). */
+static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+{
+    if (sf) {
+        TCGv_i64 result, cf_64, vf_64, tmp;
+        result = tcg_temp_new_i64();
+        cf_64 = tcg_temp_new_i64();
+        vf_64 = tcg_temp_new_i64();
+        tmp = tcg_const_i64(0);
+
+        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
+        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp); /* t0 + CF */
+        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp); /* + t1 */
+        tcg_gen_trunc_i64_i32(cpu_CF, cf_64);
+        gen_set_NZ64(result);
+
+        tcg_gen_xor_i64(vf_64, result, t0);
+        tcg_gen_xor_i64(tmp, t0, t1);
+        tcg_gen_andc_i64(vf_64, vf_64, tmp); /* (res ^ t0) & ~(t0 ^ t1) */
+        tcg_gen_shri_i64(vf_64, vf_64, 32); /* VF keeps the top 32 bits */
+        tcg_gen_trunc_i64_i32(cpu_VF, vf_64);
+
+        tcg_gen_mov_i64(dest, result);
+
+        tcg_temp_free_i64(tmp);
+        tcg_temp_free_i64(vf_64);
+        tcg_temp_free_i64(cf_64);
+        tcg_temp_free_i64(result);
+    } else {
+        TCGv_i32 t0_32, t1_32, tmp;
+        t0_32 = tcg_temp_new_i32();
+        t1_32 = tcg_temp_new_i32();
+        tmp = tcg_const_i32(0);
+
+        tcg_gen_trunc_i64_i32(t0_32, t0);
+        tcg_gen_trunc_i64_i32(t1_32, t1);
+        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
+        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
+
+        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
+        tcg_gen_xor_i32(tmp, t0_32, t1_32);
+        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); /* (res ^ t0) & ~(t0 ^ t1) */
+        tcg_gen_extu_i32_i64(dest, cpu_NF); /* dest is zero-extended */
+
+        tcg_temp_free_i32(tmp);
+        tcg_temp_free_i32(t1_32);
+        tcg_temp_free_i32(t0_32);
+    }
+}
+
+/*
+ * Load/Store generators
+ */
+
+/*
+ * Store from GPR register to memory; size is log2 of the byte count (0..3)
+ */
+static void do_gpr_st(DisasContext *s, TCGv_i64 source,
+                      TCGv_i64 tcg_addr, int size)
+{
+    g_assert(size <= 3);
+    tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size);
+}
+
+/*
+ * Load from memory to GPR register; size is log2 of the byte count (0..3)
+ */
+static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+                      int size, bool is_signed, bool extend)
+{
+    TCGMemOp memop = MO_TE + size;
+
+    g_assert(size <= 3);
+
+    if (is_signed) {
+        memop += MO_SIGN;
+    }
+
+    tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop);
+
+    if (extend && is_signed) {
+        g_assert(size < 3);
+        tcg_gen_ext32u_i64(dest, dest); /* clear bits 63:32 of the result */
+    }
+}
+
+/*
+ * Store the bottom 2^size bytes (size 0..4) of an FP register to memory
+ */
+static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
+{
+    /* This writes the bottom N bits of a 128 bit wide vector to memory */
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(srcidx, MO_64));
+    if (size < 4) {
+        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
+    } else {
+        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
+        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
+        /* Low 64 bits are stored exactly once; high 64 bits go to addr + 8 */
+        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(srcidx));
+        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
+        tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
+        tcg_temp_free_i64(tcg_hiaddr);
+    }
+
+    tcg_temp_free_i64(tmp);
+}
+
+/*
+ * Load 2^size bytes (size 0..4) from memory to FP register destidx
+ */
+static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
+{
+    /* This always zero-extends and writes to a full 128 bit wide vector */
+    TCGv_i64 tmplo = tcg_temp_new_i64();
+    TCGv_i64 tmphi;
+
+    if (size < 4) {
+        TCGMemOp memop = MO_TE + size;
+        tmphi = tcg_const_i64(0); /* high 64 bits of the vector become 0 */
+        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
+    } else {
+        TCGv_i64 tcg_hiaddr;
+        tmphi = tcg_temp_new_i64();
+        tcg_hiaddr = tcg_temp_new_i64();
+
+        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
+        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); /* high half at addr + 8 */
+        tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
+        tcg_temp_free_i64(tcg_hiaddr);
+    }
+
+    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(destidx, MO_64));
+    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(destidx));
+
+    tcg_temp_free_i64(tmplo);
+    tcg_temp_free_i64(tmphi);
+}
+
+/*
+ * Extend tcg_in into tcg_out according to 'option' (bits 1:0 select the
+ * source width, bit 2 selects sign extension), then optionally shift the
+ * result left. A temporary is the usual choice for the destination.
+ * See DecodeRegExtend() in the ARM ARM.
+ */
+static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
+                              int option, unsigned int shift)
+{
+    bool sign_extend = extract32(option, 2, 1);
+    int width_sel = extract32(option, 0, 2);
+
+    if (!sign_extend) {
+        switch (width_sel) {
+        case 0:
+            tcg_gen_ext8u_i64(tcg_out, tcg_in);
+            break;
+        case 1:
+            tcg_gen_ext16u_i64(tcg_out, tcg_in);
+            break;
+        case 2:
+            tcg_gen_ext32u_i64(tcg_out, tcg_in);
+            break;
+        case 3:
+            tcg_gen_mov_i64(tcg_out, tcg_in);
+            break;
+        }
+    } else {
+        switch (width_sel) {
+        case 0:
+            tcg_gen_ext8s_i64(tcg_out, tcg_in);
+            break;
+        case 1:
+            tcg_gen_ext16s_i64(tcg_out, tcg_in);
+            break;
+        case 2:
+            tcg_gen_ext32s_i64(tcg_out, tcg_in);
+            break;
+        case 3:
+            tcg_gen_mov_i64(tcg_out, tcg_in);
+            break;
+        }
+    }
+
+    if (shift != 0) {
+        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
+    }
+}
+
+static inline void gen_check_sp_alignment(DisasContext *s)
+{
+    /* Intentionally a no-op: no code is generated here.
+     * The AArch64 architecture mandates that (if enabled via PSTATE or
+     * SCTLR bits) SP is checked for 16-byte alignment on every SP-relative
+     * load or store, with an exception generated if it is not aligned.
+     * In line with general QEMU practice regarding misaligned accesses,
+     * we omit these checks for the sake of guest program performance.
+     * This hook exists so the checks can be added later (possibly as a
+     * "favour catching guest program bugs over speed" user option).
+     */
+}
+
 /*
  * the instruction disassembly implemented here matches
  * the instruction encoding classifications in chapter 3 (C3)
@@ -418,6 +862,11 @@ static void handle_hint(DisasContext *s, uint32_t insn,
     }
 }
 
+static void gen_clrex(DisasContext *s, uint32_t insn)
+{
+    tcg_gen_movi_i64(cpu_exclusive_addr, -1); /* -1: presumably "none" */
+}
+
 /* CLREX, DSB, DMB, ISB */
 static void handle_sync(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
@@ -429,7 +878,7 @@ static void handle_sync(DisasContext *s, uint32_t insn,
 
     switch (op2) {
     case 2: /* CLREX */
-        unsupported_encoding(s, insn);
+        gen_clrex(s, insn);
         return;
     case 4: /* DSB */
     case 5: /* DMB */
@@ -449,28 +898,140 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,
     unsupported_encoding(s, insn);
 }
 
-/* C5.6.204 SYS */
-static void handle_sys(DisasContext *s, uint32_t insn, unsigned int l,
-                       unsigned int op1, unsigned int op2,
-                       unsigned int crn, unsigned int crm, unsigned int rt)
+static void gen_get_nzcv(TCGv_i64 tcg_rt)
 {
-    unsupported_encoding(s, insn);
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    TCGv_i32 nzcv = tcg_temp_new_i32();
+
+    /* build bit 31, N */
+    tcg_gen_andi_i32(nzcv, cpu_NF, (1 << 31));
+    /* build bit 30, Z */
+    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
+    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
+    /* build bit 29, C */
+    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
+    /* build bit 28, V */
+    tcg_gen_shri_i32(tmp, cpu_VF, 31);
+    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
+    /* generate result */
+    tcg_gen_extu_i32_i64(tcg_rt, nzcv);
+
+    tcg_temp_free_i32(nzcv);
+    tcg_temp_free_i32(tmp);
 }
 
-/* C5.6.129 MRS - move from system register */
-static void handle_mrs(DisasContext *s, uint32_t insn, unsigned int op0,
-                       unsigned int op1, unsigned int op2,
-                       unsigned int crn, unsigned int crm, unsigned int rt)
+static void gen_set_nzcv(TCGv_i64 tcg_rt)
+
 {
-    unsupported_encoding(s, insn);
+    TCGv_i32 nzcv = tcg_temp_new_i32();
+
+    /* take NZCV from R[t] */
+    tcg_gen_trunc_i64_i32(nzcv, tcg_rt);
+
+    /* bit 31, N */
+    tcg_gen_andi_i32(cpu_NF, nzcv, (1 << 31));
+    /* bit 30, Z */
+    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
+    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
+    /* bit 29, C */
+    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
+    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
+    /* bit 28, V */
+    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
+    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
+    tcg_temp_free_i32(nzcv);
 }
 
-/* C5.6.131 MSR (register) - move to system register */
-static void handle_msr(DisasContext *s, uint32_t insn, unsigned int op0,
-                       unsigned int op1, unsigned int op2,
+/* C5.6.129 MRS - move from system register
+ * C5.6.131 MSR (register) - move to system register
+ * C5.6.204 SYS
+ * C5.6.205 SYSL
+ * These are all essentially the same insn in 'read' and 'write'
+ * versions, with varying op0 fields.
+ */
+static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
+                       unsigned int op0, unsigned int op1, unsigned int op2,
                        unsigned int crn, unsigned int crm, unsigned int rt)
 {
-    unsupported_encoding(s, insn);
+    const ARMCPRegInfo *ri;
+    TCGv_i64 tcg_rt;
+
+    ri = get_arm_cp_reginfo(s->cp_regs,
+                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
+                                               crn, crm, op0, op1, op2));
+
+    if (!ri) {
+        /* Unknown register */
+        unallocated_encoding(s);
+        return;
+    }
+
+    /* Check access permissions */
+    if (!cp_access_ok(s->current_pl, ri, isread)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    /* Handle special cases first */
+    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
+    case ARM_CP_NOP:
+        return;
+    case ARM_CP_NZCV:
+        tcg_rt = cpu_reg(s, rt);
+        if (isread) {
+            gen_get_nzcv(tcg_rt);
+        } else {
+            gen_set_nzcv(tcg_rt);
+        }
+        return;
+    default:
+        break;
+    }
+
+    if (use_icount && (ri->type & ARM_CP_IO)) {
+        gen_io_start();
+    }
+
+    tcg_rt = cpu_reg(s, rt);
+
+    if (isread) {
+        if (ri->type & ARM_CP_CONST) {
+            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
+        } else if (ri->readfn) {
+            TCGv_ptr tmpptr;
+            gen_a64_set_pc_im(s->pc - 4);
+            tmpptr = tcg_const_ptr(ri);
+            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
+            tcg_temp_free_ptr(tmpptr);
+        } else {
+            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
+        }
+    } else {
+        if (ri->type & ARM_CP_CONST) {
+            /* If not forbidden by access permissions, treat as WI */
+            return;
+        } else if (ri->writefn) {
+            TCGv_ptr tmpptr;
+            gen_a64_set_pc_im(s->pc - 4);
+            tmpptr = tcg_const_ptr(ri);
+            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
+            tcg_temp_free_ptr(tmpptr);
+        } else {
+            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
+        }
+    }
+
+    if (use_icount && (ri->type & ARM_CP_IO)) {
+        /* I/O operations must end the TB here (whether read or write) */
+        gen_io_end();
+        s->is_jmp = DISAS_UPDATE;
+    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
+        /* We default to ending the TB on a coprocessor register write,
+         * but allow this to be suppressed by the register definition
+         * (usually only necessary to work around guest bugs).
+         */
+        s->is_jmp = DISAS_UPDATE;
+    }
 }
 
 /* C3.2.4 System
@@ -511,23 +1072,60 @@ static void disas_system(DisasContext *s, uint32_t insn)
         }
         return;
     }
-
-    if (op0 == 1) {
-        /* C5.6.204 SYS */
-        handle_sys(s, insn, l, op1, op2, crn, crm, rt);
-    } else if (l) { /* op0 > 1 */
-        /* C5.6.129 MRS - move from system register */
-        handle_mrs(s, insn, op0, op1, op2, crn, crm, rt);
-    } else {
-        /* C5.6.131 MSR (register) - move to system register */
-        handle_msr(s, insn, op0, op1, op2, crn, crm, rt);
-    }
+    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
 }
 
-/* Exception generation */
+/* C3.2.3 Exception generation
+ *
+ *  31             24 23 21 20                     5 4   2 1  0
+ * +-----------------+-----+------------------------+-----+----+
+ * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
+ * +-----------------------+------------------------+----------+
+ */
 static void disas_exc(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int opc = extract32(insn, 21, 3);
+    int op2_ll = extract32(insn, 0, 5);
+
+    switch (opc) {
+    case 0:
+        /* SVC, HVC, SMC; since we don't support the Virtualization
+         * or TrustZone extensions these all UNDEF except SVC.
+         */
+        if (op2_ll != 1) {
+            unallocated_encoding(s);
+            break;
+        }
+        gen_exception_insn(s, 0, EXCP_SWI);
+        break;
+    case 1:
+        if (op2_ll != 0) {
+            unallocated_encoding(s);
+            break;
+        }
+        /* BRK */
+        gen_exception_insn(s, 0, EXCP_BKPT);
+        break;
+    case 2:
+        if (op2_ll != 0) {
+            unallocated_encoding(s);
+            break;
+        }
+        /* HLT */
+        unsupported_encoding(s, insn);
+        break;
+    case 5:
+        if (op2_ll < 1 || op2_ll > 3) {
+            unallocated_encoding(s);
+            break;
+        }
+        /* DCPS1, DCPS2, DCPS3 */
+        unsupported_encoding(s, insn);
+        break;
+    default:
+        unallocated_encoding(s);
+        break;
+    }
 }
 
 /* C3.2.7 Unconditional branch (register)
@@ -608,28 +1206,633 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Load/store exclusive */
+/*
+ * Load/Store exclusive instructions are implemented by remembering
+ * the value/address loaded, and seeing if these are the same
+ * when the store is performed. This is not actually the architecturally
+ * mandated semantics, but it works for typical guest code sequences
+ * and avoids having to monitor regular stores.
+ *
+ * In system emulation mode only one CPU will be running at once, so
+ * this sequence is effectively atomic.  In user emulation mode we
+ * throw an exception and handle the atomic operation elsewhere.
+ */
+static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
+                               TCGv_i64 addr, int size, bool is_pair)
+{
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    TCGMemOp memop = MO_TE + size;
+
+    g_assert(size <= 3);
+    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
+
+    if (is_pair) {
+        TCGv_i64 addr2 = tcg_temp_new_i64();
+        TCGv_i64 hitmp = tcg_temp_new_i64();
+
+        g_assert(size >= 2);
+        tcg_gen_addi_i64(addr2, addr, 1 << size); /* second element */
+        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
+        tcg_temp_free_i64(addr2);
+        tcg_gen_mov_i64(cpu_exclusive_high, hitmp); /* remembered value */
+        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
+        tcg_temp_free_i64(hitmp);
+    }
+
+    tcg_gen_mov_i64(cpu_exclusive_val, tmp); /* remembered value */
+    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
+
+    tcg_temp_free_i64(tmp);
+    tcg_gen_mov_i64(cpu_exclusive_addr, addr); /* remembered address */
+}
+
+#ifdef CONFIG_USER_ONLY
+static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
+                                TCGv_i64 addr, int size, int is_pair)
+{
+    tcg_gen_mov_i64(cpu_exclusive_test, addr); /* address being tested */
+    tcg_gen_movi_i32(cpu_exclusive_info, /* size[1:0] pair[2] rd rt rt2 */
+                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
+    gen_exception_insn(s, 4, EXCP_STREX); /* store itself is done elsewhere */
+}
+#else
+static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
+                                TCGv_i64 addr, int size, int is_pair)
+{
+    qemu_log_mask(LOG_UNIMP, /* system mode: only logs, emits no store */
+                  "%s:%d: system mode store_exclusive unsupported "
+                  "at pc=%016" PRIx64 "\n",
+                  __FILE__, __LINE__, s->pc - 4);
+}
+#endif
+
+/* C3.3.6 Load/store exclusive
+ *
+ *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
+ * +-----+-------------+----+---+----+------+----+-------+------+------+
+ * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
+ * +-----+-------------+----+---+----+------+----+-------+------+------+
+ *
+ *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
+ *   L: 0 -> store, 1 -> load
+ *  o2: 0 -> exclusive, 1 -> not
+ *  o1: 0 -> single register, 1 -> register pair
+ *  o0: 1 -> load-acquire/store-release, 0 -> not
+ *
+ *  o0 == 0 AND o2 == 1 is un-allocated
+ *  o1 == 1 is un-allocated except for 32 and 64 bit sizes
+ */
 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int rt = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int rt2 = extract32(insn, 10, 5);
+    int is_lasr = extract32(insn, 15, 1); /* o0 */
+    int rs = extract32(insn, 16, 5);
+    int is_pair = extract32(insn, 21, 1); /* o1 */
+    int is_store = !extract32(insn, 22, 1); /* L == 0 */
+    int is_excl = !extract32(insn, 23, 1); /* o2 == 0 */
+    int size = extract32(insn, 30, 2);
+    TCGv_i64 tcg_addr;
+
+    if ((!is_excl && !is_lasr) ||
+        (is_pair && size < 2)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+
+    /* Note that since TCG is single threaded load-acquire/store-release
+     * semantics require no extra if (is_lasr) { ... } handling.
+     */
+
+    if (is_excl) {
+        if (!is_store) {
+            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
+        } else {
+            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
+        }
+    } else {
+        TCGv_i64 tcg_rt = cpu_reg(s, rt);
+        if (is_store) {
+            do_gpr_st(s, tcg_rt, tcg_addr, size);
+        } else {
+            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
+        }
+        if (is_pair) {
+            TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); /* second register is Rt2 */
+            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
+            if (is_store) {
+                do_gpr_st(s, tcg_rt2, tcg_addr, size);
+            } else {
+                do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
+            }
+        }
+    }
 }
 
-/* Load register (literal) */
+/*
+ * C3.3.5 Load register (literal)
+ *
+ *  31 30 29   27  26 25 24 23                5 4     0
+ * +-----+-------+---+-----+-------------------+-------+
+ * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
+ * +-----+-------+---+-----+-------------------+-------+
+ *
+ * V: 1 -> vector (simd/fp)
+ * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
+ *                   10-> 32 bit signed, 11 -> prefetch
+ * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
+ */
 static void disas_ld_lit(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int rt = extract32(insn, 0, 5);
+    int64_t imm = sextract32(insn, 5, 19) << 2;
+    bool is_vector = extract32(insn, 26, 1);
+    int opc = extract32(insn, 30, 2);
+    bool is_signed = false;
+    int size = 2;
+    TCGv_i64 tcg_rt, tcg_addr;
+
+    if (is_vector) {
+        if (opc == 3) {
+            unallocated_encoding(s);
+            return;
+        }
+        size = 2 + opc;
+    } else {
+        if (opc == 3) {
+            /* PRFM (literal) : prefetch */
+            return;
+        }
+        size = 2 + extract32(opc, 0, 1);
+        is_signed = extract32(opc, 1, 1);
+    }
+
+    tcg_rt = cpu_reg(s, rt);
+
+    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
+    if (is_vector) {
+        do_fp_ld(s, rt, tcg_addr, size);
+    } else {
+        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
+    }
+    tcg_temp_free_i64(tcg_addr);
 }
 
-/* Load/store pair (all forms) */
+/*
+ * C5.6.80 LDNP (Load Pair - non-temporal hint)
+ * C5.6.81 LDP (Load Pair - non vector)
+ * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
+ * C5.6.176 STNP (Store Pair - non-temporal hint)
+ * C5.6.177 STP (Store Pair - non vector)
+ * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
+ * C6.3.165 LDP (Load Pair of SIMD&FP)
+ * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
+ * C6.3.284 STP (Store Pair of SIMD&FP)
+ *
+ *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
+ * +-----+-------+---+---+-------+---+-------+-------+------+------+
+ * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
+ * +-----+-------+---+---+-------+---+-------+-------+------+------+
+ *
+ * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
+ *      LDPSW                    01
+ *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
+ *   V: 0 -> GPR, 1 -> Vector
+ * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
+ *      10 -> signed offset, 11 -> pre-index
+ *   L: 0 -> Store 1 -> Load
+ *
+ * Rt, Rt2 = GPR or SIMD registers to be loaded or stored
+ * Rn = general purpose register containing address
+ * imm7 = signed offset (multiple of 4 or 8 depending on size)
+ */
 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int rt = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int rt2 = extract32(insn, 10, 5);
+    int64_t offset = sextract32(insn, 15, 7);
+    int index = extract32(insn, 23, 2);
+    bool is_vector = extract32(insn, 26, 1);
+    bool is_load = extract32(insn, 22, 1);
+    int opc = extract32(insn, 30, 2);
+
+    bool is_signed = false;
+    bool postindex = false;
+    bool wback = false;
+
+    TCGv_i64 tcg_addr; /* calculated address */
+    int size; /* log2 of the per-register access size in bytes */
+
+    if (opc == 3) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (is_vector) {
+        size = 2 + opc; /* 32, 64 or 128 bit */
+    } else {
+        size = 2 + extract32(opc, 1, 1); /* opc<1>: 32 or 64 bit */
+        is_signed = extract32(opc, 0, 1); /* opc<0>: LDPSW */
+        if (!is_load && is_signed) { /* LDPSW has no store counterpart */
+            unallocated_encoding(s);
+            return;
+        }
+    }
+
+    switch (index) {
+    case 1: /* post-index */
+        postindex = true;
+        wback = true;
+        break;
+    case 0:
+        /* signed offset with "non-temporal" hint. Since we don't emulate
+         * caches we don't care about hints to the cache system about
+         * data access patterns, and handle this identically to plain
+         * signed offset.
+         */
+        if (is_signed) {
+            /* There is no non-temporal-hint version of LDPSW */
+            unallocated_encoding(s);
+            return;
+        }
+        postindex = false;
+        break;
+    case 2: /* signed offset, rn not updated */
+        postindex = false;
+        break;
+    case 3: /* pre-index */
+        postindex = false;
+        wback = true;
+        break;
+    }
+
+    offset <<= size; /* imm7 is in units of the access size */
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+
+    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+
+    if (!postindex) {
+        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
+    }
+
+    if (is_vector) {
+        if (is_load) {
+            do_fp_ld(s, rt, tcg_addr, size);
+        } else {
+            do_fp_st(s, rt, tcg_addr, size);
+        }
+    } else {
+        TCGv_i64 tcg_rt = cpu_reg(s, rt);
+        if (is_load) {
+            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
+        } else {
+            do_gpr_st(s, tcg_rt, tcg_addr, size);
+        }
+    }
+    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size); /* second element */
+    if (is_vector) {
+        if (is_load) {
+            do_fp_ld(s, rt2, tcg_addr, size);
+        } else {
+            do_fp_st(s, rt2, tcg_addr, size);
+        }
+    } else {
+        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
+        if (is_load) {
+            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
+        } else {
+            do_gpr_st(s, tcg_rt2, tcg_addr, size);
+        }
+    }
+
+    if (wback) { /* tcg_addr was advanced by one element above */
+        if (postindex) {
+            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
+        } else {
+            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
+        }
+        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr); /* base + offset */
+    }
+}
+
+/*
+ * C3.3.8 Load/store (immediate post-indexed)
+ * C3.3.9 Load/store (immediate pre-indexed)
+ * C3.3.12 Load/store (unscaled immediate)
+ *
+ * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
+ * +----+-------+---+-----+-----+---+--------+-----+------+------+
+ * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
+ * +----+-------+---+-----+-----+---+--------+-----+------+------+
+ *
+ * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
+ * V = 0 -> non-vector
+ * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
+ * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
+ */
+static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
+{
+    int rt = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int imm9 = sextract32(insn, 12, 9);
+    int opc = extract32(insn, 22, 2);
+    int size = extract32(insn, 30, 2);
+    int idx = extract32(insn, 10, 2);
+    bool is_signed = false;
+    bool is_store = false;
+    bool is_extended = false;
+    bool is_vector = extract32(insn, 26, 1);
+    bool post_index;
+    bool writeback;
+
+    TCGv_i64 tcg_addr;
+
+    if (is_vector) {
+        size |= (opc & 2) << 1; /* vector size is opc<1>:size<1:0> */
+        if (size > 4) {
+            unallocated_encoding(s);
+            return;
+        }
+        is_store = ((opc & 1) == 0);
+    } else {
+        if (size == 3 && opc == 2 && idx == 0) {
+            /* PRFUM - prefetch hint (unscaled form only): emit nothing */
+            return;
+        }
+        if ((opc == 3 && size > 1) || (size == 3 && opc == 2)) {
+            unallocated_encoding(s); /* incl. pre/post-indexed "prefetch" */
+            return;
+        }
+        is_store = (opc == 0);
+        is_signed = opc & (1<<1);
+        is_extended = (size < 3) && (opc & 1);
+    }
+
+    switch (idx) {
+    case 0: /* unscaled immediate, no writeback */
+        post_index = false;
+        writeback = false;
+        break;
+    case 1: /* post-indexed */
+        post_index = true;
+        writeback = true;
+        break;
+    case 3: /* pre-indexed */
+        post_index = false;
+        writeback = true;
+        break;
+    case 2: /* routed away from here by disas_ldst_reg_imm() */
+        g_assert(false);
+        break;
+    }
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+
+    if (!post_index) {
+        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
+    }
+
+    if (is_vector) {
+        if (is_store) {
+            do_fp_st(s, rt, tcg_addr, size);
+        } else {
+            do_fp_ld(s, rt, tcg_addr, size);
+        }
+    } else {
+        TCGv_i64 tcg_rt = cpu_reg(s, rt);
+        if (is_store) {
+            do_gpr_st(s, tcg_rt, tcg_addr, size);
+        } else {
+            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
+        }
+    }
+
+    if (writeback) {
+        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
+        if (post_index) { /* access used the old base; apply imm9 now */
+            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
+        }
+        tcg_gen_mov_i64(tcg_rn, tcg_addr);
+    }
+}
+
+/*
+ * C3.3.10 Load/store (register offset)
+ *
+ * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
+ * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
+ * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
+ * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
+ *
+ * For non-vector:
+ *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
+ *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
+ * For vector:
+ *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
+ *   opc<0>: 0 -> store, 1 -> load
+ * V: 1 -> vector/simd
+ * opt: extend encoding (see DecodeRegExtend)
+ * S: if S=1 then scale (essentially index by sizeof(size))
+ * Rt: register to transfer into/out of
+ * Rn: address register or SP for base
+ * Rm: offset register or ZR for offset
+ */
+static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
+{
+    int rt = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int shift = extract32(insn, 12, 1); /* the S bit */
+    int rm = extract32(insn, 16, 5);
+    int opc = extract32(insn, 22, 2);
+    int opt = extract32(insn, 13, 3);
+    int size = extract32(insn, 30, 2);
+    bool is_signed = false;
+    bool is_store = false;
+    bool is_extended = false;
+    bool is_vector = extract32(insn, 26, 1);
+
+    TCGv_i64 tcg_rm;
+    TCGv_i64 tcg_addr;
+
+    if (extract32(opt, 1, 1) == 0) { /* opt<1> clear is not allocated */
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (is_vector) {
+        size |= (opc & 2) << 1; /* vector size is opc<1>:size<1:0> */
+        if (size > 4) {
+            unallocated_encoding(s);
+            return;
+        }
+        is_store = !extract32(opc, 0, 1);
+    } else {
+        if (size == 3 && opc == 2) {
+            /* PRFM - prefetch: no code is generated */
+            return;
+        }
+        if (opc == 3 && size > 1) {
+            unallocated_encoding(s);
+            return;
+        }
+        is_store = (opc == 0);
+        is_signed = extract32(opc, 1, 1);
+        is_extended = (size < 3) && extract32(opc, 0, 1);
+    }
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+
+    tcg_rm = read_cpu_reg(s, rm, 1);
+    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); /* S: scale */
+
+    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm); /* base + offset reg */
+
+    if (is_vector) {
+        if (is_store) {
+            do_fp_st(s, rt, tcg_addr, size);
+        } else {
+            do_fp_ld(s, rt, tcg_addr, size);
+        }
+    } else {
+        TCGv_i64 tcg_rt = cpu_reg(s, rt);
+        if (is_store) {
+            do_gpr_st(s, tcg_rt, tcg_addr, size);
+        } else {
+            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
+        }
+    }
+}
+
+/*
+ * C3.3.13 Load/store (unsigned immediate)
+ *
+ * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
+ * +----+-------+---+-----+-----+------------+-------+------+
+ * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
+ * +----+-------+---+-----+-----+------------+-------+------+
+ *
+ * For non-vector:
+ *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
+ *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
+ * For vector:
+ *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
+ *   opc<0>: 0 -> store, 1 -> load
+ * Rn: base address register (inc SP)
+ * Rt: target register
+ */
+static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
+{
+    int rt = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    unsigned int imm12 = extract32(insn, 10, 12);
+    bool is_vector = extract32(insn, 26, 1);
+    int size = extract32(insn, 30, 2);
+    int opc = extract32(insn, 22, 2);
+    unsigned int offset;
+
+    TCGv_i64 tcg_addr;
+
+    bool is_store;
+    bool is_signed = false;
+    bool is_extended = false;
+
+    if (is_vector) {
+        size |= (opc & 2) << 1; /* vector size is opc<1>:size<1:0> */
+        if (size > 4) {
+            unallocated_encoding(s);
+            return;
+        }
+        is_store = !extract32(opc, 0, 1);
+    } else {
+        if (size == 3 && opc == 2) {
+            /* PRFM - prefetch: no code is generated */
+            return;
+        }
+        if (opc == 3 && size > 1) {
+            unallocated_encoding(s);
+            return;
+        }
+        is_store = (opc == 0);
+        is_signed = extract32(opc, 1, 1);
+        is_extended = (size < 3) && extract32(opc, 0, 1);
+    }
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+    offset = imm12 << size; /* imm12 is in units of the access size */
+    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
+
+    if (is_vector) {
+        if (is_store) {
+            do_fp_st(s, rt, tcg_addr, size);
+        } else {
+            do_fp_ld(s, rt, tcg_addr, size);
+        }
+    } else {
+        TCGv_i64 tcg_rt = cpu_reg(s, rt);
+        if (is_store) {
+            do_gpr_st(s, tcg_rt, tcg_addr, size);
+        } else {
+            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
+        }
+    }
+}
+
+/* Load/store register (immediate forms): dispatch on insn bits [11:10] */
+static void disas_ldst_reg_imm(DisasContext *s, uint32_t insn)
+{
+    switch (extract32(insn, 10, 2)) {
+    case 0: case 1: case 3:
+        /* Load/store register (unscaled immediate) */
+        /* Load/store immediate pre/post-indexed */
+        disas_ldst_reg_imm9(s, insn);
+        break;
+    case 2:
+        /* Load/store register unprivileged: not implemented yet */
+        unsupported_encoding(s, insn);
+        break;
+    default: /* not reachable: all four values of the field are handled */
+        unallocated_encoding(s);
+        break;
+    }
 }
 
 /* Load/store register (all forms) */
 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    switch (extract32(insn, 24, 2)) { /* insn bits [25:24] */
+    case 0:
+        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
+            disas_ldst_reg_roffset(s, insn);
+        } else { /* everything else is handed to the imm9 dispatcher */
+            disas_ldst_reg_imm(s, insn);
+        }
+        break;
+    case 1:
+        disas_ldst_reg_unsigned_imm(s, insn);
+        break;
+    default:
+        unallocated_encoding(s);
+        break;
+    }
 }
 
 /* AdvSIMD load/store multiple structures */
@@ -701,10 +1904,68 @@ static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
 }
 
-/* Add/subtract (immediate) */
+/*
+ * C3.4.1 Add/subtract (immediate)
+ *
+ *  31 30 29 28       24 23 22 21         10 9   5 4   0
+ * +--+--+--+-----------+-----+-------------+-----+-----+
+ * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
+ * +--+--+--+-----------+-----+-------------+-----+-----+
+ *
+ *    sf: 0 -> 32bit, 1 -> 64bit
+ *    op: 0 -> add  , 1 -> sub
+ *     S: 1 -> set flags
+ * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
+ */
 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int rd = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    uint64_t imm = extract32(insn, 10, 12);
+    int shift = extract32(insn, 22, 2);
+    bool setflags = extract32(insn, 29, 1);
+    bool sub_op = extract32(insn, 30, 1);
+    bool is_64bit = extract32(insn, 31, 1);
+
+    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
+    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
+    TCGv_i64 tcg_result;
+
+    switch (shift) {
+    case 0x0: /* LSL #0 */
+        break;
+    case 0x1: /* LSL #12 */
+        imm <<= 12;
+        break;
+    default: /* shift values 2 and 3 are rejected */
+        unallocated_encoding(s);
+        return;
+    }
+
+    tcg_result = tcg_temp_new_i64();
+    if (!setflags) { /* computed at 64 bits; narrowed below if !is_64bit */
+        if (sub_op) {
+            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
+        } else {
+            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
+        }
+    } else {
+        TCGv_i64 tcg_imm = tcg_const_i64(imm);
+        if (sub_op) {
+            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
+        } else {
+            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
+        }
+        tcg_temp_free_i64(tcg_imm);
+    }
+
+    if (is_64bit) {
+        tcg_gen_mov_i64(tcg_rd, tcg_result);
+    } else {
+        tcg_gen_ext32u_i64(tcg_rd, tcg_result); /* zero-extend 32-bit result */
+    }
+
+    tcg_temp_free_i64(tcg_result);
 }
 
 /* The input should be a value in the bottom e bits (with higher
@@ -863,10 +2124,57 @@ static void disas_logic_imm(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Move wide (immediate) */
+/*
+ * C3.4.5 Move wide (immediate)
+ *
+ *  31 30 29 28         23 22 21 20             5 4    0
+ * +--+-----+-------------+-----+----------------+------+
+ * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
+ * +--+-----+-------------+-----+----------------+------+
+ *
+ * sf: 0 -> 32 bit, 1 -> 64 bit
+ * opc: 00 -> N, 10 -> Z, 11 -> K
+ * hw: shift/16 (0,16, and sf only 32, 48)
+ */
 static void disas_movw_imm(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int rd = extract32(insn, 0, 5);
+    uint64_t imm = extract32(insn, 5, 16);
+    int sf = extract32(insn, 31, 1);
+    int opc = extract32(insn, 29, 2);
+    int pos = extract32(insn, 21, 2) << 4; /* hw * 16: bit position */
+    TCGv_i64 tcg_rd = cpu_reg(s, rd);
+    TCGv_i64 tcg_imm;
+
+    if (!sf && (pos >= 32)) { /* 32-bit form only allows hw 0 or 1 */
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (opc) {
+    case 0: /* MOVN */
+    case 2: /* MOVZ */
+        imm <<= pos;
+        if (opc == 0) {
+            imm = ~imm;
+        }
+        if (!sf) {
+            imm &= 0xffffffffu; /* 32-bit form: upper half is zero */
+        }
+        tcg_gen_movi_i64(tcg_rd, imm);
+        break;
+    case 3: /* MOVK */
+        tcg_imm = tcg_const_i64(imm);
+        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16); /* keep rest */
+        tcg_temp_free_i64(tcg_imm);
+        if (!sf) {
+            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+        }
+        break;
+    default: /* opc == 1 */
+        unallocated_encoding(s);
+        break;
+    }
 }
 
 /* C3.4.2 Bitfield
@@ -1162,40 +2470,346 @@ static void disas_logic_reg(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Add/subtract (extended register) */
+/*
+ * C3.5.1 Add/subtract (extended register)
+ *
+ *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
+ * +--+--+--+-----------+-----+--+-------+------+------+----+----+
+ * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
+ * +--+--+--+-----------+-----+--+-------+------+------+----+----+
+ *
+ *  sf: 0 -> 32bit, 1 -> 64bit
+ *  op: 0 -> add  , 1 -> sub
+ *   S: 1 -> set flags
+ * opt: 00
+ * option: extension type (see DecodeRegExtend)
+ * imm3: optional shift to Rm
+ *
+ * Rd = Rn + LSL(extend(Rm), amount)
+ */
 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int rd = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int imm3 = extract32(insn, 10, 3);
+    int option = extract32(insn, 13, 3);
+    int rm = extract32(insn, 16, 5);
+    int opt = extract32(insn, 22, 2);
+    bool setflags = extract32(insn, 29, 1);
+    bool sub_op = extract32(insn, 30, 1);
+    bool sf = extract32(insn, 31, 1);
+
+    TCGv_i64 tcg_rm, tcg_rn; /* temps */
+    TCGv_i64 tcg_rd, tcg_result;
+
+    if (imm3 > 4 || opt != 0) { /* shift is 0..4 and opt must be 00 */
+        unallocated_encoding(s);
+        return;
+    }
+
+    /* non-flag setting ops may use SP */
+    if (!setflags) {
+        tcg_rn = read_cpu_reg_sp(s, rn, sf);
+        tcg_rd = cpu_reg_sp(s, rd);
+    } else {
+        tcg_rn = read_cpu_reg(s, rn, sf);
+        tcg_rd = cpu_reg(s, rd);
+    }
+
+    tcg_rm = read_cpu_reg(s, rm, sf);
+    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); /* LSL(extend(Rm)) */
+
+    tcg_result = tcg_temp_new_i64();
+
+    if (!setflags) {
+        if (sub_op) {
+            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
+        } else {
+            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
+        }
+    } else {
+        if (sub_op) {
+            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
+        } else {
+            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
+        }
+    }
+
+    if (sf) {
+        tcg_gen_mov_i64(tcg_rd, tcg_result);
+    } else {
+        tcg_gen_ext32u_i64(tcg_rd, tcg_result); /* zero-extend 32-bit result */
+    }
+
+    tcg_temp_free_i64(tcg_result);
 }
 
-/* Add/subtract (shifted register) */
+/*
+ * C3.5.2 Add/subtract (shifted register)
+ *
+ *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
+ * +--+--+--+-----------+-----+--+-------+---------+------+------+
+ * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
+ * +--+--+--+-----------+-----+--+-------+---------+------+------+
+ *
+ *    sf: 0 -> 32bit, 1 -> 64bit
+ *    op: 0 -> add  , 1 -> sub
+ *     S: 1 -> set flags
+ * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
+ *  imm6: Shift amount to apply to Rm before the add/sub
+ */
 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int rd = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int imm6 = extract32(insn, 10, 6);
+    int rm = extract32(insn, 16, 5);
+    int shift_type = extract32(insn, 22, 2);
+    bool setflags = extract32(insn, 29, 1);
+    bool sub_op = extract32(insn, 30, 1);
+    bool sf = extract32(insn, 31, 1);
+
+    TCGv_i64 tcg_rd = cpu_reg(s, rd);
+    TCGv_i64 tcg_rn, tcg_rm;
+    TCGv_i64 tcg_result;
+
+    /* Shift type 11 is reserved; 32-bit forms only shift by 0..31 */
+    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    tcg_rn = read_cpu_reg(s, rn, sf);
+    tcg_rm = read_cpu_reg(s, rm, sf);
+
+    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
+
+    tcg_result = tcg_temp_new_i64();
+
+    if (!setflags) {
+        if (sub_op) {
+            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
+        } else {
+            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
+        }
+    } else {
+        if (sub_op) {
+            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
+        } else {
+            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
+        }
+    }
+
+    if (sf) {
+        tcg_gen_mov_i64(tcg_rd, tcg_result);
+    } else {
+        tcg_gen_ext32u_i64(tcg_rd, tcg_result); /* zero-extend 32-bit result */
+    }
+
+    tcg_temp_free_i64(tcg_result);
 }
 
-/* Data-processing (3 source) */
+/* C3.5.9 Data-processing (3 source)
+ *
+ *  31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
+ * +--+------+-----------+------+------+----+------+------+------+
+ * |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
+ * +--+------+-----------+------+------+----+------+------+------+
+ *
+ */
 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int rd = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int ra = extract32(insn, 10, 5);
+    int rm = extract32(insn, 16, 5);
+    int op_id = (extract32(insn, 29, 3) << 4) |
+        (extract32(insn, 21, 3) << 1) |
+        extract32(insn, 15, 1);
+    bool sf = extract32(insn, 31, 1);
+    bool is_sub = extract32(op_id, 0, 1); /* o0 */
+    bool is_high = extract32(op_id, 2, 1); /* op31<1>: SMULH / UMULH */
+    bool is_signed = false;
+    TCGv_i64 tcg_op1;
+    TCGv_i64 tcg_op2;
+    TCGv_i64 tcg_tmp;
+
+    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
+    switch (op_id) {
+    case 0x42: /* SMADDL */
+    case 0x43: /* SMSUBL */
+    case 0x44: /* SMULH */
+        is_signed = true;
+        break;
+    case 0x0: /* MADD (32bit) */
+    case 0x1: /* MSUB (32bit) */
+    case 0x40: /* MADD (64bit) */
+    case 0x41: /* MSUB (64bit) */
+    case 0x4a: /* UMADDL */
+    case 0x4b: /* UMSUBL */
+    case 0x4c: /* UMULH */
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (is_high) { /* rd receives the high half of the double-width product */
+        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
+        TCGv_i64 tcg_rd = cpu_reg(s, rd);
+        TCGv_i64 tcg_rn = cpu_reg(s, rn);
+        TCGv_i64 tcg_rm = cpu_reg(s, rm);
+
+        if (is_signed) {
+            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
+        } else {
+            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
+        }
+
+        tcg_temp_free_i64(low_bits);
+        return;
+    }
+
+    tcg_op1 = tcg_temp_new_i64();
+    tcg_op2 = tcg_temp_new_i64();
+    tcg_tmp = tcg_temp_new_i64();
+
+    if (op_id < 0x42) { /* MADD/MSUB: operands are used unextended */
+        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
+        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
+    } else { /* long forms: extend the low 32 bits of each operand */
+        if (is_signed) {
+            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
+            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
+        } else {
+            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
+            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
+        }
+    }
+
+    if (ra == 31 && !is_sub) {
+        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
+        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
+    } else {
+        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
+        if (is_sub) {
+            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
+        } else {
+            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
+        }
+    }
+
+    if (!sf) { /* 32-bit forms: zero-extend the result in rd */
+        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
+    }
+
+    tcg_temp_free_i64(tcg_op1);
+    tcg_temp_free_i64(tcg_op2);
+    tcg_temp_free_i64(tcg_tmp);
 }
 
-/* Add/subtract (with carry) */
+/* C3.5.3 - Add/subtract (with carry)
+ *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
+ * +--+--+--+------------------------+------+---------+------+-----+
+ * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
+ * +--+--+--+------------------------+------+---------+------+-----+
+ *                                            [000000]
+ */
+
 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
-}
+    unsigned int sf, op, setflags, rm, rn, rd;
+    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
 
-/* Conditional compare (immediate) */
-static void disas_cc_imm(DisasContext *s, uint32_t insn)
-{
-    unsupported_encoding(s, insn);
+    if (extract32(insn, 10, 6) != 0) { /* opcode2 must be 000000 */
+        unallocated_encoding(s);
+        return;
+    }
+
+    sf = extract32(insn, 31, 1);
+    op = extract32(insn, 30, 1);
+    setflags = extract32(insn, 29, 1);
+    rm = extract32(insn, 16, 5);
+    rn = extract32(insn, 5, 5);
+    rd = extract32(insn, 0, 5);
+
+    tcg_rd = cpu_reg(s, rd);
+    tcg_rn = cpu_reg(s, rn);
+
+    if (op) { /* op set: the second operand is the bitwise NOT of Rm */
+        tcg_y = new_tmp_a64(s);
+        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
+    } else {
+        tcg_y = cpu_reg(s, rm);
+    }
+
+    if (setflags) { /* S set: use the flag-setting variant */
+        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
+    } else {
+        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
+    }
 }
 
-/* Conditional compare (register) */
-static void disas_cc_reg(DisasContext *s, uint32_t insn)
+/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
+ *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
+ * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
+ * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
+ * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
+ *        [1]                             y                [0]       [0]
+ */
+static void disas_cc(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
+    int label_continue = -1;
+    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
+
+    if (!extract32(insn, 29, 1)) { /* S (bit 29) must be 1 */
+        unallocated_encoding(s);
+        return;
+    }
+    if (insn & (1 << 10 | 1 << 4)) { /* o2 and o3 must both be 0 */
+        unallocated_encoding(s);
+        return;
+    }
+    sf = extract32(insn, 31, 1);
+    op = extract32(insn, 30, 1);
+    is_imm = extract32(insn, 11, 1);
+    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
+    cond = extract32(insn, 12, 4);
+    rn = extract32(insn, 5, 5);
+    nzcv = extract32(insn, 0, 4);
+
+    if (cond < 0x0e) { /* not always */
+        int label_match = gen_new_label();
+        label_continue = gen_new_label();
+        arm_gen_test_cc(cond, label_match);
+        /* nomatch: set the flags from the nzcv immediate instead */
+        tcg_tmp = tcg_temp_new_i64();
+        tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
+        gen_set_nzcv(tcg_tmp);
+        tcg_temp_free_i64(tcg_tmp);
+        tcg_gen_br(label_continue);
+        gen_set_label(label_match);
+    }
+    /* match, or condition is always */
+    if (is_imm) {
+        tcg_y = new_tmp_a64(s);
+        tcg_gen_movi_i64(tcg_y, y);
+    } else {
+        tcg_y = cpu_reg(s, y);
+    }
+    tcg_rn = cpu_reg(s, rn);
+
+    tcg_tmp = tcg_temp_new_i64(); /* result is discarded, flags are kept */
+    if (op) {
+        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
+    } else {
+        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
+    }
+    tcg_temp_free_i64(tcg_tmp);
+
+    if (cond < 0x0e) { /* continue */
+        gen_set_label(label_continue);
+    }
 }
 
 /* C3.5.6 Conditional select
@@ -1549,11 +3163,7 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
             disas_adc_sbc(s, insn);
             break;
         case 0x2: /* Conditional compare */
-            if (insn & (1 << 11)) { /* (immediate) */
-                disas_cc_imm(s, insn);
-            } else {            /* (register) */
-                disas_cc_reg(s, insn);
-            }
+            disas_cc(s, insn); /* both imm and reg forms */
             break;
         case 0x4: /* Conditional select */
             disas_cond_select(s, insn);
@@ -1576,10 +3186,1062 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
     }
 }
 
+/* Convert an ARM FPROUNDING_* rounding mode into the corresponding
+ * softfloat float_round_* constant.
+ *
+ * FPROUNDING_ODD has no softfloat mapping here: it is reported through
+ * LOG_UNIMP and then handled as round-to-nearest-even. Any value that is
+ * not one of the FPROUNDING_* constants listed below also yields
+ * round-to-nearest-even.
+ */
+static inline int arm_rmode_to_sf(int rmode)
+{
+    switch (rmode) {
+    case FPROUNDING_TIEAWAY:
+        rmode = float_round_ties_away;
+        break;
+    case FPROUNDING_ODD:
+        /* FIXME: add support for ODD */
+        qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
+                      rmode);
+        /* fall through */
+    case FPROUNDING_TIEEVEN:
+    default:
+        rmode = float_round_nearest_even;
+        break;
+    case FPROUNDING_POSINF:
+        rmode = float_round_up;
+        break;
+    case FPROUNDING_NEGINF:
+        rmode = float_round_down;
+        break;
+    case FPROUNDING_ZERO:
+        rmode = float_round_to_zero;
+        break;
+    }
+    return rmode;
+}
+
+/* Emit a floating point comparison and hand the flags value produced by
+ * the helper to gen_set_nzcv().
+ *
+ * is_double selects 64-bit (true) or 32-bit (false) operands. The first
+ * operand is FP register rn. The second operand is FP register rm, or a
+ * constant zero when cmp_with_zero is set (rm is then not read).
+ * signal_all_nans selects the vfp_cmpe* helpers instead of vfp_cmp*.
+ */
+static void handle_fp_compare(DisasContext *s, bool is_double,
+                              unsigned int rn, unsigned int rm,
+                              bool cmp_with_zero, bool signal_all_nans)
+{
+    TCGv_i64 flags = tcg_temp_new_i64();
+    TCGv_ptr status = get_fpstatus_ptr();
+
+    if (!is_double) {
+        TCGv_i32 lhs = read_fp_sreg(s, rn);
+        TCGv_i32 rhs;
+
+        if (!cmp_with_zero) {
+            rhs = read_fp_sreg(s, rm);
+        } else {
+            rhs = tcg_const_i32(0);
+        }
+
+        if (!signal_all_nans) {
+            gen_helper_vfp_cmps_a64(flags, lhs, rhs, status);
+        } else {
+            gen_helper_vfp_cmpes_a64(flags, lhs, rhs, status);
+        }
+
+        tcg_temp_free_i32(lhs);
+        tcg_temp_free_i32(rhs);
+    } else {
+        TCGv_i64 lhs = read_fp_dreg(s, rn);
+        TCGv_i64 rhs;
+
+        if (!cmp_with_zero) {
+            rhs = read_fp_dreg(s, rm);
+        } else {
+            rhs = tcg_const_i64(0);
+        }
+
+        if (!signal_all_nans) {
+            gen_helper_vfp_cmpd_a64(flags, lhs, rhs, status);
+        } else {
+            gen_helper_vfp_cmped_a64(flags, lhs, rhs, status);
+        }
+
+        tcg_temp_free_i64(lhs);
+        tcg_temp_free_i64(rhs);
+    }
+
+    tcg_temp_free_ptr(status);
+
+    /* The helper result becomes the new NZCV. */
+    gen_set_nzcv(flags);
+    tcg_temp_free_i64(flags);
+}
+
+/* C3.6.22 Floating point compare
+ *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
+ * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
+ * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
+ * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
+ *
+ * The op2 field is decoded in two parts: bits [4:3] select the compare
+ * variant and bits [2:0] must be zero. Bits [31:29], op, and the low
+ * three bits of op2 being nonzero, or a type other than 0/1, are
+ * treated as unallocated.
+ */
+static void disas_fp_compare(DisasContext *s, uint32_t insn)
+{
+    unsigned int m_o_s = extract32(insn, 29, 3);
+    unsigned int fp_type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+    unsigned int reg_m = extract32(insn, 16, 5);
+    unsigned int op_field = extract32(insn, 14, 2);
+    unsigned int reg_n = extract32(insn, 5, 5);
+    unsigned int variant = extract32(insn, 3, 2);
+    unsigned int op2_low = extract32(insn, 0, 3);
+    bool against_zero;
+    bool signalling;
+
+    if (m_o_s != 0 || op_field != 0 || op2_low != 0 || fp_type >= 2) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    /* variant bit 0: compare with zero; bit 1: signal on all NaNs */
+    against_zero = (variant & 1) != 0;
+    signalling = (variant & 2) != 0;
+
+    handle_fp_compare(s, fp_type != 0, reg_n, reg_m, against_zero, signalling);
+}
+
+/* C3.6.23 Floating point conditional compare
+ *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
+ * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
+ * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
+ * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
+ *
+ * When the condition holds, Rn is compared against Rm (op selects the
+ * signal-all-NaNs flavour). Otherwise the flags are loaded from the
+ * nzcv immediate. Condition values 0xe and 0xf are treated as "always",
+ * so no branch is generated for them.
+ */
+static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
+{
+    unsigned int m_o_s = extract32(insn, 29, 3);
+    unsigned int fp_type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+    unsigned int reg_m = extract32(insn, 16, 5);
+    unsigned int cc = extract32(insn, 12, 4);
+    unsigned int reg_n = extract32(insn, 5, 5);
+    unsigned int signalling = extract32(insn, 4, 1);
+    unsigned int flag_imm = extract32(insn, 0, 4);
+    bool conditional;
+    int done_label = -1;
+
+    if (m_o_s != 0 || fp_type >= 2) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    conditional = cc < 0x0e;
+
+    if (conditional) {
+        int taken_label = gen_new_label();
+        TCGv_i64 imm_flags;
+
+        done_label = gen_new_label();
+        arm_gen_test_cc(cc, taken_label);
+
+        /* Condition failed: NZCV comes straight from the immediate. */
+        imm_flags = tcg_const_i64(flag_imm << 28);
+        gen_set_nzcv(imm_flags);
+        tcg_temp_free_i64(imm_flags);
+        tcg_gen_br(done_label);
+
+        gen_set_label(taken_label);
+    }
+
+    /* Condition passed (or is "always"): do the real comparison. */
+    handle_fp_compare(s, fp_type, reg_n, reg_m, false, signalling);
+
+    if (conditional) {
+        gen_set_label(done_label);
+    }
+}
+
+/* Copy FP register src to FP register dst.
+ * A zero type moves a single precision value (read_fp_sreg /
+ * write_fp_sreg); any nonzero type moves a double precision value
+ * (read_fp_dreg / write_fp_dreg).
+ */
+static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
+{
+    if (!type) {
+        TCGv_i32 val32 = read_fp_sreg(s, src);
+
+        write_fp_sreg(s, dst, val32);
+        tcg_temp_free_i32(val32);
+    } else {
+        TCGv_i64 val64 = read_fp_dreg(s, src);
+
+        write_fp_dreg(s, dst, val64);
+        tcg_temp_free_i64(val64);
+    }
+}
+
+/* C3.6.24 Floating point conditional select
+ *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
+ * +---+---+---+-----------+------+---+------+------+-----+------+------+
+ * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
+ * +---+---+---+-----------+------+---+------+------+-----+------+------+
+ *
+ * Rd receives Rn when the condition holds and Rm otherwise. Condition
+ * values 0xe and 0xf are treated as "always", so only the Rn move is
+ * generated for them.
+ */
+static void disas_fp_csel(DisasContext *s, uint32_t insn)
+{
+    unsigned int m_o_s = extract32(insn, 29, 3);
+    unsigned int fp_type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+    unsigned int reg_m = extract32(insn, 16, 5);
+    unsigned int cc = extract32(insn, 12, 4);
+    unsigned int reg_n = extract32(insn, 5, 5);
+    unsigned int reg_d = extract32(insn, 0, 5);
+    bool conditional;
+    int done_label = -1;
+
+    if (m_o_s != 0 || fp_type >= 2) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    conditional = cc < 0x0e;
+
+    if (conditional) {
+        int taken_label = gen_new_label();
+
+        done_label = gen_new_label();
+        arm_gen_test_cc(cc, taken_label);
+
+        /* Condition failed: Rd = Rm, then skip the Rn move. */
+        gen_mov_fp2fp(s, fp_type, reg_d, reg_m);
+        tcg_gen_br(done_label);
+
+        gen_set_label(taken_label);
+    }
+
+    /* Condition passed (or is "always"): Rd = Rn. */
+    gen_mov_fp2fp(s, fp_type, reg_d, reg_n);
+
+    if (conditional) {
+        gen_set_label(done_label);
+    }
+}
+
+/* C3.6.25 Floating-point data-processing (1 source) - single precision
+ *
+ * Reads FP register rn as a 32-bit value, applies the operation chosen
+ * by opcode and writes the result to FP register rd. Calls abort() for
+ * an opcode outside the set handled below; the caller is expected to
+ * have filtered those out.
+ */
+static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
+{
+    TCGv_ptr status = get_fpstatus_ptr();
+    TCGv_i32 src = read_fp_sreg(s, rn);
+    TCGv_i32 dst = tcg_temp_new_i32();
+
+    switch (opcode) {
+    case 0x0: /* FMOV */
+        tcg_gen_mov_i32(dst, src);
+        break;
+    case 0x1: /* FABS */
+        gen_helper_vfp_abss(dst, src);
+        break;
+    case 0x2: /* FNEG */
+        gen_helper_vfp_negs(dst, src);
+        break;
+    case 0x3: /* FSQRT */
+        gen_helper_vfp_sqrts(dst, src, cpu_env);
+        break;
+    case 0x8: /* FRINTN */
+    case 0x9: /* FRINTP */
+    case 0xa: /* FRINTM */
+    case 0xb: /* FRINTZ */
+    case 0xc: /* FRINTA */
+    {
+        /* The low three opcode bits are the ARM rounding mode number. */
+        TCGv_i32 mode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
+
+        /* set_rmode writes its result back into 'mode'; the second call
+         * feeds that value back in (presumably restoring the previous
+         * rounding mode - confirm against the helper).
+         */
+        gen_helper_set_rmode(mode, mode, cpu_env);
+        gen_helper_rints(dst, src, status);
+        gen_helper_set_rmode(mode, mode, cpu_env);
+
+        tcg_temp_free_i32(mode);
+        break;
+    }
+    case 0xe: /* FRINTX */
+        gen_helper_rints_exact(dst, src, status);
+        break;
+    case 0xf: /* FRINTI */
+        gen_helper_rints(dst, src, status);
+        break;
+    default:
+        abort();
+    }
+
+    write_fp_sreg(s, rd, dst);
+
+    tcg_temp_free_ptr(status);
+    tcg_temp_free_i32(src);
+    tcg_temp_free_i32(dst);
+}
+
+/* C3.6.25 Floating-point data-processing (1 source) - double precision
+ *
+ * Reads FP register rn as a 64-bit value, applies the operation chosen
+ * by opcode and writes the result to FP register rd. Calls abort() for
+ * an opcode outside the set handled below; the caller is expected to
+ * have filtered those out.
+ */
+static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
+{
+    TCGv_ptr status = get_fpstatus_ptr();
+    TCGv_i64 src = read_fp_dreg(s, rn);
+    TCGv_i64 dst = tcg_temp_new_i64();
+
+    switch (opcode) {
+    case 0x0: /* FMOV */
+        tcg_gen_mov_i64(dst, src);
+        break;
+    case 0x1: /* FABS */
+        gen_helper_vfp_absd(dst, src);
+        break;
+    case 0x2: /* FNEG */
+        gen_helper_vfp_negd(dst, src);
+        break;
+    case 0x3: /* FSQRT */
+        gen_helper_vfp_sqrtd(dst, src, cpu_env);
+        break;
+    case 0x8: /* FRINTN */
+    case 0x9: /* FRINTP */
+    case 0xa: /* FRINTM */
+    case 0xb: /* FRINTZ */
+    case 0xc: /* FRINTA */
+    {
+        /* The low three opcode bits are the ARM rounding mode number. */
+        TCGv_i32 mode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
+
+        /* set_rmode writes its result back into 'mode'; the second call
+         * feeds that value back in (presumably restoring the previous
+         * rounding mode - confirm against the helper).
+         */
+        gen_helper_set_rmode(mode, mode, cpu_env);
+        gen_helper_rintd(dst, src, status);
+        gen_helper_set_rmode(mode, mode, cpu_env);
+
+        tcg_temp_free_i32(mode);
+        break;
+    }
+    case 0xe: /* FRINTX */
+        gen_helper_rintd_exact(dst, src, status);
+        break;
+    case 0xf: /* FRINTI */
+        gen_helper_rintd(dst, src, status);
+        break;
+    default:
+        abort();
+    }
+
+    write_fp_dreg(s, rd, dst);
+
+    tcg_temp_free_ptr(status);
+    tcg_temp_free_i64(src);
+    tcg_temp_free_i64(dst);
+}
+
+/* Emit an FCVT between half, single and double precision.
+ *
+ * ntype is the source precision: 0 = single, 1 = double, 3 = half; any
+ * other value calls abort(). dtype picks the destination among the two
+ * remaining precisions. Half precision results are written with
+ * write_fp_sreg(). The opcode argument is not used here.
+ */
+static void handle_fp_fcvt(DisasContext *s, int opcode,
+                           int rd, int rn, int dtype, int ntype)
+{
+    switch (ntype) {
+    case 0x0: /* source is single precision */
+    {
+        TCGv_i32 src = read_fp_sreg(s, rn);
+
+        if (dtype != 1) {
+            /* Single to half */
+            TCGv_i32 half = tcg_temp_new_i32();
+
+            gen_helper_vfp_fcvt_f32_to_f16(half, src, cpu_env);
+            /* write_fp_sreg is OK here because top half of the result
+             * is zero
+             */
+            write_fp_sreg(s, rd, half);
+            tcg_temp_free_i32(half);
+        } else {
+            /* Single to double */
+            TCGv_i64 dbl = tcg_temp_new_i64();
+
+            gen_helper_vfp_fcvtds(dbl, src, cpu_env);
+            write_fp_dreg(s, rd, dbl);
+            tcg_temp_free_i64(dbl);
+        }
+        tcg_temp_free_i32(src);
+        break;
+    }
+    case 0x1: /* source is double precision */
+    {
+        TCGv_i64 src = read_fp_dreg(s, rn);
+        TCGv_i32 narrow = tcg_temp_new_i32();
+
+        if (dtype != 0) {
+            /* Double to half */
+            gen_helper_vfp_fcvt_f64_to_f16(narrow, src, cpu_env);
+            /* write_fp_sreg is OK here because top half of the result
+             * is zero
+             */
+        } else {
+            /* Double to single */
+            gen_helper_vfp_fcvtsd(narrow, src, cpu_env);
+        }
+        write_fp_sreg(s, rd, narrow);
+        tcg_temp_free_i32(narrow);
+        tcg_temp_free_i64(src);
+        break;
+    }
+    case 0x3: /* source is half precision */
+    {
+        TCGv_i32 src = read_fp_sreg(s, rn);
+
+        /* Only the low 16 bits of the source register are the input. */
+        tcg_gen_ext16u_i32(src, src);
+        if (dtype != 0) {
+            /* Half to double */
+            TCGv_i64 dbl = tcg_temp_new_i64();
+
+            gen_helper_vfp_fcvt_f16_to_f64(dbl, src, cpu_env);
+            write_fp_dreg(s, rd, dbl);
+            tcg_temp_free_i64(dbl);
+        } else {
+            /* Half to single */
+            TCGv_i32 sgl = tcg_temp_new_i32();
+
+            gen_helper_vfp_fcvt_f16_to_f32(sgl, src, cpu_env);
+            write_fp_sreg(s, rd, sgl);
+            tcg_temp_free_i32(sgl);
+        }
+        tcg_temp_free_i32(src);
+        break;
+    }
+    default:
+        abort();
+    }
+}
+
+/* C3.6.25 Floating point data-processing (1 source)
+ *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
+ * +---+---+---+-----------+------+---+--------+-----------+------+------+
+ * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
+ * +---+---+---+-----------+------+---+--------+-----------+------+------+
+ *
+ * Opcodes 0x4, 0x5 and 0x7 are FCVT, where the low two opcode bits give
+ * the destination precision and type gives the source precision; type 2
+ * and a same-precision conversion are unallocated. The remaining handled
+ * opcodes operate within one precision and are dispatched on type
+ * (0 = single, 1 = double). Nonzero bits [31:29] are unallocated, as in
+ * the other FP decoders of this group.
+ */
+static void disas_fp_1src(DisasContext *s, uint32_t insn)
+{
+    int mos = extract32(insn, 29, 3);
+    int type = extract32(insn, 22, 2);
+    int opcode = extract32(insn, 15, 6);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+
+    if (mos) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (opcode) {
+    case 0x4: case 0x5: case 0x7:
+    {
+        /* FCVT between half, single and double precision */
+        int dtype = extract32(opcode, 0, 2);
+        if (type == 2 || dtype == type) {
+            unallocated_encoding(s);
+            return;
+        }
+        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
+        break;
+    }
+    case 0x0 ... 0x3:
+    case 0x8 ... 0xc:
+    case 0xe ... 0xf:
+        /* 32-to-32 and 64-to-64 ops */
+        switch (type) {
+        case 0:
+            handle_fp_1src_single(s, opcode, rd, rn);
+            break;
+        case 1:
+            handle_fp_1src_double(s, opcode, rd, rn);
+            break;
+        default:
+            unallocated_encoding(s);
+        }
+        break;
+    default:
+        unallocated_encoding(s);
+        break;
+    }
+}
+
+/* C3.6.26 Floating-point data-processing (2 source) - single precision
+ *
+ * Reads FP registers rn and rm as 32-bit values, combines them with the
+ * operation chosen by opcode (0x0..0x8) and writes the result to FP
+ * register rd. Calls abort() for any other opcode, matching the 1-source
+ * handlers; the caller is expected to have rejected those already.
+ */
+static void handle_fp_2src_single(DisasContext *s, int opcode,
+                                  int rd, int rn, int rm)
+{
+    TCGv_i32 tcg_op1;
+    TCGv_i32 tcg_op2;
+    TCGv_i32 tcg_res;
+    TCGv_ptr fpst;
+
+    tcg_res = tcg_temp_new_i32();
+    fpst = get_fpstatus_ptr();
+    tcg_op1 = read_fp_sreg(s, rn);
+    tcg_op2 = read_fp_sreg(s, rm);
+
+    switch (opcode) {
+    case 0x0: /* FMUL */
+        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x1: /* FDIV */
+        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x2: /* FADD */
+        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x3: /* FSUB */
+        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x4: /* FMAX */
+        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x5: /* FMIN */
+        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x6: /* FMAXNM */
+        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x7: /* FMINNM */
+        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x8: /* FNMUL */
+        /* multiply, then negate the product */
+        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
+        gen_helper_vfp_negs(tcg_res, tcg_res);
+        break;
+    default:
+        /* would otherwise write a never-assigned temp to rd */
+        abort();
+    }
+
+    write_fp_sreg(s, rd, tcg_res);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i32(tcg_op1);
+    tcg_temp_free_i32(tcg_op2);
+    tcg_temp_free_i32(tcg_res);
+}
+
+/* C3.6.26 Floating-point data-processing (2 source) - double precision
+ *
+ * Reads FP registers rn and rm as 64-bit values, combines them with the
+ * operation chosen by opcode (0x0..0x8) and writes the result to FP
+ * register rd. Calls abort() for any other opcode, matching the 1-source
+ * handlers; the caller is expected to have rejected those already.
+ */
+static void handle_fp_2src_double(DisasContext *s, int opcode,
+                                  int rd, int rn, int rm)
+{
+    TCGv_i64 tcg_op1;
+    TCGv_i64 tcg_op2;
+    TCGv_i64 tcg_res;
+    TCGv_ptr fpst;
+
+    tcg_res = tcg_temp_new_i64();
+    fpst = get_fpstatus_ptr();
+    tcg_op1 = read_fp_dreg(s, rn);
+    tcg_op2 = read_fp_dreg(s, rm);
+
+    switch (opcode) {
+    case 0x0: /* FMUL */
+        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x1: /* FDIV */
+        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x2: /* FADD */
+        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x3: /* FSUB */
+        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x4: /* FMAX */
+        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x5: /* FMIN */
+        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x6: /* FMAXNM */
+        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x7: /* FMINNM */
+        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+        break;
+    case 0x8: /* FNMUL */
+        /* multiply, then negate the product */
+        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
+        gen_helper_vfp_negd(tcg_res, tcg_res);
+        break;
+    default:
+        /* would otherwise write a never-assigned temp to rd */
+        abort();
+    }
+
+    write_fp_dreg(s, rd, tcg_res);
+
+    tcg_temp_free_ptr(fpst);
+    tcg_temp_free_i64(tcg_op1);
+    tcg_temp_free_i64(tcg_op2);
+    tcg_temp_free_i64(tcg_res);
+}
+
+/* C3.6.26 Floating point data-processing (2 source)
+ *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
+ * +---+---+---+-----------+------+---+------+--------+-----+------+------+
+ * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
+ * +---+---+---+-----------+------+---+------+--------+-----+------+------+
+ *
+ * Opcodes above 8, a type other than 0 (single) or 1 (double), and
+ * nonzero bits [31:29] are unallocated.
+ */
+static void disas_fp_2src(DisasContext *s, uint32_t insn)
+{
+    int mos = extract32(insn, 29, 3);
+    int type = extract32(insn, 22, 2);
+    int rd = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int rm = extract32(insn, 16, 5);
+    int opcode = extract32(insn, 12, 4);
+
+    if (mos || opcode > 8) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (type) {
+    case 0:
+        handle_fp_2src_single(s, opcode, rd, rn, rm);
+        break;
+    case 1:
+        handle_fp_2src_double(s, opcode, rd, rn, rm);
+        break;
+    default:
+        unallocated_encoding(s);
+    }
+}
+
+/* C3.6.27 Floating-point data-processing (3 source) - single precision
+ *
+ * Emits rd = muladd(rn', rm, ra') on 32-bit values, where ra' is ra
+ * negated when o1 is set and rn' is rn negated when o0 and o1 differ.
+ */
+static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
+                                  int rd, int rn, int rm, int ra)
+{
+    TCGv_i32 mul_a, mul_b, addend;
+    TCGv_i32 result = tcg_temp_new_i32();
+    TCGv_ptr status = get_fpstatus_ptr();
+
+    mul_a = read_fp_sreg(s, rn);
+    mul_b = read_fp_sreg(s, rm);
+    addend = read_fp_sreg(s, ra);
+
+    /* The multiply-add itself is fused: the helper performs it as one
+     * floating point operation with no rounding between the multiply
+     * and the add. Negating the inputs beforehand as separate steps is
+     * correct: a NaN passed through a negated input should come out
+     * with its sign bit flipped.
+     */
+    if (o1) {
+        gen_helper_vfp_negs(addend, addend);
+    }
+    if (o0 != o1) {
+        gen_helper_vfp_negs(mul_a, mul_a);
+    }
+
+    gen_helper_vfp_muladds(result, mul_a, mul_b, addend, status);
+    write_fp_sreg(s, rd, result);
+
+    tcg_temp_free_ptr(status);
+    tcg_temp_free_i32(mul_a);
+    tcg_temp_free_i32(mul_b);
+    tcg_temp_free_i32(addend);
+    tcg_temp_free_i32(result);
+}
+
+/* C3.6.27 Floating-point data-processing (3 source) - double precision
+ *
+ * Emits rd = muladd(rn', rm, ra') on 64-bit values, where ra' is ra
+ * negated when o1 is set and rn' is rn negated when o0 and o1 differ.
+ */
+static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
+                                  int rd, int rn, int rm, int ra)
+{
+    TCGv_i64 mul_a, mul_b, addend;
+    TCGv_i64 result = tcg_temp_new_i64();
+    TCGv_ptr status = get_fpstatus_ptr();
+
+    mul_a = read_fp_dreg(s, rn);
+    mul_b = read_fp_dreg(s, rm);
+    addend = read_fp_dreg(s, ra);
+
+    /* The multiply-add itself is fused: the helper performs it as one
+     * floating point operation with no rounding between the multiply
+     * and the add. Negating the inputs beforehand as separate steps is
+     * correct: a NaN passed through a negated input should come out
+     * with its sign bit flipped.
+     */
+    if (o1) {
+        gen_helper_vfp_negd(addend, addend);
+    }
+    if (o0 != o1) {
+        gen_helper_vfp_negd(mul_a, mul_a);
+    }
+
+    gen_helper_vfp_muladdd(result, mul_a, mul_b, addend, status);
+    write_fp_dreg(s, rd, result);
+
+    tcg_temp_free_ptr(status);
+    tcg_temp_free_i64(mul_a);
+    tcg_temp_free_i64(mul_b);
+    tcg_temp_free_i64(addend);
+    tcg_temp_free_i64(result);
+}
+
+/* C3.6.27 Floating point data-processing (3 source)
+ *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
+ * +---+---+---+-----------+------+----+------+----+------+------+------+
+ * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
+ * +---+---+---+-----------+------+----+------+----+------+------+------+
+ *
+ * A type other than 0 (single) or 1 (double), and nonzero bits [31:29],
+ * are unallocated.
+ */
+static void disas_fp_3src(DisasContext *s, uint32_t insn)
+{
+    int mos = extract32(insn, 29, 3);
+    int type = extract32(insn, 22, 2);
+    int rd = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int ra = extract32(insn, 10, 5);
+    int rm = extract32(insn, 16, 5);
+    bool o0 = extract32(insn, 15, 1);
+    bool o1 = extract32(insn, 21, 1);
+
+    if (mos) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (type) {
+    case 0:
+        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
+        break;
+    case 1:
+        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
+        break;
+    default:
+        unallocated_encoding(s);
+    }
+}
+
+/* C3.6.28 Floating point immediate
+ *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
+ * +---+---+---+-----------+------+---+------------+-------+------+------+
+ * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
+ * +---+---+---+-----------+------+---+------------+-------+------+------+
+ *
+ * Expands imm8 into a single (type 0) or double (type 1) precision
+ * constant and writes it to FP register Rd as a 64-bit value. Nonzero
+ * bits [31:29], a nonzero imm5 field, or a type above 1 are unallocated.
+ */
+static void disas_fp_imm(DisasContext *s, uint32_t insn)
+{
+    int rd = extract32(insn, 0, 5);
+    int imm5 = extract32(insn, 5, 5);
+    int imm8 = extract32(insn, 13, 8);
+    int is_double = extract32(insn, 22, 2);
+    int mos = extract32(insn, 29, 3);
+    uint64_t imm;
+    TCGv_i64 tcg_res;
+
+    if (mos || imm5 || is_double > 1) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    /* The imm8 encodes the sign bit, enough bits to represent
+     * an exponent in the range 01....1xx to 10....0xx,
+     * and the most significant 4 bits of the mantissa; see
+     * VFPExpandImm() in the v8 ARM ARM.
+     */
+    if (is_double) {
+        /* build the top 16 bits of the double, then move them into place */
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
+            extract32(imm8, 0, 6);
+        imm <<= 48;
+    } else {
+        /* build the top 16 bits of the single, then move them into place */
+        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
+            (extract32(imm8, 0, 6) << 3);
+        imm <<= 16;
+    }
+
+    tcg_res = tcg_const_i64(imm);
+    write_fp_dreg(s, rd, tcg_res);
+    tcg_temp_free_i64(tcg_res);
+}
+
+/* Handle floating point <=> fixed point conversions. Note that we can
+ * also deal with fp <=> integer conversions as a special case (scale == 64)
+ * OPTME: consider handling that special case specially or at least skipping
+ * the call to scalbn in the helpers for zero shifts.
+ *
+ * itof == true: the integer comes from general register rn and the
+ * floating point result is written to FP register rd. For a 32-bit
+ * source (!sf) the integer is first sign- or zero-extended to 64 bits.
+ *
+ * itof == false: the floating point value comes from FP register rn and
+ * the integer result is written to general register rd. The conversion
+ * is bracketed by two set_rmode helper calls using rmode (or
+ * FPROUNDING_TIEAWAY when opcode bit 2 is set). For a 32-bit
+ * destination (!sf) the result is zero-extended from 32 bits.
+ *
+ * opcode bit 0 clear selects the signed helpers; a nonzero type selects
+ * double precision. Every helper receives (64 - scale) as its shift
+ * argument.
+ */
+static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
+                           bool itof, int rmode, int scale, int sf, int type)
+{
+    bool is_signed = !(opcode & 1); /* opcode bit 0 set => unsigned */
+    bool is_double = type; /* nonzero type => double precision */
+    TCGv_ptr tcg_fpstatus;
+    TCGv_i32 tcg_shift;
+
+    tcg_fpstatus = get_fpstatus_ptr();
+
+    tcg_shift = tcg_const_i32(64 - scale); /* shared by all helper calls */
+
+    if (itof) {
+        TCGv_i64 tcg_int = cpu_reg(s, rn);
+        if (!sf) {
+            TCGv_i64 tcg_extend = new_tmp_a64(s);
+
+            if (is_signed) {
+                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
+            } else {
+                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
+            }
+
+            tcg_int = tcg_extend;
+        }
+
+        if (is_double) {
+            TCGv_i64 tcg_double = tcg_temp_new_i64();
+            if (is_signed) {
+                gen_helper_vfp_sqtod(tcg_double, tcg_int,
+                                     tcg_shift, tcg_fpstatus);
+            } else {
+                gen_helper_vfp_uqtod(tcg_double, tcg_int,
+                                     tcg_shift, tcg_fpstatus);
+            }
+            write_fp_dreg(s, rd, tcg_double);
+            tcg_temp_free_i64(tcg_double);
+        } else {
+            TCGv_i32 tcg_single = tcg_temp_new_i32();
+            if (is_signed) {
+                gen_helper_vfp_sqtos(tcg_single, tcg_int,
+                                     tcg_shift, tcg_fpstatus);
+            } else {
+                gen_helper_vfp_uqtos(tcg_single, tcg_int,
+                                     tcg_shift, tcg_fpstatus);
+            }
+            write_fp_sreg(s, rd, tcg_single);
+            tcg_temp_free_i32(tcg_single);
+        }
+    } else {
+        TCGv_i64 tcg_int = cpu_reg(s, rd);
+        TCGv_i32 tcg_rmode;
+
+        if (extract32(opcode, 2, 1)) {
+            /* There are too many rounding modes to all fit into rmode,
+             * so FCVTA[US] is a special case: opcode bit 2 overrides the
+             * caller's rmode with ties-away.
+             */
+            rmode = FPROUNDING_TIEAWAY;
+        }
+
+        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+
+        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+
+        if (is_double) {
+            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
+            if (is_signed) {
+                if (!sf) {
+                    gen_helper_vfp_tosld(tcg_int, tcg_double,
+                                         tcg_shift, tcg_fpstatus);
+                } else {
+                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
+                                         tcg_shift, tcg_fpstatus);
+                }
+            } else {
+                if (!sf) {
+                    gen_helper_vfp_tould(tcg_int, tcg_double,
+                                         tcg_shift, tcg_fpstatus);
+                } else {
+                    gen_helper_vfp_touqd(tcg_int, tcg_double,
+                                         tcg_shift, tcg_fpstatus);
+                }
+            }
+            tcg_temp_free_i64(tcg_double);
+        } else {
+            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
+            if (sf) {
+                if (is_signed) {
+                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
+                                         tcg_shift, tcg_fpstatus);
+                } else {
+                    gen_helper_vfp_touqs(tcg_int, tcg_single,
+                                         tcg_shift, tcg_fpstatus);
+                }
+            } else {
+                TCGv_i32 tcg_dest = tcg_temp_new_i32();
+                if (is_signed) {
+                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
+                                         tcg_shift, tcg_fpstatus);
+                } else {
+                    gen_helper_vfp_touls(tcg_dest, tcg_single,
+                                         tcg_shift, tcg_fpstatus);
+                }
+                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
+                tcg_temp_free_i32(tcg_dest);
+            }
+            tcg_temp_free_i32(tcg_single);
+        }
+
+        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
+        tcg_temp_free_i32(tcg_rmode);
+
+        if (!sf) { /* 32-bit destination: keep only the low 32 bits */
+            tcg_gen_ext32u_i64(tcg_int, tcg_int);
+        }
+    }
+
+    tcg_temp_free_ptr(tcg_fpstatus);
+    tcg_temp_free_i32(tcg_shift);
+}
+
+/* C3.6.29 Floating point <-> fixed point conversions
+ *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
+ * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
+ * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
+ * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
+ *
+ * Only SCVTF/UCVTF (rmode 0, opcode 2/3) and FCVTZS/FCVTZU (rmode 3,
+ * opcode 0/1) are accepted. S set, type above 1, or a 32-bit operation
+ * with scale below 32 are unallocated. The conversion is always
+ * requested with FPROUNDING_ZERO.
+ */
+static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
+{
+    int dest = extract32(insn, 0, 5);
+    int src = extract32(insn, 5, 5);
+    int fbits = extract32(insn, 10, 6);
+    int op = extract32(insn, 16, 3);
+    int round = extract32(insn, 19, 2);
+    int fp_type = extract32(insn, 22, 2);
+    bool s_bit = extract32(insn, 29, 1);
+    bool is_64bit = extract32(insn, 31, 1);
+    bool int_to_fp;
+
+    if (s_bit || fp_type > 1) {
+        unallocated_encoding(s);
+        return;
+    }
+    if (!is_64bit && fbits < 32) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    /* Decode on the combined rmode:opcode value. */
+    switch ((round << 3) | op) {
+    case 0x18: /* FCVTZS */
+    case 0x19: /* FCVTZU */
+        int_to_fp = false;
+        break;
+    case 0x2: /* SCVTF */
+    case 0x3: /* UCVTF */
+        int_to_fp = true;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    handle_fpfpcvt(s, dest, src, op, int_to_fp, FPROUNDING_ZERO, fbits,
+                   is_64bit, fp_type);
+}
+
+/* FMOV: gpr to or from float, double, or top half of quad fp reg,
+ * without conversion.
+ *
+ * itof == true copies general register rn into FP register rd:
+ *   type 0: low 32 bits, zero-extended into the low 64 bits; the high
+ *           64 bits are zeroed
+ *   type 1: all 64 bits into the low half; the high 64 bits are zeroed
+ *   type 2: all 64 bits into the high half; the low half is untouched
+ * itof == false copies from FP register rn into general register rd,
+ * reading 32 bits (type 0), the low 64 bits (type 1) or the high 64
+ * bits (type 2). Any other type emits no move.
+ */
+static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
+{
+    if (!itof) {
+        /* FP register -> general register */
+        TCGv_i64 gpr = cpu_reg(s, rd);
+
+        if (type == 0) {
+            /* 32 bit */
+            tcg_gen_ld32u_i64(gpr, cpu_env, fp_reg_offset(rn, MO_32));
+        } else if (type == 1) {
+            /* 64 bit */
+            tcg_gen_ld_i64(gpr, cpu_env, fp_reg_offset(rn, MO_64));
+        } else if (type == 2) {
+            /* 64 bits from top half */
+            tcg_gen_ld_i64(gpr, cpu_env, fp_reg_hi_offset(rn));
+        }
+        return;
+    }
+
+    /* general register -> FP register */
+    {
+        TCGv_i64 gpr = cpu_reg(s, rn);
+
+        if (type == 0) {
+            /* 32 bit */
+            TCGv_i64 scratch = tcg_temp_new_i64();
+
+            tcg_gen_ext32u_i64(scratch, gpr);
+            tcg_gen_st_i64(scratch, cpu_env, fp_reg_offset(rd, MO_64));
+            tcg_gen_movi_i64(scratch, 0);
+            tcg_gen_st_i64(scratch, cpu_env, fp_reg_hi_offset(rd));
+            tcg_temp_free_i64(scratch);
+        } else if (type == 1) {
+            /* 64 bit */
+            TCGv_i64 zero = tcg_const_i64(0);
+
+            tcg_gen_st_i64(gpr, cpu_env, fp_reg_offset(rd, MO_64));
+            tcg_gen_st_i64(zero, cpu_env, fp_reg_hi_offset(rd));
+            tcg_temp_free_i64(zero);
+        } else if (type == 2) {
+            /* 64 bit to top half. */
+            tcg_gen_st_i64(gpr, cpu_env, fp_reg_hi_offset(rd));
+        }
+    }
+}
+
+/* C3.6.30 Floating point <-> integer conversions
+ *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
+ * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
+ * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
+ * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
+ *
+ * opc values above 5 are FMOV between a general and an FP register
+ * (opc bit 0 gives the direction); only three sf/type/rmode
+ * combinations are valid for it. All other opc values are real
+ * conversions, handled as fixed point conversions with scale 64.
+ * S set is always unallocated.
+ */
+static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
+{
+    int rd = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int opcode = extract32(insn, 16, 3);
+    int rmode = extract32(insn, 19, 2);
+    int type = extract32(insn, 22, 2);
+    bool sbit = extract32(insn, 29, 1);
+    bool sf = extract32(insn, 31, 1);
+
+    if (sbit) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (opcode > 5) {
+        /* FMOV */
+        bool itof = opcode & 1;
+
+        if (rmode >= 2) {
+            unallocated_encoding(s);
+            return;
+        }
+
+        switch (sf << 3 | type << 1 | rmode) {
+        case 0x0: /* 32 bit */
+        case 0xa: /* 64 bit */
+        case 0xd: /* 64 bit to top half of quad */
+            break;
+        default:
+            /* all other sf/type/rmode combinations are invalid;
+             * return so that no move is emitted after the exception
+             */
+            unallocated_encoding(s);
+            return;
+        }
+
+        handle_fmov(s, rd, rn, type, itof);
+    } else {
+        /* actual FP conversions */
+        bool itof = extract32(opcode, 1, 1);
+
+        if (type > 1 || (rmode != 0 && opcode > 1)) {
+            unallocated_encoding(s);
+            return;
+        }
+
+        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
+    }
+}
+
+/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
+ *   31  30  29 28     25 24                          0
+ * +---+---+---+---------+-----------------------------+
+ * |   | 0 |   | 1 1 1 1 |                             |
+ * +---+---+---+---------+-----------------------------+
+ *
+ * Dispatch order: bit 24 (3 source), then bit 21 clear (fixed point
+ * conversions), then bits [11:10], and for [11:10] == 0 the position of
+ * the lowest set bit in [15:12].
+ */
+static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
+{
+    if (extract32(insn, 24, 1)) {
+        /* Floating point data-processing (3 source) */
+        disas_fp_3src(s, insn);
+        return;
+    }
+
+    if (!extract32(insn, 21, 1)) {
+        /* Floating point to fixed point conversions */
+        disas_fp_fixed_conv(s, insn);
+        return;
+    }
+
+    switch (extract32(insn, 10, 2)) {
+    case 1:
+        /* Floating point conditional compare */
+        disas_fp_ccomp(s, insn);
+        return;
+    case 2:
+        /* Floating point data-processing (2 source) */
+        disas_fp_2src(s, insn);
+        return;
+    case 3:
+        /* Floating point conditional select */
+        disas_fp_csel(s, insn);
+        return;
+    default:
+        /* [11:10] == 00: decoded further below */
+        break;
+    }
+
+    switch (ctz32(extract32(insn, 12, 4))) {
+    case 0: /* [15:12] == xxx1 */
+        /* Floating point immediate */
+        disas_fp_imm(s, insn);
+        break;
+    case 1: /* [15:12] == xx10 */
+        /* Floating point compare */
+        disas_fp_compare(s, insn);
+        break;
+    case 2: /* [15:12] == x100 */
+        /* Floating point data-processing (1 source) */
+        disas_fp_1src(s, insn);
+        break;
+    case 3: /* [15:12] == 1000 */
+        unallocated_encoding(s);
+        break;
+    default: /* [15:12] == 0000 */
+        /* Floating point <-> integer conversions */
+        disas_fp_int_conv(s, insn);
+        break;
+    }
+}
+
+static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
+{
+    /* Decoder entry point for the SIMD (non-FP) part of table C3-6.
+     * Note that this is called with all non-FP cases from
+     * table C3-6 so it must UNDEF for entries not specifically
+     * allocated to instructions in that table.
+     * No instruction is decoded here yet: every encoding is passed
+     * to unsupported_encoding().
+     */
+    unsupported_encoding(s, insn);
+}
+
 /* C3.6 Data processing - SIMD and floating point */
 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
+        disas_data_proc_fp(s, insn);
+    } else {
+        /* SIMD, including crypto */
+        disas_data_proc_simd(s, insn);
+    }
 }
 
 /* C3.1 A64 instruction index by encoding */
@@ -1661,6 +4323,8 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
     dc->vfp_enabled = 0;
     dc->vec_len = 0;
     dc->vec_stride = 0;
+    dc->cp_regs = cpu->cp_regs;
+    dc->current_pl = arm_current_pl(env);
 
     init_tmp_a64_array(dc);
 
@@ -1750,8 +4414,10 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
             gen_goto_tb(dc, 1, dc->pc);
             break;
         default:
-        case DISAS_JUMP:
         case DISAS_UPDATE:
+            gen_a64_set_pc_im(dc->pc);
+            /* fall through */
+        case DISAS_JUMP:
             /* indicate that the hash table must be used to find the next TB */
             tcg_gen_exit_tb(0);
             break;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 1403ecf216..8d240e160d 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -61,11 +61,10 @@ TCGv_ptr cpu_env;
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
 static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
-static TCGv_i32 cpu_exclusive_addr;
-static TCGv_i32 cpu_exclusive_val;
-static TCGv_i32 cpu_exclusive_high;
+static TCGv_i64 cpu_exclusive_addr;
+static TCGv_i64 cpu_exclusive_val;
 #ifdef CONFIG_USER_ONLY
-static TCGv_i32 cpu_exclusive_test;
+static TCGv_i64 cpu_exclusive_test;
 static TCGv_i32 cpu_exclusive_info;
 #endif
 
@@ -96,14 +95,12 @@ void arm_translate_init(void)
     cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
     cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
 
-    cpu_exclusive_addr = tcg_global_mem_new_i32(TCG_AREG0,
+    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
-    cpu_exclusive_val = tcg_global_mem_new_i32(TCG_AREG0,
+    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_val), "exclusive_val");
-    cpu_exclusive_high = tcg_global_mem_new_i32(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_high), "exclusive_high");
 #ifdef CONFIG_USER_ONLY
-    cpu_exclusive_test = tcg_global_mem_new_i32(TCG_AREG0,
+    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_test), "exclusive_test");
     cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
         offsetof(CPUARMState, exclusive_info), "exclusive_info");
@@ -1101,27 +1098,29 @@ VFP_GEN_FTOI(tosi)
 VFP_GEN_FTOI(tosiz)
 #undef VFP_GEN_FTOI
 
-#define VFP_GEN_FIX(name) \
+#define VFP_GEN_FIX(name, round) \
 static inline void gen_vfp_##name(int dp, int shift, int neon) \
 { \
     TCGv_i32 tmp_shift = tcg_const_i32(shift); \
     TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
     if (dp) { \
-        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \
+        gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
+                                        statusptr); \
     } else { \
-        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, statusptr); \
+        gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
+                                        statusptr); \
     } \
     tcg_temp_free_i32(tmp_shift); \
     tcg_temp_free_ptr(statusptr); \
 }
-VFP_GEN_FIX(tosh)
-VFP_GEN_FIX(tosl)
-VFP_GEN_FIX(touh)
-VFP_GEN_FIX(toul)
-VFP_GEN_FIX(shto)
-VFP_GEN_FIX(slto)
-VFP_GEN_FIX(uhto)
-VFP_GEN_FIX(ulto)
+VFP_GEN_FIX(tosh, _round_to_zero)
+VFP_GEN_FIX(tosl, _round_to_zero)
+VFP_GEN_FIX(touh, _round_to_zero)
+VFP_GEN_FIX(toul, _round_to_zero)
+VFP_GEN_FIX(shto, )
+VFP_GEN_FIX(slto, )
+VFP_GEN_FIX(uhto, )
+VFP_GEN_FIX(ulto, )
 #undef VFP_GEN_FIX
 
 static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
@@ -2728,9 +2727,9 @@ static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
         tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
         tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
         if (vmin) {
-            gen_helper_vfp_minnmd(dest, frn, frm, fpst);
+            gen_helper_vfp_minnumd(dest, frn, frm, fpst);
         } else {
-            gen_helper_vfp_maxnmd(dest, frn, frm, fpst);
+            gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
         }
         tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
         tcg_temp_free_i64(frn);
@@ -2746,9 +2745,9 @@ static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
         tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
         tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
         if (vmin) {
-            gen_helper_vfp_minnms(dest, frn, frm, fpst);
+            gen_helper_vfp_minnums(dest, frn, frm, fpst);
         } else {
-            gen_helper_vfp_maxnms(dest, frn, frm, fpst);
+            gen_helper_vfp_maxnums(dest, frn, frm, fpst);
         }
         tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
         tcg_temp_free_i32(frn);
@@ -5124,9 +5123,9 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
         {
             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
             if (size == 0) {
-                gen_helper_neon_max_f32(tmp, tmp, tmp2, fpstatus);
+                gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
             } else {
-                gen_helper_neon_min_f32(tmp, tmp, tmp2, fpstatus);
+                gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
             }
             tcg_temp_free_ptr(fpstatus);
             break;
@@ -5136,9 +5135,9 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                 /* VMAXNM/VMINNM */
                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
                 if (size == 0) {
-                    gen_helper_vfp_maxnms(tmp, tmp, tmp2, fpstatus);
+                    gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
                 } else {
-                    gen_helper_vfp_minnms(tmp, tmp, tmp2, fpstatus);
+                    gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
                 }
                 tcg_temp_free_ptr(fpstatus);
             } else {
@@ -6498,7 +6497,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
 {
     int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
     const ARMCPRegInfo *ri;
-    ARMCPU *cpu = arm_env_get_cpu(env);
 
     cpnum = (insn >> 8) & 0xf;
     if (arm_feature(env, ARM_FEATURE_XSCALE)
@@ -6541,11 +6539,11 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
     isread = (insn >> 20) & 1;
     rt = (insn >> 12) & 0xf;
 
-    ri = get_arm_cp_reginfo(cpu,
+    ri = get_arm_cp_reginfo(s->cp_regs,
                             ENCODE_CP_REG(cpnum, is64, crn, crm, opc1, opc2));
     if (ri) {
         /* Check access permissions */
-        if (!cp_access_ok(env, ri, isread)) {
+        if (!cp_access_ok(s->current_pl, ri, isread)) {
             return 1;
         }
 
@@ -6759,30 +6757,34 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
     default:
         abort();
     }
-    tcg_gen_mov_i32(cpu_exclusive_val, tmp);
-    store_reg(s, rt, tmp);
+
     if (size == 3) {
         TCGv_i32 tmp2 = tcg_temp_new_i32();
+        TCGv_i32 tmp3 = tcg_temp_new_i32();
+
         tcg_gen_addi_i32(tmp2, addr, 4);
-        tmp = tcg_temp_new_i32();
-        gen_aa32_ld32u(tmp, tmp2, IS_USER(s));
+        gen_aa32_ld32u(tmp3, tmp2, IS_USER(s));
         tcg_temp_free_i32(tmp2);
-        tcg_gen_mov_i32(cpu_exclusive_high, tmp);
-        store_reg(s, rt2, tmp);
+        tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
+        store_reg(s, rt2, tmp3);
+    } else {
+        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
     }
-    tcg_gen_mov_i32(cpu_exclusive_addr, addr);
+
+    store_reg(s, rt, tmp);
+    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
 }
 
 static void gen_clrex(DisasContext *s)
 {
-    tcg_gen_movi_i32(cpu_exclusive_addr, -1);
+    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
 }
 
 #ifdef CONFIG_USER_ONLY
 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                 TCGv_i32 addr, int size)
 {
-    tcg_gen_mov_i32(cpu_exclusive_test, addr);
+    tcg_gen_extu_i32_i64(cpu_exclusive_test, addr);
     tcg_gen_movi_i32(cpu_exclusive_info,
                      size | (rd << 4) | (rt << 8) | (rt2 << 12));
     gen_exception_insn(s, 4, EXCP_STREX);
@@ -6792,6 +6794,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                 TCGv_i32 addr, int size)
 {
     TCGv_i32 tmp;
+    TCGv_i64 val64, extaddr;
     int done_label;
     int fail_label;
 
@@ -6803,7 +6806,11 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
        } */
     fail_label = gen_new_label();
     done_label = gen_new_label();
-    tcg_gen_brcond_i32(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
+    extaddr = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(extaddr, addr);
+    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
+    tcg_temp_free_i64(extaddr);
+
     tmp = tcg_temp_new_i32();
     switch (size) {
     case 0:
@@ -6819,17 +6826,24 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     default:
         abort();
     }
-    tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
-    tcg_temp_free_i32(tmp);
+
+    val64 = tcg_temp_new_i64();
     if (size == 3) {
         TCGv_i32 tmp2 = tcg_temp_new_i32();
+        TCGv_i32 tmp3 = tcg_temp_new_i32();
         tcg_gen_addi_i32(tmp2, addr, 4);
-        tmp = tcg_temp_new_i32();
-        gen_aa32_ld32u(tmp, tmp2, IS_USER(s));
+        gen_aa32_ld32u(tmp3, tmp2, IS_USER(s));
         tcg_temp_free_i32(tmp2);
-        tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_high, fail_label);
-        tcg_temp_free_i32(tmp);
+        tcg_gen_concat_i32_i64(val64, tmp, tmp3);
+        tcg_temp_free_i32(tmp3);
+    } else {
+        tcg_gen_extu_i32_i64(val64, tmp);
     }
+    tcg_temp_free_i32(tmp);
+
+    tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
+    tcg_temp_free_i64(val64);
+
     tmp = load_reg(s, rt);
     switch (size) {
     case 0:
@@ -6857,7 +6871,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     gen_set_label(fail_label);
     tcg_gen_movi_i32(cpu_R[rd], 1);
     gen_set_label(done_label);
-    tcg_gen_movi_i32(cpu_exclusive_addr, -1);
+    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
 }
 #endif
 
@@ -10269,6 +10283,8 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
     dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
     dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
+    dc->cp_regs = cpu->cp_regs;
+    dc->current_pl = arm_current_pl(env);
 
     cpu_F0s = tcg_temp_new_i32();
     cpu_F1s = tcg_temp_new_i32();
diff --git a/target-arm/translate.h b/target-arm/translate.h
index a6f6b3e699..67da6996c9 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -24,6 +24,8 @@ typedef struct DisasContext {
     int vec_len;
     int vec_stride;
     int aarch64;
+    int current_pl;
+    GHashTable *cp_regs;
 #define TMP_A64_MAX 16
     int tmp_a64_count;
     TCGv_i64 tmp_a64[TMP_A64_MAX];
diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
index f4fab155bd..d1751a40c6 100644
--- a/target-i386/cpu-qom.h
+++ b/target-i386/cpu-qom.h
@@ -69,6 +69,8 @@ typedef struct X86CPU {
     bool hyperv_vapic;
     bool hyperv_relaxed_timing;
     int hyperv_spinlock_attempts;
+    bool check_cpuid;
+    bool enforce_cpuid;
 
     /* if true the CPUID code directly forward host cache leaves to the guest */
     bool cache_info_passthrough;
@@ -82,6 +84,10 @@ typedef struct X86CPU {
      * capabilities) directly to the guest.
      */
     bool enable_pmu;
+
+    /* in order to simplify APIC support, we leave this pointer to the
+       user */
+    struct DeviceState *apic_state;
 } X86CPU;
 
 static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 6b7b1a9e23..0eea8c7160 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -354,9 +354,6 @@ typedef struct model_features_t {
     FeatureWord feat_word;
 } model_features_t;
 
-int check_cpuid = 0;
-int enforce_cpuid = 0;
-
 static uint32_t kvm_default_features = (1 << KVM_FEATURE_CLOCKSOURCE) |
         (1 << KVM_FEATURE_NOP_IO_DELAY) |
         (1 << KVM_FEATURE_CLOCKSOURCE2) |
@@ -1596,6 +1593,46 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, void *opaque,
     error_propagate(errp, err);
 }
 
+static void x86_get_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
+                                 const char *name, Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+    int64_t value = cpu->hyperv_spinlock_attempts;
+
+    visit_type_int(v, &value, name, errp);
+}
+
+static void x86_set_hv_spinlocks(Object *obj, Visitor *v, void *opaque,
+                                 const char *name, Error **errp)
+{
+    const int64_t min = 0xFFF;
+    const int64_t max = UINT_MAX;
+    X86CPU *cpu = X86_CPU(obj);
+    Error *err = NULL;
+    int64_t value;
+
+    visit_type_int(v, &value, name, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    if (value < min || value > max) {
+        error_setg(errp, "Property %s.%s doesn't take value %" PRId64
+                  " (minimum: %" PRId64 ", maximum: %" PRId64 ")",
+                  object_get_typename(obj), name ? name : "null",
+                  value, min, max);
+        return;
+    }
+    cpu->hyperv_spinlock_attempts = value;
+}
+
+static PropertyInfo qdev_prop_spinlocks = {
+    .name  = "int",
+    .get   = x86_get_hv_spinlocks,
+    .set   = x86_set_hv_spinlocks,
+};
+
 static int cpu_x86_find_by_name(X86CPU *cpu, x86_def_t *x86_cpu_def,
                                 const char *name)
 {
@@ -1667,15 +1704,7 @@ static void cpu_x86_parse_featurestr(X86CPU *cpu, char *features, Error **errp)
         } else if ((val = strchr(featurestr, '='))) {
             *val = 0; val++;
             feat2prop(featurestr);
-            if (!strcmp(featurestr, "family")) {
-                object_property_parse(OBJECT(cpu), val, featurestr, errp);
-            } else if (!strcmp(featurestr, "model")) {
-                object_property_parse(OBJECT(cpu), val, featurestr, errp);
-            } else if (!strcmp(featurestr, "stepping")) {
-                object_property_parse(OBJECT(cpu), val, featurestr, errp);
-            } else if (!strcmp(featurestr, "level")) {
-                object_property_parse(OBJECT(cpu), val, featurestr, errp);
-            } else if (!strcmp(featurestr, "xlevel")) {
+            if (!strcmp(featurestr, "xlevel")) {
                 char *err;
                 char num[32];
 
@@ -1691,10 +1720,6 @@ static void cpu_x86_parse_featurestr(X86CPU *cpu, char *features, Error **errp)
                 }
                 snprintf(num, sizeof(num), "%" PRIu32, numvalue);
                 object_property_parse(OBJECT(cpu), num, featurestr, errp);
-            } else if (!strcmp(featurestr, "vendor")) {
-                object_property_parse(OBJECT(cpu), val, featurestr, errp);
-            } else if (!strcmp(featurestr, "model-id")) {
-                object_property_parse(OBJECT(cpu), val, featurestr, errp);
             } else if (!strcmp(featurestr, "tsc-freq")) {
                 int64_t tsc_freq;
                 char *err;
@@ -1711,6 +1736,7 @@ static void cpu_x86_parse_featurestr(X86CPU *cpu, char *features, Error **errp)
             } else if (!strcmp(featurestr, "hv-spinlocks")) {
                 char *err;
                 const int min = 0xFFF;
+                char num[32];
                 numvalue = strtoul(val, &err, 0);
                 if (!*val || *err) {
                     error_setg(errp, "bad numerical value %s", val);
@@ -1722,23 +1748,14 @@ static void cpu_x86_parse_featurestr(X86CPU *cpu, char *features, Error **errp)
                             min);
                     numvalue = min;
                 }
-                cpu->hyperv_spinlock_attempts = numvalue;
+                snprintf(num, sizeof(num), "%" PRIu32, numvalue);
+                object_property_parse(OBJECT(cpu), num, featurestr, errp);
             } else {
-                error_setg(errp, "unrecognized feature %s", featurestr);
-                goto out;
+                object_property_parse(OBJECT(cpu), val, featurestr, errp);
             }
-        } else if (!strcmp(featurestr, "check")) {
-            check_cpuid = 1;
-        } else if (!strcmp(featurestr, "enforce")) {
-            check_cpuid = enforce_cpuid = 1;
-        } else if (!strcmp(featurestr, "hv_relaxed")) {
-            cpu->hyperv_relaxed_timing = true;
-        } else if (!strcmp(featurestr, "hv_vapic")) {
-            cpu->hyperv_vapic = true;
         } else {
-            error_setg(errp, "feature string `%s' not in format (+feature|"
-                       "-feature|feature=xyz)", featurestr);
-            goto out;
+            feat2prop(featurestr);
+            object_property_parse(OBJECT(cpu), "on", featurestr, errp);
         }
         if (error_is_set(errp)) {
             goto out;
@@ -2447,7 +2464,7 @@ static void x86_cpu_reset(CPUState *s)
 #if !defined(CONFIG_USER_ONLY)
     /* We hard-wire the BSP to the first CPU. */
     if (s->cpu_index == 0) {
-        apic_designate_bsp(env->apic_state);
+        apic_designate_bsp(cpu->apic_state);
     }
 
     s->halted = !cpu_is_bsp(cpu);
@@ -2457,7 +2474,7 @@ static void x86_cpu_reset(CPUState *s)
 #ifndef CONFIG_USER_ONLY
 bool cpu_is_bsp(X86CPU *cpu)
 {
-    return cpu_get_apic_base(cpu->env.apic_state) & MSR_IA32_APICBASE_BSP;
+    return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP;
 }
 
 /* TODO: remove me, when reset over QOM tree is implemented */
@@ -2498,31 +2515,29 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
         apic_type = "xen-apic";
     }
 
-    env->apic_state = qdev_try_create(qdev_get_parent_bus(dev), apic_type);
-    if (env->apic_state == NULL) {
+    cpu->apic_state = qdev_try_create(qdev_get_parent_bus(dev), apic_type);
+    if (cpu->apic_state == NULL) {
         error_setg(errp, "APIC device '%s' could not be created", apic_type);
         return;
     }
 
     object_property_add_child(OBJECT(cpu), "apic",
-                              OBJECT(env->apic_state), NULL);
-    qdev_prop_set_uint8(env->apic_state, "id", env->cpuid_apic_id);
+                              OBJECT(cpu->apic_state), NULL);
+    qdev_prop_set_uint8(cpu->apic_state, "id", env->cpuid_apic_id);
     /* TODO: convert to link<> */
-    apic = APIC_COMMON(env->apic_state);
+    apic = APIC_COMMON(cpu->apic_state);
     apic->cpu = cpu;
 }
 
 static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp)
 {
-    CPUX86State *env = &cpu->env;
-
-    if (env->apic_state == NULL) {
+    if (cpu->apic_state == NULL) {
         return;
     }
 
-    if (qdev_init(env->apic_state)) {
+    if (qdev_init(cpu->apic_state)) {
         error_setg(errp, "APIC device '%s' could not be initialized",
-                   object_get_typename(OBJECT(env->apic_state)));
+                   object_get_typename(OBJECT(cpu->apic_state)));
         return;
     }
 }
@@ -2566,8 +2581,8 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
         env->features[FEAT_8000_0001_ECX] &= TCG_EXT3_FEATURES;
         env->features[FEAT_SVM] &= TCG_SVM_FEATURES;
     } else {
-        if (check_cpuid && kvm_check_features_against_host(cpu)
-            && enforce_cpuid) {
+        if ((cpu->check_cpuid || cpu->enforce_cpuid)
+            && kvm_check_features_against_host(cpu) && cpu->enforce_cpuid) {
             error_setg(&local_err,
                        "Host's CPU doesn't support requested features");
             goto out;
@@ -2726,6 +2741,11 @@ static void x86_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
 
 static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false),
+    { .name  = "hv-spinlocks", .info  = &qdev_prop_spinlocks },
+    DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false),
+    DEFINE_PROP_BOOL("hv-vapic", X86CPU, hyperv_vapic, false),
+    DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false),
+    DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
     DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index ea373e82dc..1d94a9dbd7 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -895,10 +895,6 @@ typedef struct CPUX86State {
     int tsc_khz;
     void *kvm_xsave_buf;
 
-    /* in order to simplify APIC support, we leave this pointer to the
-       user */
-    struct DeviceState *apic_state;
-
     uint64_t mcg_cap;
     uint64_t mcg_ctl;
     uint64_t mce_banks[MCE_BANKS_DEF*4];
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 7c196ffc42..fe613b26e1 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -531,6 +531,12 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
 
     if (!(env->cr[0] & CR0_PG_MASK)) {
         pte = addr;
+#ifdef TARGET_X86_64
+        if (!(env->hflags & HF_LMA_MASK)) {
+            /* Without long mode we can only address 32 bits in real mode */
+            pte = (uint32_t)pte;
+        }
+#endif
         virt_addr = addr & TARGET_PAGE_MASK;
         prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
         page_size = 4096;
@@ -1241,14 +1247,16 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
 
 void cpu_report_tpr_access(CPUX86State *env, TPRAccess access)
 {
+    X86CPU *cpu = x86_env_get_cpu(env);
+
     if (kvm_enabled()) {
         env->tpr_access_type = access;
 
-        cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_TPR);
+        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_TPR);
     } else {
         cpu_restore_state(env, env->mem_io_pc);
 
-        apic_handle_tpr_access_report(env->apic_state, env->eip, access);
+        apic_handle_tpr_access_report(cpu->apic_state, env->eip, access);
     }
 }
 #endif /* !CONFIG_USER_ONLY */
@@ -1295,14 +1303,12 @@ void do_cpu_init(X86CPU *cpu)
     cpu_reset(cs);
     cs->interrupt_request = sipi;
     env->pat = pat;
-    apic_init_reset(env->apic_state);
+    apic_init_reset(cpu->apic_state);
 }
 
 void do_cpu_sipi(X86CPU *cpu)
 {
-    CPUX86State *env = &cpu->env;
-
-    apic_sipi(env->apic_state);
+    apic_sipi(cpu->apic_state);
 }
 #else
 void do_cpu_init(X86CPU *cpu)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 1188482359..7522e98072 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1069,8 +1069,8 @@ static int kvm_put_sregs(X86CPU *cpu)
     sregs.cr3 = env->cr[3];
     sregs.cr4 = env->cr[4];
 
-    sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
-    sregs.apic_base = cpu_get_apic_base(env->apic_state);
+    sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
+    sregs.apic_base = cpu_get_apic_base(cpu->apic_state);
 
     sregs.efer = env->efer;
 
@@ -1619,8 +1619,7 @@ static int kvm_get_mp_state(X86CPU *cpu)
 
 static int kvm_get_apic(X86CPU *cpu)
 {
-    CPUX86State *env = &cpu->env;
-    DeviceState *apic = env->apic_state;
+    DeviceState *apic = cpu->apic_state;
     struct kvm_lapic_state kapic;
     int ret;
 
@@ -1637,8 +1636,7 @@ static int kvm_get_apic(X86CPU *cpu)
 
 static int kvm_put_apic(X86CPU *cpu)
 {
-    CPUX86State *env = &cpu->env;
-    DeviceState *apic = env->apic_state;
+    DeviceState *apic = cpu->apic_state;
     struct kvm_lapic_state kapic;
 
     if (apic && kvm_irqchip_in_kernel()) {
@@ -1962,7 +1960,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
         }
 
         DPRINTF("setting tpr\n");
-        run->cr8 = cpu_get_apic_tpr(env->apic_state);
+        run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);
     }
 }
 
@@ -1976,8 +1974,8 @@ void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
     } else {
         env->eflags &= ~IF_MASK;
     }
-    cpu_set_apic_tpr(env->apic_state, run->cr8);
-    cpu_set_apic_base(env->apic_state, run->apic_base);
+    cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8);
+    cpu_set_apic_base(x86_cpu->apic_state, run->apic_base);
 }
 
 int kvm_arch_process_async_events(CPUState *cs)
@@ -2014,7 +2012,7 @@ int kvm_arch_process_async_events(CPUState *cs)
 
     if (cs->interrupt_request & CPU_INTERRUPT_POLL) {
         cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
-        apic_poll_irq(env->apic_state);
+        apic_poll_irq(cpu->apic_state);
     }
     if (((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) ||
@@ -2032,7 +2030,7 @@ int kvm_arch_process_async_events(CPUState *cs)
     if (cs->interrupt_request & CPU_INTERRUPT_TPR) {
         cs->interrupt_request &= ~CPU_INTERRUPT_TPR;
         kvm_cpu_synchronize_state(cs);
-        apic_handle_tpr_access_report(env->apic_state, env->eip,
+        apic_handle_tpr_access_report(cpu->apic_state, env->eip,
                                       env->tpr_access_type);
     }
 
@@ -2056,11 +2054,10 @@ static int kvm_handle_halt(X86CPU *cpu)
 
 static int kvm_handle_tpr_access(X86CPU *cpu)
 {
-    CPUX86State *env = &cpu->env;
     CPUState *cs = CPU(cpu);
     struct kvm_run *run = cs->kvm_run;
 
-    apic_handle_tpr_access_report(env->apic_state, run->tpr_access.rip,
+    apic_handle_tpr_access_report(cpu->apic_state, run->tpr_access.rip,
                                   run->tpr_access.is_write ? TPR_ACCESS_WRITE
                                                            : TPR_ACCESS_READ);
     return 1;
diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c
index b6307ca386..47f6a2f7c1 100644
--- a/target-i386/misc_helper.c
+++ b/target-i386/misc_helper.c
@@ -155,7 +155,7 @@ target_ulong helper_read_crN(CPUX86State *env, int reg)
         break;
     case 8:
         if (!(env->hflags2 & HF2_VINTR_MASK)) {
-            val = cpu_get_apic_tpr(env->apic_state);
+            val = cpu_get_apic_tpr(x86_env_get_cpu(env)->apic_state);
         } else {
             val = env->v_tpr;
         }
@@ -179,7 +179,7 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
         break;
     case 8:
         if (!(env->hflags2 & HF2_VINTR_MASK)) {
-            cpu_set_apic_tpr(env->apic_state, t0);
+            cpu_set_apic_tpr(x86_env_get_cpu(env)->apic_state, t0);
         }
         env->v_tpr = t0 & 0x0f;
         break;
@@ -286,7 +286,7 @@ void helper_wrmsr(CPUX86State *env)
         env->sysenter_eip = val;
         break;
     case MSR_IA32_APICBASE:
-        cpu_set_apic_base(env->apic_state, val);
+        cpu_set_apic_base(x86_env_get_cpu(env)->apic_state, val);
         break;
     case MSR_EFER:
         {
@@ -437,7 +437,7 @@ void helper_rdmsr(CPUX86State *env)
         val = env->sysenter_eip;
         break;
     case MSR_IA32_APICBASE:
-        val = cpu_get_apic_base(env->apic_state);
+        val = cpu_get_apic_base(x86_env_get_cpu(env)->apic_state);
         break;
     case MSR_EFER:
         val = env->efer;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 7916e5b1f6..b0f227915a 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -85,7 +85,8 @@ typedef struct DisasContext {
     /* current insn context */
     int override; /* -1 if no override */
     int prefix;
-    int aflag, dflag;
+    TCGMemOp aflag;
+    TCGMemOp dflag;
     target_ulong pc; /* pc = eip + cs_base */
     int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
                    static state change (stop translation) */
@@ -126,7 +127,7 @@ typedef struct DisasContext {
 static void gen_eob(DisasContext *s);
 static void gen_jmp(DisasContext *s, target_ulong eip);
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
-static void gen_op(DisasContext *s1, int op, int ot, int d);
+static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
 
 /* i386 arith/logic operations */
 enum {
@@ -163,14 +164,6 @@ enum {
     JCC_LE,
 };
 
-/* operand size */
-enum {
-    OT_BYTE = 0,
-    OT_WORD,
-    OT_LONG,
-    OT_QUAD,
-};
-
 enum {
     /* I386 int registers */
     OR_EAX,   /* MUST be even numbered */
@@ -260,73 +253,6 @@ static void gen_update_cc_op(DisasContext *s)
     }
 }
 
-static inline void gen_op_movl_T0_0(void)
-{
-    tcg_gen_movi_tl(cpu_T[0], 0);
-}
-
-static inline void gen_op_movl_T0_im(int32_t val)
-{
-    tcg_gen_movi_tl(cpu_T[0], val);
-}
-
-static inline void gen_op_movl_T0_imu(uint32_t val)
-{
-    tcg_gen_movi_tl(cpu_T[0], val);
-}
-
-static inline void gen_op_movl_T1_im(int32_t val)
-{
-    tcg_gen_movi_tl(cpu_T[1], val);
-}
-
-static inline void gen_op_movl_T1_imu(uint32_t val)
-{
-    tcg_gen_movi_tl(cpu_T[1], val);
-}
-
-static inline void gen_op_movl_A0_im(uint32_t val)
-{
-    tcg_gen_movi_tl(cpu_A0, val);
-}
-
-#ifdef TARGET_X86_64
-static inline void gen_op_movq_A0_im(int64_t val)
-{
-    tcg_gen_movi_tl(cpu_A0, val);
-}
-#endif
-
-static inline void gen_movtl_T0_im(target_ulong val)
-{
-    tcg_gen_movi_tl(cpu_T[0], val);
-}
-
-static inline void gen_movtl_T1_im(target_ulong val)
-{
-    tcg_gen_movi_tl(cpu_T[1], val);
-}
-
-static inline void gen_op_andl_T0_ffff(void)
-{
-    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
-}
-
-static inline void gen_op_andl_T0_im(uint32_t val)
-{
-    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val);
-}
-
-static inline void gen_op_movl_T0_T1(void)
-{
-    tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
-}
-
-static inline void gen_op_andl_A0_ffff(void)
-{
-    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff);
-}
-
 #ifdef TARGET_X86_64
 
 #define NB_OP_SIZES 4
@@ -370,66 +296,71 @@ static inline bool byte_reg_is_xH(int reg)
     return true;
 }
 
-static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
+/* Select the size of a push/pop operation.  */
+static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
+{
+    if (CODE64(s)) {
+        return ot == MO_16 ? MO_16 : MO_64;
+    } else {
+        return ot;
+    }
+}
+
+/* Select only size 64 else 32.  Used for SSE operand sizes.  */
+static inline TCGMemOp mo_64_32(TCGMemOp ot)
 {
-    switch(ot) {
-    case OT_BYTE:
-        if (!byte_reg_is_xH(reg)) {
-            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
-        } else {
-            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
-        }
-        break;
-    case OT_WORD:
-        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
-        break;
-    default: /* XXX this shouldn't be reached;  abort? */
-    case OT_LONG:
-        /* For x86_64, this sets the higher half of register to zero.
-           For i386, this is equivalent to a mov. */
-        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
-        break;
 #ifdef TARGET_X86_64
-    case OT_QUAD:
-        tcg_gen_mov_tl(cpu_regs[reg], t0);
-        break;
+    return ot == MO_64 ? MO_64 : MO_32;
+#else
+    return MO_32;
 #endif
-    }
 }
 
-static inline void gen_op_mov_reg_T0(int ot, int reg)
+/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
+   byte vs word opcodes.  */
+static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
 {
-    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+    return b & 1 ? ot : MO_8;
 }
 
-static inline void gen_op_mov_reg_T1(int ot, int reg)
+/* Select size 8 if lsb of B is clear, else OT capped at 32.
+   Used for decoding operand size of port opcodes.  */
+static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
 {
-    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
 }
 
-static inline void gen_op_mov_reg_A0(int size, int reg)
+static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
 {
-    switch(size) {
-    case OT_BYTE:
-        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_A0, 0, 16);
+    switch(ot) {
+    case MO_8:
+        if (!byte_reg_is_xH(reg)) {
+            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
+        } else {
+            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
+        }
         break;
-    default: /* XXX this shouldn't be reached;  abort? */
-    case OT_WORD:
+    case MO_16:
+        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
+        break;
+    case MO_32:
         /* For x86_64, this sets the higher half of register to zero.
            For i386, this is equivalent to a mov. */
-        tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
+        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
         break;
 #ifdef TARGET_X86_64
-    case OT_LONG:
-        tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
+    case MO_64:
+        tcg_gen_mov_tl(cpu_regs[reg], t0);
         break;
 #endif
+    default:
+        tcg_abort();
     }
 }
 
-static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
+static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
 {
-    if (ot == OT_BYTE && byte_reg_is_xH(reg)) {
+    if (ot == MO_8 && byte_reg_is_xH(reg)) {
         tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
         tcg_gen_ext8u_tl(t0, t0);
     } else {
@@ -437,11 +368,6 @@ static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
     }
 }
 
-static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
-{
-    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
-}
-
 static inline void gen_op_movl_A0_reg(int reg)
 {
     tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
@@ -472,58 +398,21 @@ static void gen_add_A0_im(DisasContext *s, int val)
         gen_op_addl_A0_im(val);
 }
 
-static inline void gen_op_addl_T0_T1(void)
+static inline void gen_op_jmp_v(TCGv dest)
 {
-    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
 }
 
-static inline void gen_op_jmp_T0(void)
+static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
 {
-    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, eip));
+    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
+    gen_op_mov_reg_v(size, reg, cpu_tmp0);
 }
 
-static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
+static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
 {
-    switch(size) {
-    case OT_BYTE:
-        tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
-        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
-        break;
-    case OT_WORD:
-        tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
-        /* For x86_64, this sets the higher half of register to zero.
-           For i386, this is equivalent to a nop. */
-        tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0);
-        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
-        break;
-#ifdef TARGET_X86_64
-    case OT_LONG:
-        tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
-        break;
-#endif
-    }
-}
-
-static inline void gen_op_add_reg_T0(int size, int reg)
-{
-    switch(size) {
-    case OT_BYTE:
-        tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
-        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
-        break;
-    case OT_WORD:
-        tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
-        /* For x86_64, this sets the higher half of register to zero.
-           For i386, this is equivalent to a nop. */
-        tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0);
-        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
-        break;
-#ifdef TARGET_X86_64
-    case OT_LONG:
-        tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
-        break;
-#endif
-    }
+    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
+    gen_op_mov_reg_v(size, reg, cpu_tmp0);
 }
 
 static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
@@ -584,99 +473,29 @@ static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
 }
 #endif
 
-static inline void gen_op_lds_T0_A0(int idx)
+static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 {
-    int mem_index = (idx >> 2) - 1;
-    switch(idx & 3) {
-    case OT_BYTE:
-        tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
-        break;
-    case OT_WORD:
-        tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
-        break;
-    default:
-    case OT_LONG:
-        tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
-        break;
-    }
+    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
 }
 
-static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
+static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 {
-    int mem_index = (idx >> 2) - 1;
-    switch(idx & 3) {
-    case OT_BYTE:
-        tcg_gen_qemu_ld8u(t0, a0, mem_index);
-        break;
-    case OT_WORD:
-        tcg_gen_qemu_ld16u(t0, a0, mem_index);
-        break;
-    case OT_LONG:
-        tcg_gen_qemu_ld32u(t0, a0, mem_index);
-        break;
-    default:
-    case OT_QUAD:
-        /* Should never happen on 32-bit targets.  */
-#ifdef TARGET_X86_64
-        tcg_gen_qemu_ld64(t0, a0, mem_index);
-#endif
-        break;
-    }
+    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
 }
 
-/* XXX: always use ldu or lds */
-static inline void gen_op_ld_T0_A0(int idx)
+static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 {
-    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
-}
-
-static inline void gen_op_ldu_T0_A0(int idx)
-{
-    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
-}
-
-static inline void gen_op_ld_T1_A0(int idx)
-{
-    gen_op_ld_v(idx, cpu_T[1], cpu_A0);
-}
-
-static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
-{
-    int mem_index = (idx >> 2) - 1;
-    switch(idx & 3) {
-    case OT_BYTE:
-        tcg_gen_qemu_st8(t0, a0, mem_index);
-        break;
-    case OT_WORD:
-        tcg_gen_qemu_st16(t0, a0, mem_index);
-        break;
-    case OT_LONG:
-        tcg_gen_qemu_st32(t0, a0, mem_index);
-        break;
-    default:
-    case OT_QUAD:
-        /* Should never happen on 32-bit targets.  */
-#ifdef TARGET_X86_64
-        tcg_gen_qemu_st64(t0, a0, mem_index);
-#endif
-        break;
+    if (d == OR_TMP0) {
+        gen_op_st_v(s, idx, cpu_T[0], cpu_A0);
+    } else {
+        gen_op_mov_reg_v(idx, d, cpu_T[0]);
     }
 }
 
-static inline void gen_op_st_T0_A0(int idx)
-{
-    gen_op_st_v(idx, cpu_T[0], cpu_A0);
-}
-
-static inline void gen_op_st_T1_A0(int idx)
-{
-    gen_op_st_v(idx, cpu_T[1], cpu_A0);
-}
-
 static inline void gen_jmp_im(target_ulong pc)
 {
     tcg_gen_movi_tl(cpu_tmp0, pc);
-    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, eip));
+    gen_op_jmp_v(cpu_tmp0);
 }
 
 static inline void gen_string_movl_A0_ESI(DisasContext *s)
@@ -684,17 +503,18 @@ static inline void gen_string_movl_A0_ESI(DisasContext *s)
     int override;
 
     override = s->override;
+    switch (s->aflag) {
 #ifdef TARGET_X86_64
-    if (s->aflag == 2) {
+    case MO_64:
         if (override >= 0) {
             gen_op_movq_A0_seg(override);
             gen_op_addq_A0_reg_sN(0, R_ESI);
         } else {
             gen_op_movq_A0_reg(R_ESI);
         }
-    } else
+        break;
 #endif
-    if (s->aflag) {
+    case MO_32:
         /* 32 bit address */
         if (s->addseg && override < 0)
             override = R_DS;
@@ -704,54 +524,61 @@ static inline void gen_string_movl_A0_ESI(DisasContext *s)
         } else {
             gen_op_movl_A0_reg(R_ESI);
         }
-    } else {
+        break;
+    case MO_16:
         /* 16 address, always override */
         if (override < 0)
             override = R_DS;
-        gen_op_movl_A0_reg(R_ESI);
-        gen_op_andl_A0_ffff();
+        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESI]);
         gen_op_addl_A0_seg(s, override);
+        break;
+    default:
+        tcg_abort();
     }
 }
 
 static inline void gen_string_movl_A0_EDI(DisasContext *s)
 {
+    switch (s->aflag) {
 #ifdef TARGET_X86_64
-    if (s->aflag == 2) {
+    case MO_64:
         gen_op_movq_A0_reg(R_EDI);
-    } else
+        break;
 #endif
-    if (s->aflag) {
+    case MO_32:
         if (s->addseg) {
             gen_op_movl_A0_seg(R_ES);
             gen_op_addl_A0_reg_sN(0, R_EDI);
         } else {
             gen_op_movl_A0_reg(R_EDI);
         }
-    } else {
-        gen_op_movl_A0_reg(R_EDI);
-        gen_op_andl_A0_ffff();
+        break;
+    case MO_16:
+        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_EDI]);
         gen_op_addl_A0_seg(s, R_ES);
+        break;
+    default:
+        tcg_abort();
     }
 }
 
-static inline void gen_op_movl_T0_Dshift(int ot) 
+static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
 {
     tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, df));
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
 };
 
-static TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign)
+static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
 {
     switch (size) {
-    case OT_BYTE:
+    case MO_8:
         if (sign) {
             tcg_gen_ext8s_tl(dst, src);
         } else {
             tcg_gen_ext8u_tl(dst, src);
         }
         return dst;
-    case OT_WORD:
+    case MO_16:
         if (sign) {
             tcg_gen_ext16s_tl(dst, src);
         } else {
@@ -759,7 +586,7 @@ static TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign)
         }
         return dst;
 #ifdef TARGET_X86_64
-    case OT_LONG:
+    case MO_32:
         if (sign) {
             tcg_gen_ext32s_tl(dst, src);
         } else {
@@ -772,61 +599,65 @@ static TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign)
     }
 }
 
-static void gen_extu(int ot, TCGv reg)
+static void gen_extu(TCGMemOp ot, TCGv reg)
 {
     gen_ext_tl(reg, reg, ot, false);
 }
 
-static void gen_exts(int ot, TCGv reg)
+static void gen_exts(TCGMemOp ot, TCGv reg)
 {
     gen_ext_tl(reg, reg, ot, true);
 }
 
-static inline void gen_op_jnz_ecx(int size, int label1)
+static inline void gen_op_jnz_ecx(TCGMemOp size, int label1)
 {
     tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
-    gen_extu(size + 1, cpu_tmp0);
+    gen_extu(size, cpu_tmp0);
     tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
 }
 
-static inline void gen_op_jz_ecx(int size, int label1)
+static inline void gen_op_jz_ecx(TCGMemOp size, int label1)
 {
     tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
-    gen_extu(size + 1, cpu_tmp0);
+    gen_extu(size, cpu_tmp0);
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
 }
 
-static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
+static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
 {
     switch (ot) {
-    case OT_BYTE:
+    case MO_8:
         gen_helper_inb(v, n);
         break;
-    case OT_WORD:
+    case MO_16:
         gen_helper_inw(v, n);
         break;
-    case OT_LONG:
+    case MO_32:
         gen_helper_inl(v, n);
         break;
+    default:
+        tcg_abort();
     }
 }
 
-static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
+static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
 {
     switch (ot) {
-    case OT_BYTE:
+    case MO_8:
         gen_helper_outb(v, n);
         break;
-    case OT_WORD:
+    case MO_16:
         gen_helper_outw(v, n);
         break;
-    case OT_LONG:
+    case MO_32:
         gen_helper_outl(v, n);
         break;
+    default:
+        tcg_abort();
     }
 }
 
-static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
+static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                          uint32_t svm_flags)
 {
     int state_saved;
@@ -839,15 +670,17 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
         state_saved = 1;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         switch (ot) {
-        case OT_BYTE:
+        case MO_8:
             gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
             break;
-        case OT_WORD:
+        case MO_16:
             gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
             break;
-        case OT_LONG:
+        case MO_32:
             gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
             break;
+        default:
+            tcg_abort();
         }
     }
     if(s->flags & HF_SVMI_MASK) {
@@ -864,12 +697,12 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
     }
 }
 
-static inline void gen_movs(DisasContext *s, int ot)
+static inline void gen_movs(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_T0_A0(ot + s->mem_index);
+    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_T0_A0(ot + s->mem_index);
+    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
     gen_op_add_reg_T0(s->aflag, R_EDI);
@@ -1058,7 +891,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
     default:
         {
-            int size = (s->cc_op - CC_OP_ADDB) & 3;
+            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
             return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
         }
@@ -1099,7 +932,7 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
         return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
     default:
         {
-            int size = (s->cc_op - CC_OP_ADDB) & 3;
+            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
             return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
         }
@@ -1110,7 +943,8 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
    value 'b'. In the fast case, T0 is guaranted not to be used. */
 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
 {
-    int inv, jcc_op, size, cond;
+    int inv, jcc_op, cond;
+    TCGMemOp size;
     CCPrepare cc;
     TCGv t0;
 
@@ -1290,37 +1124,37 @@ static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
     return l2;
 }
 
-static inline void gen_stos(DisasContext *s, int ot)
+static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 {
-    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+    gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_T0_A0(ot + s->mem_index);
+    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
 
-static inline void gen_lods(DisasContext *s, int ot)
+static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_T0_A0(ot + s->mem_index);
-    gen_op_mov_reg_T0(ot, R_EAX);
+    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+    gen_op_mov_reg_v(ot, R_EAX, cpu_T[0]);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
 }
 
-static inline void gen_scas(DisasContext *s, int ot)
+static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_T1_A0(ot + s->mem_index);
+    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
     gen_op(s, OP_CMPL, ot, R_EAX);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
 
-static inline void gen_cmps(DisasContext *s, int ot)
+static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_T1_A0(ot + s->mem_index);
+    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
     gen_string_movl_A0_ESI(s);
     gen_op(s, OP_CMPL, ot, OR_TMP0);
     gen_op_movl_T0_Dshift(ot);
@@ -1328,35 +1162,33 @@ static inline void gen_cmps(DisasContext *s, int ot)
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
 
-static inline void gen_ins(DisasContext *s, int ot)
+static inline void gen_ins(DisasContext *s, TCGMemOp ot)
 {
     if (use_icount)
         gen_io_start();
     gen_string_movl_A0_EDI(s);
     /* Note: we must do this dummy write first to be restartable in
        case of page fault. */
-    gen_op_movl_T0_0();
-    gen_op_st_T0_A0(ot + s->mem_index);
-    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
-    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
+    tcg_gen_movi_tl(cpu_T[0], 0);
+    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
     gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);
-    gen_op_st_T0_A0(ot + s->mem_index);
+    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
     if (use_icount)
         gen_io_end();
 }
 
-static inline void gen_outs(DisasContext *s, int ot)
+static inline void gen_outs(DisasContext *s, TCGMemOp ot)
 {
     if (use_icount)
         gen_io_start();
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_T0_A0(ot + s->mem_index);
+    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
 
-    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
-    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
+    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
     gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
@@ -1370,7 +1202,7 @@ static inline void gen_outs(DisasContext *s, int ot)
 /* same method as Valgrind : we generate jumps to current or next
    instruction */
 #define GEN_REPZ(op)                                                          \
-static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
+static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                  target_ulong cur_eip, target_ulong next_eip) \
 {                                                                             \
     int l2;\
@@ -1386,7 +1218,7 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
 }
 
 #define GEN_REPZ2(op)                                                         \
-static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
+static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                    target_ulong cur_eip,                      \
                                    target_ulong next_eip,                     \
                                    int nz)                                    \
@@ -1468,22 +1300,19 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
 }
 
 /* if d == OR_TMP0, it means memory operand (address in A0) */
-static void gen_op(DisasContext *s1, int op, int ot, int d)
+static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 {
     if (d != OR_TMP0) {
-        gen_op_mov_TN_reg(ot, 0, d);
+        gen_op_mov_v_reg(ot, cpu_T[0], d);
     } else {
-        gen_op_ld_T0_A0(ot + s1->mem_index);
+        gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
     }
     switch(op) {
     case OP_ADCL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
-        if (d != OR_TMP0)
-            gen_op_mov_reg_T0(ot, d);
-        else
-            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update3_cc(cpu_tmp4);
         set_cc_op(s1, CC_OP_ADCB + ot);
         break;
@@ -1491,57 +1320,39 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         gen_compute_eflags_c(s1, cpu_tmp4);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
-        if (d != OR_TMP0)
-            gen_op_mov_reg_T0(ot, d);
-        else
-            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update3_cc(cpu_tmp4);
         set_cc_op(s1, CC_OP_SBBB + ot);
         break;
     case OP_ADDL:
-        gen_op_addl_T0_T1();
-        if (d != OR_TMP0)
-            gen_op_mov_reg_T0(ot, d);
-        else
-            gen_op_st_T0_A0(ot + s1->mem_index);
+        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update2_cc();
         set_cc_op(s1, CC_OP_ADDB + ot);
         break;
     case OP_SUBL:
         tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-        if (d != OR_TMP0)
-            gen_op_mov_reg_T0(ot, d);
-        else
-            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update2_cc();
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     default:
     case OP_ANDL:
         tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-        if (d != OR_TMP0)
-            gen_op_mov_reg_T0(ot, d);
-        else
-            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update1_cc();
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_ORL:
         tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-        if (d != OR_TMP0)
-            gen_op_mov_reg_T0(ot, d);
-        else
-            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update1_cc();
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_XORL:
         tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-        if (d != OR_TMP0)
-            gen_op_mov_reg_T0(ot, d);
-        else
-            gen_op_st_T0_A0(ot + s1->mem_index);
+        gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update1_cc();
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
@@ -1555,12 +1366,13 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
 }
 
 /* if d == OR_TMP0, it means memory operand (address in A0) */
-static void gen_inc(DisasContext *s1, int ot, int d, int c)
+static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
 {
-    if (d != OR_TMP0)
-        gen_op_mov_TN_reg(ot, 0, d);
-    else
-        gen_op_ld_T0_A0(ot + s1->mem_index);
+    if (d != OR_TMP0) {
+        gen_op_mov_v_reg(ot, cpu_T[0], d);
+    } else {
+        gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
+    }
     gen_compute_eflags_c(s1, cpu_cc_src);
     if (c > 0) {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
@@ -1569,15 +1381,12 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
         set_cc_op(s1, CC_OP_DECB + ot);
     }
-    if (d != OR_TMP0)
-        gen_op_mov_reg_T0(ot, d);
-    else
-        gen_op_st_T0_A0(ot + s1->mem_index);
+    gen_op_st_rm_T0_A0(s1, ot, d);
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
-static void gen_shift_flags(DisasContext *s, int ot, TCGv result, TCGv shm1,
-                            TCGv count, bool is_right)
+static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
+                            TCGv shm1, TCGv count, bool is_right)
 {
     TCGv_i32 z32, s32, oldop;
     TCGv z_tl;
@@ -1621,16 +1430,16 @@ static void gen_shift_flags(DisasContext *s, int ot, TCGv result, TCGv shm1,
     set_cc_op(s, CC_OP_DYNAMIC);
 }
 
-static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, 
+static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                             int is_right, int is_arith)
 {
-    target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_T0_A0(ot + s->mem_index);
+        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
     } else {
-        gen_op_mov_TN_reg(ot, 0, op1);
+        gen_op_mov_v_reg(ot, cpu_T[0], op1);
     }
 
     tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
@@ -1652,25 +1461,21 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
     }
 
     /* store */
-    if (op1 == OR_TMP0) {
-        gen_op_st_T0_A0(ot + s->mem_index);
-    } else {
-        gen_op_mov_reg_T0(ot, op1);
-    }
+    gen_op_st_rm_T0_A0(s, ot, op1);
 
     gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right);
 }
 
-static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
+static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                             int is_right, int is_arith)
 {
-    int mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+    int mask = (ot == MO_64 ? 0x3f : 0x1f);
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_T0_A0(ot + s->mem_index);
+        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
     else
-        gen_op_mov_TN_reg(ot, 0, op1);
+        gen_op_mov_v_reg(ot, cpu_T[0], op1);
 
     op2 &= mask;
     if (op2 != 0) {
@@ -1691,11 +1496,8 @@ static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
     }
 
     /* store */
-    if (op1 == OR_TMP0)
-        gen_op_st_T0_A0(ot + s->mem_index);
-    else
-        gen_op_mov_reg_T0(ot, op1);
-        
+    gen_op_st_rm_T0_A0(s, ot, op1);
+
     /* update eflags if non zero shift */
     if (op2 != 0) {
         tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
@@ -1712,33 +1514,33 @@ static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
         tcg_gen_shri_tl(ret, arg1, -arg2);
 }
 
-static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right)
+static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 {
-    target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
     TCGv_i32 t0, t1;
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_T0_A0(ot + s->mem_index);
+        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
     } else {
-        gen_op_mov_TN_reg(ot, 0, op1);
+        gen_op_mov_v_reg(ot, cpu_T[0], op1);
     }
 
     tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
 
     switch (ot) {
-    case OT_BYTE:
+    case MO_8:
         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
         tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
         tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101);
         goto do_long;
-    case OT_WORD:
+    case MO_16:
         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
         tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16);
         goto do_long;
     do_long:
 #ifdef TARGET_X86_64
-    case OT_LONG:
+    case MO_32:
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         if (is_right) {
@@ -1759,11 +1561,7 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right)
     }
 
     /* store */
-    if (op1 == OR_TMP0) {
-        gen_op_st_T0_A0(ot + s->mem_index);
-    } else {
-        gen_op_mov_reg_T0(ot, op1);
-    }
+    gen_op_st_rm_T0_A0(s, ot, op1);
 
     /* We'll need the flags computed into CC_SRC.  */
     gen_compute_eflags(s);
@@ -1801,24 +1599,24 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right)
     set_cc_op(s, CC_OP_DYNAMIC);
 }
 
-static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
+static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                           int is_right)
 {
-    int mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+    int mask = (ot == MO_64 ? 0x3f : 0x1f);
     int shift;
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_T0_A0(ot + s->mem_index);
+        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
     } else {
-        gen_op_mov_TN_reg(ot, 0, op1);
+        gen_op_mov_v_reg(ot, cpu_T[0], op1);
     }
 
     op2 &= mask;
     if (op2 != 0) {
         switch (ot) {
 #ifdef TARGET_X86_64
-        case OT_LONG:
+        case MO_32:
             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
             if (is_right) {
                 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
@@ -1835,10 +1633,10 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
                 tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2);
             }
             break;
-        case OT_BYTE:
+        case MO_8:
             mask = 7;
             goto do_shifts;
-        case OT_WORD:
+        case MO_16:
             mask = 15;
         do_shifts:
             shift = op2 & mask;
@@ -1854,11 +1652,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
     }
 
     /* store */
-    if (op1 == OR_TMP0) {
-        gen_op_st_T0_A0(ot + s->mem_index);
-    } else {
-        gen_op_mov_reg_T0(ot, op1);
-    }
+    gen_op_st_rm_T0_A0(s, ot, op1);
 
     if (op2 != 0) {
         /* Compute the flags into CC_SRC.  */
@@ -1883,7 +1677,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
 }
 
 /* XXX: add faster immediate = 1 case */
-static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, 
+static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right)
 {
     gen_compute_eflags(s);
@@ -1891,71 +1685,72 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_T0_A0(ot + s->mem_index);
+        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
     else
-        gen_op_mov_TN_reg(ot, 0, op1);
+        gen_op_mov_v_reg(ot, cpu_T[0], op1);
     
     if (is_right) {
         switch (ot) {
-        case OT_BYTE:
+        case MO_8:
             gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
-        case OT_WORD:
+        case MO_16:
             gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
-        case OT_LONG:
+        case MO_32:
             gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
 #ifdef TARGET_X86_64
-        case OT_QUAD:
+        case MO_64:
             gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
 #endif
+        default:
+            tcg_abort();
         }
     } else {
         switch (ot) {
-        case OT_BYTE:
+        case MO_8:
             gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
-        case OT_WORD:
+        case MO_16:
             gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
-        case OT_LONG:
+        case MO_32:
             gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
 #ifdef TARGET_X86_64
-        case OT_QUAD:
+        case MO_64:
             gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
 #endif
+        default:
+            tcg_abort();
         }
     }
     /* store */
-    if (op1 == OR_TMP0)
-        gen_op_st_T0_A0(ot + s->mem_index);
-    else
-        gen_op_mov_reg_T0(ot, op1);
+    gen_op_st_rm_T0_A0(s, ot, op1);
 }
 
 /* XXX: add faster immediate case */
-static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
+static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                              bool is_right, TCGv count_in)
 {
-    target_ulong mask = (ot == OT_QUAD ? 63 : 31);
+    target_ulong mask = (ot == MO_64 ? 63 : 31);
     TCGv count;
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_T0_A0(ot + s->mem_index);
+        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
     } else {
-        gen_op_mov_TN_reg(ot, 0, op1);
+        gen_op_mov_v_reg(ot, cpu_T[0], op1);
     }
 
     count = tcg_temp_new();
     tcg_gen_andi_tl(count, count_in, mask);
 
     switch (ot) {
-    case OT_WORD:
+    case MO_16:
         /* Note: we implement the Intel behaviour for shift count > 16.
            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
            portion by constructing it as a 32-bit value.  */
@@ -1968,7 +1763,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
         }
         /* FALLTHRU */
 #ifdef TARGET_X86_64
-    case OT_LONG:
+    case MO_32:
         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
         tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
@@ -1994,7 +1789,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
             tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
         } else {
             tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
-            if (ot == OT_WORD) {
+            if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
                 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
                 tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4);
@@ -2013,20 +1808,16 @@ static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
     }
 
     /* store */
-    if (op1 == OR_TMP0) {
-        gen_op_st_T0_A0(ot + s->mem_index);
-    } else {
-        gen_op_mov_reg_T0(ot, op1);
-    }
+    gen_op_st_rm_T0_A0(s, ot, op1);
 
     gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right);
     tcg_temp_free(count);
 }
 
-static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
+static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
 {
     if (s != OR_TMP1)
-        gen_op_mov_TN_reg(ot, 1, s);
+        gen_op_mov_v_reg(ot, cpu_T[1], s);
     switch(op) {
     case OP_ROL:
         gen_rot_rm_T1(s1, ot, d, 0);
@@ -2053,7 +1844,7 @@ static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
     }
 }
 
-static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
+static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
 {
     switch(op) {
     case OP_ROL:
@@ -2074,21 +1865,19 @@ static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
         break;
     default:
         /* currently not optimized */
-        gen_op_movl_T1_im(c);
+        tcg_gen_movi_tl(cpu_T[1], c);
         gen_shift(s1, op, ot, d, OR_TMP1);
         break;
     }
 }
 
-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm,
-                          int *reg_ptr, int *offset_ptr)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
 {
     target_long disp;
     int havesib;
     int base;
     int index;
     int scale;
-    int opreg;
     int mod, rm, code, override, must_add_seg;
     TCGv sum;
 
@@ -2099,7 +1888,9 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm,
     mod = (modrm >> 6) & 3;
     rm = modrm & 7;
 
-    if (s->aflag) {
+    switch (s->aflag) {
+    case MO_64:
+    case MO_32:
         havesib = 0;
         base = rm;
         index = -1;
@@ -2179,26 +1970,28 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm,
             tcg_gen_ld_tl(cpu_tmp0, cpu_env,
                           offsetof(CPUX86State, segs[override].base));
             if (CODE64(s)) {
-                if (s->aflag != 2) {
+                if (s->aflag == MO_32) {
                     tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
                 }
                 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-                goto done;
+                return;
             }
 
             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
         }
 
-        if (s->aflag != 2) {
+        if (s->aflag == MO_32) {
             tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
         }
-    } else {
+        break;
+
+    case MO_16:
         switch (mod) {
         case 0:
             if (rm == 6) {
                 disp = cpu_lduw_code(env, s->pc);
                 s->pc += 2;
-                gen_op_movl_A0_im(disp);
+                tcg_gen_movi_tl(cpu_A0, disp);
                 rm = 0; /* avoid SS override */
                 goto no_rm;
             } else {
@@ -2210,61 +2003,57 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm,
             break;
         default:
         case 2:
-            disp = cpu_lduw_code(env, s->pc);
+            disp = (int16_t)cpu_lduw_code(env, s->pc);
             s->pc += 2;
             break;
         }
-        switch(rm) {
+
+        sum = cpu_A0;
+        switch (rm) {
         case 0:
-            gen_op_movl_A0_reg(R_EBX);
-            gen_op_addl_A0_reg_sN(0, R_ESI);
+            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_ESI]);
             break;
         case 1:
-            gen_op_movl_A0_reg(R_EBX);
-            gen_op_addl_A0_reg_sN(0, R_EDI);
+            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_EDI]);
             break;
         case 2:
-            gen_op_movl_A0_reg(R_EBP);
-            gen_op_addl_A0_reg_sN(0, R_ESI);
+            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_ESI]);
             break;
         case 3:
-            gen_op_movl_A0_reg(R_EBP);
-            gen_op_addl_A0_reg_sN(0, R_EDI);
+            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_EDI]);
             break;
         case 4:
-            gen_op_movl_A0_reg(R_ESI);
+            sum = cpu_regs[R_ESI];
             break;
         case 5:
-            gen_op_movl_A0_reg(R_EDI);
+            sum = cpu_regs[R_EDI];
             break;
         case 6:
-            gen_op_movl_A0_reg(R_EBP);
+            sum = cpu_regs[R_EBP];
             break;
         default:
         case 7:
-            gen_op_movl_A0_reg(R_EBX);
+            sum = cpu_regs[R_EBX];
             break;
         }
-        if (disp != 0)
-            gen_op_addl_A0_im(disp);
-        gen_op_andl_A0_ffff();
+        tcg_gen_addi_tl(cpu_A0, sum, disp);
+        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
     no_rm:
         if (must_add_seg) {
             if (override < 0) {
-                if (rm == 2 || rm == 3 || rm == 6)
+                if (rm == 2 || rm == 3 || rm == 6) {
                     override = R_SS;
-                else
+                } else {
                     override = R_DS;
+                }
             }
             gen_op_addl_A0_seg(s, override);
         }
-    }
+        break;
 
- done:
-    opreg = OR_A0;
-    disp = 0;
-    *reg_ptr = opreg;
-    *offset_ptr = disp;
+    default:
+        tcg_abort();
+    }
 }
 
 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
@@ -2276,8 +2065,9 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
         return;
     rm = modrm & 7;
 
-    if (s->aflag) {
-
+    switch (s->aflag) {
+    case MO_64:
+    case MO_32:
         base = rm;
 
         if (base == 4) {
@@ -2299,7 +2089,9 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
             s->pc += 4;
             break;
         }
-    } else {
+        break;
+
+    case MO_16:
         switch (mod) {
         case 0:
             if (rm == 6) {
@@ -2314,6 +2106,10 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
             s->pc += 2;
             break;
         }
+        break;
+
+    default:
+        tcg_abort();
     }
 }
 
@@ -2342,64 +2138,69 @@ static void gen_add_A0_ds_seg(DisasContext *s)
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
-                           int ot, int reg, int is_store)
+                           TCGMemOp ot, int reg, int is_store)
 {
-    int mod, rm, opreg, disp;
+    int mod, rm;
 
     mod = (modrm >> 6) & 3;
     rm = (modrm & 7) | REX_B(s);
     if (mod == 3) {
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_TN_reg(ot, 0, reg);
-            gen_op_mov_reg_T0(ot, rm);
+                gen_op_mov_v_reg(ot, cpu_T[0], reg);
+            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
-            gen_op_mov_TN_reg(ot, 0, rm);
+            gen_op_mov_v_reg(ot, cpu_T[0], rm);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_T0(ot, reg);
+                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
     } else {
-        gen_lea_modrm(env, s, modrm, &opreg, &disp);
+        gen_lea_modrm(env, s, modrm);
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_TN_reg(ot, 0, reg);
-            gen_op_st_T0_A0(ot + s->mem_index);
+                gen_op_mov_v_reg(ot, cpu_T[0], reg);
+            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
         } else {
-            gen_op_ld_T0_A0(ot + s->mem_index);
+            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_T0(ot, reg);
+                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
     }
 }
 
-static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, int ot)
+static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
 {
     uint32_t ret;
 
-    switch(ot) {
-    case OT_BYTE:
+    switch (ot) {
+    case MO_8:
         ret = cpu_ldub_code(env, s->pc);
         s->pc++;
         break;
-    case OT_WORD:
+    case MO_16:
         ret = cpu_lduw_code(env, s->pc);
         s->pc += 2;
         break;
-    default:
-    case OT_LONG:
+    case MO_32:
+#ifdef TARGET_X86_64
+    case MO_64:
+#endif
         ret = cpu_ldl_code(env, s->pc);
         s->pc += 4;
         break;
+    default:
+        tcg_abort();
     }
     return ret;
 }
 
-static inline int insn_const_size(unsigned int ot)
+static inline int insn_const_size(TCGMemOp ot)
 {
-    if (ot <= OT_LONG)
+    if (ot <= MO_32) {
         return 1 << ot;
-    else
+    } else {
         return 4;
+    }
 }
 
 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
@@ -2452,7 +2253,7 @@ static inline void gen_jcc(DisasContext *s, int b,
     }
 }
 
-static void gen_cmovcc1(CPUX86State *env, DisasContext *s, int ot, int b,
+static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
                         int modrm, int reg)
 {
     CCPrepare cc;
@@ -2471,7 +2272,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, int ot, int b,
 
     tcg_gen_movcond_tl(cc.cond, cpu_T[0], cc.reg, cc.reg2,
                        cpu_T[0], cpu_regs[reg]);
-    gen_op_mov_reg_T0(ot, reg);
+    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
 
     if (cc.mask != -1) {
         tcg_temp_free(cc.reg);
@@ -2548,135 +2349,80 @@ static inline void gen_stack_update(DisasContext *s, int addend)
 {
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
-        gen_op_add_reg_im(2, R_ESP, addend);
+        gen_op_add_reg_im(MO_64, R_ESP, addend);
     } else
 #endif
     if (s->ss32) {
-        gen_op_add_reg_im(1, R_ESP, addend);
+        gen_op_add_reg_im(MO_32, R_ESP, addend);
     } else {
-        gen_op_add_reg_im(0, R_ESP, addend);
+        gen_op_add_reg_im(MO_16, R_ESP, addend);
     }
 }
 
-/* generate a push. It depends on ss32, addseg and dflag */
-static void gen_push_T0(DisasContext *s)
+/* Generate a push. It depends on ss32, addseg and dflag.  */
+static void gen_push_v(DisasContext *s, TCGv val)
 {
-#ifdef TARGET_X86_64
-    if (CODE64(s)) {
-        gen_op_movq_A0_reg(R_ESP);
-        if (s->dflag) {
-            gen_op_addq_A0_im(-8);
-            gen_op_st_T0_A0(OT_QUAD + s->mem_index);
-        } else {
-            gen_op_addq_A0_im(-2);
-            gen_op_st_T0_A0(OT_WORD + s->mem_index);
-        }
-        gen_op_mov_reg_A0(2, R_ESP);
-    } else
-#endif
-    {
-        gen_op_movl_A0_reg(R_ESP);
-        if (!s->dflag)
-            gen_op_addl_A0_im(-2);
-        else
-            gen_op_addl_A0_im(-4);
-        if (s->ss32) {
-            if (s->addseg) {
-                tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-                gen_op_addl_A0_seg(s, R_SS);
-            }
-        } else {
-            gen_op_andl_A0_ffff();
-            tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-            gen_op_addl_A0_seg(s, R_SS);
-        }
-        gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
-        if (s->ss32 && !s->addseg)
-            gen_op_mov_reg_A0(1, R_ESP);
-        else
-            gen_op_mov_reg_T1(s->ss32 + 1, R_ESP);
-    }
-}
+    TCGMemOp a_ot, d_ot = mo_pushpop(s, s->dflag);
+    int size = 1 << d_ot;
+    TCGv new_esp = cpu_A0;
+
+    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
 
-/* generate a push. It depends on ss32, addseg and dflag */
-/* slower version for T1, only used for call Ev */
-static void gen_push_T1(DisasContext *s)
-{
-#ifdef TARGET_X86_64
     if (CODE64(s)) {
-        gen_op_movq_A0_reg(R_ESP);
-        if (s->dflag) {
-            gen_op_addq_A0_im(-8);
-            gen_op_st_T1_A0(OT_QUAD + s->mem_index);
-        } else {
-            gen_op_addq_A0_im(-2);
-            gen_op_st_T0_A0(OT_WORD + s->mem_index);
-        }
-        gen_op_mov_reg_A0(2, R_ESP);
-    } else
-#endif
-    {
-        gen_op_movl_A0_reg(R_ESP);
-        if (!s->dflag)
-            gen_op_addl_A0_im(-2);
-        else
-            gen_op_addl_A0_im(-4);
-        if (s->ss32) {
-            if (s->addseg) {
-                gen_op_addl_A0_seg(s, R_SS);
-            }
-        } else {
-            gen_op_andl_A0_ffff();
+        a_ot = MO_64;
+    } else if (s->ss32) {
+        a_ot = MO_32;
+        if (s->addseg) {
+            new_esp = cpu_tmp4;
+            tcg_gen_mov_tl(new_esp, cpu_A0);
             gen_op_addl_A0_seg(s, R_SS);
+        } else {
+            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
         }
-        gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);
-
-        if (s->ss32 && !s->addseg)
-            gen_op_mov_reg_A0(1, R_ESP);
-        else
-            gen_stack_update(s, (-2) << s->dflag);
+    } else {
+        a_ot = MO_16;
+        new_esp = cpu_tmp4;
+        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
+        tcg_gen_mov_tl(new_esp, cpu_A0);
+        gen_op_addl_A0_seg(s, R_SS);
     }
+
+    gen_op_st_v(s, d_ot, val, cpu_A0);
+    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
 }
 
 /* two step pop is necessary for precise exceptions */
-static void gen_pop_T0(DisasContext *s)
+static TCGMemOp gen_pop_T0(DisasContext *s)
 {
-#ifdef TARGET_X86_64
+    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
+    TCGv addr = cpu_A0;
+
     if (CODE64(s)) {
-        gen_op_movq_A0_reg(R_ESP);
-        gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index);
-    } else
-#endif
-    {
-        gen_op_movl_A0_reg(R_ESP);
-        if (s->ss32) {
-            if (s->addseg)
-                gen_op_addl_A0_seg(s, R_SS);
-        } else {
-            gen_op_andl_A0_ffff();
-            gen_op_addl_A0_seg(s, R_SS);
-        }
-        gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
+        addr = cpu_regs[R_ESP];
+    } else if (!s->ss32) {
+        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESP]);
+        gen_op_addl_A0_seg(s, R_SS);
+    } else if (s->addseg) {
+        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_ESP]);
+        gen_op_addl_A0_seg(s, R_SS);
+    } else {
+        tcg_gen_ext32u_tl(cpu_A0, cpu_regs[R_ESP]);
     }
+
+    gen_op_ld_v(s, d_ot, cpu_T[0], addr);
+    return d_ot;
 }
 
-static void gen_pop_update(DisasContext *s)
+static void gen_pop_update(DisasContext *s, TCGMemOp ot)
 {
-#ifdef TARGET_X86_64
-    if (CODE64(s) && s->dflag) {
-        gen_stack_update(s, 8);
-    } else
-#endif
-    {
-        gen_stack_update(s, 2 << s->dflag);
-    }
+    gen_stack_update(s, 1 << ot);
 }
 
 static void gen_stack_A0(DisasContext *s)
 {
     gen_op_movl_A0_reg(R_ESP);
     if (!s->ss32)
-        gen_op_andl_A0_ffff();
+        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
     if (s->addseg)
         gen_op_addl_A0_seg(s, R_SS);
@@ -2687,18 +2433,18 @@ static void gen_pusha(DisasContext *s)
 {
     int i;
     gen_op_movl_A0_reg(R_ESP);
-    gen_op_addl_A0_im(-16 <<  s->dflag);
+    gen_op_addl_A0_im(-8 << s->dflag);
     if (!s->ss32)
-        gen_op_andl_A0_ffff();
+        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
     if (s->addseg)
         gen_op_addl_A0_seg(s, R_SS);
     for(i = 0;i < 8; i++) {
-        gen_op_mov_TN_reg(OT_LONG, 0, 7 - i);
-        gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
-        gen_op_addl_A0_im(2 <<  s->dflag);
+        gen_op_mov_v_reg(MO_32, cpu_T[0], 7 - i);
+        gen_op_st_v(s, s->dflag, cpu_T[0], cpu_A0);
+        gen_op_addl_A0_im(1 << s->dflag);
     }
-    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+    gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
 }
 
 /* NOTE: wrap around in 16 bit not fully handled */
@@ -2707,73 +2453,68 @@ static void gen_popa(DisasContext *s)
     int i;
     gen_op_movl_A0_reg(R_ESP);
     if (!s->ss32)
-        gen_op_andl_A0_ffff();
+        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 <<  s->dflag);
+    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 8 << s->dflag);
     if (s->addseg)
         gen_op_addl_A0_seg(s, R_SS);
     for(i = 0;i < 8; i++) {
         /* ESP is not reloaded */
         if (i != 3) {
-            gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index);
-            gen_op_mov_reg_T0(OT_WORD + s->dflag, 7 - i);
+            gen_op_ld_v(s, s->dflag, cpu_T[0], cpu_A0);
+            gen_op_mov_reg_v(s->dflag, 7 - i, cpu_T[0]);
         }
-        gen_op_addl_A0_im(2 <<  s->dflag);
+        gen_op_addl_A0_im(1 << s->dflag);
     }
-    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+    gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
 }
 
 static void gen_enter(DisasContext *s, int esp_addend, int level)
 {
-    int ot, opsize;
+    TCGMemOp ot = mo_pushpop(s, s->dflag);
+    int opsize = 1 << ot;
 
     level &= 0x1f;
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
-        ot = s->dflag ? OT_QUAD : OT_WORD;
-        opsize = 1 << ot;
-
         gen_op_movl_A0_reg(R_ESP);
         gen_op_addq_A0_im(-opsize);
         tcg_gen_mov_tl(cpu_T[1], cpu_A0);
 
         /* push bp */
-        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
-        gen_op_st_T0_A0(ot + s->mem_index);
+        gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
+        gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
         if (level) {
             /* XXX: must save state */
             gen_helper_enter64_level(cpu_env, tcg_const_i32(level),
-                                     tcg_const_i32((ot == OT_QUAD)),
+                                     tcg_const_i32((ot == MO_64)),
                                      cpu_T[1]);
         }
-        gen_op_mov_reg_T1(ot, R_EBP);
+        gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
         tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
-        gen_op_mov_reg_T1(OT_QUAD, R_ESP);
+        gen_op_mov_reg_v(MO_64, R_ESP, cpu_T[1]);
     } else
 #endif
     {
-        ot = s->dflag + OT_WORD;
-        opsize = 2 << s->dflag;
-
         gen_op_movl_A0_reg(R_ESP);
         gen_op_addl_A0_im(-opsize);
         if (!s->ss32)
-            gen_op_andl_A0_ffff();
+            tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
         tcg_gen_mov_tl(cpu_T[1], cpu_A0);
         if (s->addseg)
             gen_op_addl_A0_seg(s, R_SS);
         /* push bp */
-        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
-        gen_op_st_T0_A0(ot + s->mem_index);
+        gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
+        gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
         if (level) {
             /* XXX: must save state */
             gen_helper_enter_level(cpu_env, tcg_const_i32(level),
-                                   tcg_const_i32(s->dflag),
+                                   tcg_const_i32(s->dflag - 1),
                                    cpu_T[1]);
         }
-        gen_op_mov_reg_T1(ot, R_EBP);
+        gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
         tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
-        gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+        gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
     }
 }
 
@@ -2846,38 +2587,36 @@ static void gen_jmp(DisasContext *s, target_ulong eip)
     gen_jmp_tb(s, eip, 0);
 }
 
-static inline void gen_ldq_env_A0(int idx, int offset)
+static inline void gen_ldq_env_A0(DisasContext *s, int offset)
 {
-    int mem_index = (idx >> 2) - 1;
-    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
+    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
 }
 
-static inline void gen_stq_env_A0(int idx, int offset)
+static inline void gen_stq_env_A0(DisasContext *s, int offset)
 {
-    int mem_index = (idx >> 2) - 1;
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
-    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
+    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
 }
 
-static inline void gen_ldo_env_A0(int idx, int offset)
+static inline void gen_ldo_env_A0(DisasContext *s, int offset)
 {
-    int mem_index = (idx >> 2) - 1;
-    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
+    int mem_index = s->mem_index;
+    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
     tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
-    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mem_index);
+    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
 }
 
-static inline void gen_sto_env_A0(int idx, int offset)
+static inline void gen_sto_env_A0(DisasContext *s, int offset)
 {
-    int mem_index = (idx >> 2) - 1;
+    int mem_index = s->mem_index;
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
-    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
+    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
     tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
-    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mem_index);
+    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
 }
 
 static inline void gen_op_movo(int d_offset, int s_offset)
@@ -3239,12 +2978,13 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = {
 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     target_ulong pc_start, int rex_r)
 {
-    int b1, op1_offset, op2_offset, is_xmm, val, ot;
-    int modrm, mod, rm, reg, reg_addr, offset_addr;
+    int b1, op1_offset, op2_offset, is_xmm, val;
+    int modrm, mod, rm, reg;
     SSEFunc_0_epp sse_fn_epp;
     SSEFunc_0_eppi sse_fn_eppi;
     SSEFunc_0_ppi sse_fn_ppi;
     SSEFunc_0_eppt sse_fn_eppt;
+    TCGMemOp ot;
 
     b &= 0xff;
     if (s->prefix & PREFIX_DATA)
@@ -3311,46 +3051,45 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-            gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+            gen_lea_modrm(env, s, modrm);
+            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
         case 0x02b: /* movntps */
         case 0x12b: /* movntps */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-            gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            gen_lea_modrm(env, s, modrm);
+            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-            gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            gen_lea_modrm(env, s, modrm);
+            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             if (b1 & 1) {
-                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,
-                    xmm_regs[reg]));
+                gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                     xmm_regs[reg].XMM_L(0)));
-                gen_op_st_T0_A0(OT_LONG + s->mem_index);
+                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             }
             break;
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
-            if (s->dflag == 2) {
-                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 0);
+            if (s->dflag == MO_64) {
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
                 tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3359,15 +3098,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
-            if (s->dflag == 2) {
-                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 0);
+            if (s->dflag == MO_64) {
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3376,8 +3115,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+                gen_lea_modrm(env, s, modrm);
+                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
                 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
@@ -3393,8 +3132,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+                gen_lea_modrm(env, s, modrm);
+                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
@@ -3403,10 +3142,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                gen_lea_modrm(env, s, modrm);
+                gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_op_movl_T0_0();
+                tcg_gen_movi_tl(cpu_T[0], 0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
@@ -3418,9 +3157,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_op_movl_T0_0();
+                gen_lea_modrm(env, s, modrm);
+                gen_ldq_env_A0(s, offsetof(CPUX86State,
+                                           xmm_regs[reg].XMM_Q(0)));
+                tcg_gen_movi_tl(cpu_T[0], 0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
             } else {
@@ -3432,8 +3172,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_lea_modrm(env, s, modrm);
+                gen_ldq_env_A0(s, offsetof(CPUX86State,
+                                           xmm_regs[reg].XMM_Q(0)));
             } else {
                 /* movhlps */
                 rm = (modrm & 7) | REX_B(s);
@@ -3443,8 +3184,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+                gen_lea_modrm(env, s, modrm);
+                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
@@ -3459,8 +3200,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_lea_modrm(env, s, modrm);
+                gen_ldq_env_A0(s, offsetof(CPUX86State,
+                                           xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
@@ -3472,8 +3214,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+                gen_lea_modrm(env, s, modrm);
+                gen_ldq_env_A0(s, offsetof(CPUX86State,
+                                           xmm_regs[reg].XMM_Q(1)));
             } else {
                 /* movlhps */
                 rm = (modrm & 7) | REX_B(s);
@@ -3483,8 +3226,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+                gen_lea_modrm(env, s, modrm);
+                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
@@ -3520,36 +3263,37 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
-            if (s->dflag == 2) {
+            if (s->dflag == MO_64) {
                 tcg_gen_ld_i64(cpu_T[0], cpu_env, 
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
             break;
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
-            if (s->dflag == 2) {
+            if (s->dflag == MO_64) {
                 tcg_gen_ld_i64(cpu_T[0], cpu_env, 
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_lea_modrm(env, s, modrm);
+                gen_ldq_env_A0(s, offsetof(CPUX86State,
+                                           xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
@@ -3559,8 +3303,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+                gen_lea_modrm(env, s, modrm);
+                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
                 gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
@@ -3574,8 +3318,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+                gen_lea_modrm(env, s, modrm);
+                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
@@ -3584,9 +3328,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_op_st_T0_A0(OT_LONG + s->mem_index);
+                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
@@ -3595,8 +3339,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_lea_modrm(env, s, modrm);
+                gen_stq_env_A0(s, offsetof(CPUX86State,
+                                           xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
@@ -3606,8 +3351,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_lea_modrm(env, s, modrm);
+                gen_stq_env_A0(s, offsetof(CPUX86State,
+                                           xmm_regs[reg].XMM_Q(0)));
             } else {
                 goto illegal_op;
             }
@@ -3615,8 +3361,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+                gen_lea_modrm(env, s, modrm);
+                gen_stq_env_A0(s, offsetof(CPUX86State,
+                                           xmm_regs[reg].XMM_Q(1)));
             } else {
                 goto illegal_op;
             }
@@ -3632,15 +3379,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
             val = cpu_ldub_code(env, s->pc++);
             if (is_xmm) {
-                gen_op_movl_T0_im(val);
+                tcg_gen_movi_tl(cpu_T[0], val);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
-                gen_op_movl_T0_0();
+                tcg_gen_movi_tl(cpu_T[0], 0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
                 op1_offset = offsetof(CPUX86State,xmm_t0);
             } else {
-                gen_op_movl_T0_im(val);
+                tcg_gen_movi_tl(cpu_T[0], val);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
-                gen_op_movl_T0_0();
+                tcg_gen_movi_tl(cpu_T[0], 0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
                 op1_offset = offsetof(CPUX86State,mmx_t0);
             }
@@ -3665,24 +3412,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                              offsetof(CPUX86State,xmm_regs[rm]));
             gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
-            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-            gen_op_mov_reg_T0(OT_LONG, reg);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                              offsetof(CPUX86State,xmm_regs[rm]));
             gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
-            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-            gen_op_mov_reg_T0(OT_LONG, reg);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
             break;
         case 0x02a: /* cvtpi2ps */
         case 0x12a: /* cvtpi2pd */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
-                gen_ldq_env_A0(s->mem_index, op2_offset);
+                gen_ldq_env_A0(s, op2_offset);
             } else {
                 rm = (modrm & 7);
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
@@ -3702,11 +3447,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
-            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+            ot = mo_64_32(s->dflag);
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            if (ot == OT_LONG) {
+            if (ot == MO_32) {
                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
@@ -3725,9 +3470,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12d: /* cvtpd2pi */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
-                gen_ldo_env_A0(s->mem_index, op2_offset);
+                gen_ldo_env_A0(s, op2_offset);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
@@ -3754,13 +3499,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x32c: /* cvttsd2si */
         case 0x22d: /* cvtss2si */
         case 0x32d: /* cvtsd2si */
-            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+            ot = mo_64_32(s->dflag);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 if ((b >> 8) & 1) {
-                    gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
+                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
                 } else {
-                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                    gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                     tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                 }
                 op2_offset = offsetof(CPUX86State,xmm_t0);
@@ -3769,7 +3514,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
             }
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
-            if (ot == OT_LONG) {
+            if (ot == MO_32) {
                 SSEFunc_i_ep sse_fn_i_ep =
                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
                 sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
@@ -3783,12 +3528,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto illegal_op;
 #endif
             }
-            gen_op_mov_reg_T0(ot, reg);
+            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
@@ -3804,7 +3549,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x1c5:
             if (mod != 3)
                 goto illegal_op;
-            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+            ot = mo_64_32(s->dflag);
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
@@ -3818,12 +3563,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_T0(ot, reg);
+            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_lea_modrm(env, s, modrm);
+                gen_stq_env_A0(s, offsetof(CPUX86State,
+                                           xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
@@ -3857,9 +3603,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
                 gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             }
-            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_T0(OT_LONG, reg);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
             break;
 
         case 0x138:
@@ -3889,33 +3634,32 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
-                        gen_ldq_env_A0(s->mem_index, op2_offset +
+                        gen_ldq_env_A0(s, op2_offset +
                                         offsetof(XMMReg, XMM_Q(0)));
                         break;
                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
-                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
-                                          (s->mem_index >> 2) - 1);
-                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUL);
                         tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
                                         offsetof(XMMReg, XMM_L(0)));
                         break;
                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
-                        tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0,
-                                          (s->mem_index >> 2) - 1);
+                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
+                                           s->mem_index, MO_LEUW);
                         tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
                                         offsetof(XMMReg, XMM_W(0)));
                         break;
                     case 0x2a:            /* movntqda */
-                        gen_ldo_env_A0(s->mem_index, op1_offset);
+                        gen_ldo_env_A0(s, op1_offset);
                         return;
                     default:
-                        gen_ldo_env_A0(s->mem_index, op2_offset);
+                        gen_ldo_env_A0(s, op2_offset);
                     }
                 }
             } else {
@@ -3924,8 +3668,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                    gen_ldq_env_A0(s->mem_index, op2_offset);
+                    gen_lea_modrm(env, s, modrm);
+                    gen_ldq_env_A0(s, op2_offset);
                 }
             }
             if (sse_fn_epp == SSE_SPECIAL) {
@@ -3957,21 +3701,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 if ((b & 0xff) == 0xf0) {
-                    ot = OT_BYTE;
-                } else if (s->dflag != 2) {
-                    ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG);
+                    ot = MO_8;
+                } else if (s->dflag != MO_64) {
+                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                 } else {
-                    ot = OT_QUAD;
+                    ot = MO_64;
                 }
 
-                gen_op_mov_TN_reg(OT_LONG, 0, reg);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                                  cpu_T[0], tcg_const_i32(8 << ot));
 
-                ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
-                gen_op_mov_reg_T0(ot, reg);
+                ot = mo_64_32(s->dflag);
+                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 break;
 
             case 0x1f0: /* crc32 or movbe */
@@ -3988,50 +3731,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
                     goto illegal_op;
                 }
-                if (s->dflag != 2) {
-                    ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG);
+                if (s->dflag != MO_64) {
+                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                 } else {
-                    ot = OT_QUAD;
+                    ot = MO_64;
                 }
 
-                /* Load the data incoming to the bswap.  Note that the TCG
-                   implementation of bswap requires the input be zero
-                   extended.  In the case of the loads, we simply know that
-                   gen_op_ld_v via gen_ldst_modrm does that already.  */
+                gen_lea_modrm(env, s, modrm);
                 if ((b & 1) == 0) {
-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
+                                       s->mem_index, ot | MO_BE);
+                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 } else {
-                    switch (ot) {
-                    case OT_WORD:
-                        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[reg]);
-                        break;
-                    default:
-                        tcg_gen_ext32u_tl(cpu_T[0], cpu_regs[reg]);
-                        break;
-                    case OT_QUAD:
-                        tcg_gen_mov_tl(cpu_T[0], cpu_regs[reg]);
-                        break;
-                    }
-                }
-
-                switch (ot) {
-                case OT_WORD:
-                    tcg_gen_bswap16_tl(cpu_T[0], cpu_T[0]);
-                    break;
-                default:
-                    tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
-                    break;
-#ifdef TARGET_X86_64
-                case OT_QUAD:
-                    tcg_gen_bswap64_tl(cpu_T[0], cpu_T[0]);
-                    break;
-#endif
-                }
-
-                if ((b & 1) == 0) {
-                    gen_op_mov_reg_T0(ot, reg);
-                } else {
-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
+                                       s->mem_index, ot | MO_BE);
                 }
                 break;
 
@@ -4041,10 +3754,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     || s->vex_l != 0) {
                     goto illegal_op;
                 }
-                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
-                gen_op_mov_reg_T0(ot, reg);
+                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 gen_op_update1_cc();
                 set_cc_op(s, CC_OP_LOGICB + ot);
                 break;
@@ -4055,7 +3768,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     || s->vex_l != 0) {
                     goto illegal_op;
                 }
-                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                ot = mo_64_32(s->dflag);
                 {
                     TCGv bound, zero;
 
@@ -4065,7 +3778,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
                     tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_A0);
 
-                    bound = tcg_const_tl(ot == OT_QUAD ? 63 : 31);
+                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     zero = tcg_const_tl(0);
                     tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T[0], cpu_A0, bound,
                                        cpu_T[0], zero);
@@ -4083,7 +3796,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_subi_tl(cpu_T[1], cpu_T[1], 1);
                     tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
 
-                    gen_op_mov_reg_T0(ot, reg);
+                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                     gen_op_update1_cc();
                     set_cc_op(s, CC_OP_LOGICB + ot);
                 }
@@ -4095,11 +3808,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     || s->vex_l != 0) {
                     goto illegal_op;
                 }
-                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 {
-                    TCGv bound = tcg_const_tl(ot == OT_QUAD ? 63 : 31);
+                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     /* Note that since we're using BMILG (in order to get O
                        cleared) we need to store the inverse into C.  */
                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
@@ -4111,7 +3824,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 tcg_gen_movi_tl(cpu_A0, -1);
                 tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T[1]);
                 tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_A0);
-                gen_op_mov_reg_T0(ot, reg);
+                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 gen_op_update1_cc();
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
@@ -4122,7 +3835,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     || s->vex_l != 0) {
                     goto illegal_op;
                 }
-                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 switch (ot) {
                 default:
@@ -4134,7 +3847,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
                     break;
 #ifdef TARGET_X86_64
-                case OT_QUAD:
+                case MO_64:
                     tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg],
                                       cpu_T[0], cpu_regs[R_EDX]);
                     break;
@@ -4148,11 +3861,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     || s->vex_l != 0) {
                     goto illegal_op;
                 }
-                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
-                if (s->dflag == 2) {
+                if (ot == MO_64) {
                     tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 } else {
                     tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
@@ -4166,11 +3879,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     || s->vex_l != 0) {
                     goto illegal_op;
                 }
-                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
-                if (s->dflag == 2) {
+                if (ot == MO_64) {
                     tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 } else {
                     tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
@@ -4186,7 +3899,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     TCGv carry_in, carry_out, zero;
                     int end_op;
 
-                    ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
+                    ot = mo_64_32(s->dflag);
                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
                     /* Re-use the carry-out from a previous round.  */
@@ -4230,7 +3943,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                     switch (ot) {
 #ifdef TARGET_X86_64
-                    case OT_LONG:
+                    case MO_32:
                         /* If we know TL is 64-bit, and we want a 32-bit
                            result, just do everything in 64-bit arithmetic.  */
                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
@@ -4265,9 +3978,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     || s->vex_l != 0) {
                     goto illegal_op;
                 }
-                ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
+                ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                if (ot == OT_QUAD) {
+                if (ot == MO_64) {
                     tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
                 } else {
                     tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 31);
@@ -4275,17 +3988,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if (b == 0x1f7) {
                     tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                 } else if (b == 0x2f7) {
-                    if (ot != OT_QUAD) {
+                    if (ot != MO_64) {
                         tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                     }
                     tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                 } else {
-                    if (ot != OT_QUAD) {
+                    if (ot != MO_64) {
                         tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
                     }
                     tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                 }
-                gen_op_mov_reg_T0(ot, reg);
+                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 break;
 
             case 0x0f3:
@@ -4297,14 +4010,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     || s->vex_l != 0) {
                     goto illegal_op;
                 }
-                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
                     tcg_gen_neg_tl(cpu_T[1], cpu_T[0]);
                     tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                    gen_op_mov_reg_T0(ot, s->vex_v);
+                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T[0]);
                     gen_op_update2_cc();
                     set_cc_op(s, CC_OP_BMILGB + ot);
                     break;
@@ -4354,52 +4067,55 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto illegal_op;
 
             if (sse_fn_eppi == SSE_SPECIAL) {
-                ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+                ot = mo_64_32(s->dflag);
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
                 case 0x14: /* pextrb */
                     tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].XMM_B(val & 15)));
-                    if (mod == 3)
-                        gen_op_mov_reg_T0(ot, rm);
-                    else
-                        tcg_gen_qemu_st8(cpu_T[0], cpu_A0,
-                                        (s->mem_index >> 2) - 1);
+                    if (mod == 3) {
+                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                    } else {
+                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
+                                           s->mem_index, MO_UB);
+                    }
                     break;
                 case 0x15: /* pextrw */
                     tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].XMM_W(val & 7)));
-                    if (mod == 3)
-                        gen_op_mov_reg_T0(ot, rm);
-                    else
-                        tcg_gen_qemu_st16(cpu_T[0], cpu_A0,
-                                        (s->mem_index >> 2) - 1);
+                    if (mod == 3) {
+                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                    } else {
+                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
+                                           s->mem_index, MO_LEUW);
+                    }
                     break;
                 case 0x16:
-                    if (ot == OT_LONG) { /* pextrd */
+                    if (ot == MO_32) { /* pextrd */
                         tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].XMM_L(val & 3)));
-                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                        if (mod == 3)
-                            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
-                        else
-                            tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
-                                            (s->mem_index >> 2) - 1);
+                        if (mod == 3) {
+                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
+                        } else {
+                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                                                s->mem_index, MO_LEUL);
+                        }
                     } else { /* pextrq */
 #ifdef TARGET_X86_64
                         tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].XMM_Q(val & 1)));
-                        if (mod == 3)
-                            gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
-                        else
-                            tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
-                                            (s->mem_index >> 2) - 1);
+                        if (mod == 3) {
+                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
+                        } else {
+                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                                                s->mem_index, MO_LEQ);
+                        }
 #else
                         goto illegal_op;
 #endif
@@ -4408,18 +4124,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 case 0x17: /* extractps */
                     tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].XMM_L(val & 3)));
-                    if (mod == 3)
-                        gen_op_mov_reg_T0(ot, rm);
-                    else
-                        tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
-                                        (s->mem_index >> 2) - 1);
+                    if (mod == 3) {
+                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                    } else {
+                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
+                                           s->mem_index, MO_LEUL);
+                    }
                     break;
                 case 0x20: /* pinsrb */
-                    if (mod == 3)
-                        gen_op_mov_TN_reg(OT_LONG, 0, rm);
-                    else
-                        tcg_gen_qemu_ld8u(cpu_T[0], cpu_A0,
-                                        (s->mem_index >> 2) - 1);
+                    if (mod == 3) {
+                        gen_op_mov_v_reg(MO_32, cpu_T[0], rm);
+                    } else {
+                        tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
+                                           s->mem_index, MO_UB);
+                    }
                     tcg_gen_st8_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].XMM_B(val & 15)));
                     break;
@@ -4429,9 +4147,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                         offsetof(CPUX86State,xmm_regs[rm]
                                                 .XMM_L((val >> 6) & 3)));
                     } else {
-                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
-                                        (s->mem_index >> 2) - 1);
-                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUL);
                     }
                     tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                     offsetof(CPUX86State,xmm_regs[reg]
@@ -4454,23 +4171,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                                 xmm_regs[reg].XMM_L(3)));
                     break;
                 case 0x22:
-                    if (ot == OT_LONG) { /* pinsrd */
-                        if (mod == 3)
-                            gen_op_mov_v_reg(ot, cpu_tmp0, rm);
-                        else
-                            tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
-                                            (s->mem_index >> 2) - 1);
-                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+                    if (ot == MO_32) { /* pinsrd */
+                        if (mod == 3) {
+                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
+                        } else {
+                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                                s->mem_index, MO_LEUL);
+                        }
                         tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].XMM_L(val & 3)));
                     } else { /* pinsrq */
 #ifdef TARGET_X86_64
-                        if (mod == 3)
+                        if (mod == 3) {
                             gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
-                        else
-                            tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
-                                            (s->mem_index >> 2) - 1);
+                        } else {
+                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                                                s->mem_index, MO_LEQ);
+                        }
                         tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].XMM_Q(val & 1)));
@@ -4489,8 +4207,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                    gen_ldo_env_A0(s->mem_index, op2_offset);
+                    gen_lea_modrm(env, s, modrm);
+                    gen_ldo_env_A0(s, op2_offset);
                 }
             } else {
                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
@@ -4498,8 +4216,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                    gen_ldq_env_A0(s->mem_index, op2_offset);
+                    gen_lea_modrm(env, s, modrm);
+                    gen_ldq_env_A0(s, op2_offset);
                 }
             }
             val = cpu_ldub_code(env, s->pc++);
@@ -4507,9 +4225,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
                 set_cc_op(s, CC_OP_EFLAGS);
 
-                if (s->dflag == 2)
+                if (s->dflag == MO_64) {
                     /* The helper must use entire 64-bit gp registers */
                     val |= 1 << 8;
+                }
             }
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4530,17 +4249,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     || s->vex_l != 0) {
                     goto illegal_op;
                 }
-                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 b = cpu_ldub_code(env, s->pc++);
-                if (ot == OT_QUAD) {
+                if (ot == MO_64) {
                     tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
                 } else {
                     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                     tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
                     tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                 }
-                gen_op_mov_reg_T0(ot, reg);
+                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 break;
 
             default:
@@ -4565,21 +4284,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         if (is_xmm) {
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
                                 b == 0xc2)) {
                     /* specific case for SSE single instructions */
                     if (b1 == 2) {
                         /* 32 bit access */
-                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                         tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                     } else {
                         /* 64 bit access */
-                        gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0)));
+                        gen_ldq_env_A0(s, offsetof(CPUX86State,
+                                                   xmm_t0.XMM_D(0)));
                     }
                 } else {
-                    gen_ldo_env_A0(s->mem_index, op2_offset);
+                    gen_ldo_env_A0(s, op2_offset);
                 }
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -4588,9 +4308,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
-                gen_ldq_env_A0(s->mem_index, op2_offset);
+                gen_ldq_env_A0(s, op2_offset);
             } else {
                 rm = (modrm & 7);
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
@@ -4633,16 +4353,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             /* maskmov : we must prepare A0 */
             if (mod != 3)
                 goto illegal_op;
-#ifdef TARGET_X86_64
-            if (s->aflag == 2) {
-                gen_op_movq_A0_reg(R_EDI);
-            } else
-#endif
-            {
-                gen_op_movl_A0_reg(R_EDI);
-                if (s->aflag == 0)
-                    gen_op_andl_A0_ffff();
-            }
+            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
+            gen_extu(s->aflag, cpu_A0);
             gen_add_A0_ds_seg(s);
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4668,9 +4380,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                                target_ulong pc_start)
 {
-    int b, prefixes, aflag, dflag;
-    int shift, ot;
-    int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
+    int b, prefixes;
+    int shift;
+    TCGMemOp ot, aflag, dflag;
+    int modrm, reg, rm, mod, op, opreg, val;
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
@@ -4805,19 +4518,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
            over 0x66 if both are present.  */
-        dflag = (rex_w > 0 ? 2 : prefixes & PREFIX_DATA ? 0 : 1);
+        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
-        aflag = (prefixes & PREFIX_ADR ? 1 : 2);
+        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
     } else {
         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
-        dflag = s->code32;
-        if (prefixes & PREFIX_DATA) {
-            dflag ^= 1;
+        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
+            dflag = MO_32;
+        } else {
+            dflag = MO_16;
         }
         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
-        aflag = s->code32;
-        if (prefixes & PREFIX_ADR) {
-            aflag ^= 1;
+        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
+            aflag = MO_32;
+        }  else {
+            aflag = MO_16;
         }
     }
 
@@ -4853,10 +4568,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             op = (b >> 3) & 7;
             f = (b >> 1) & 3;
 
-            if ((b & 1) == 0)
-                ot = OT_BYTE;
-            else
-                ot = dflag + OT_WORD;
+            ot = mo_b_d(b, dflag);
 
             switch(f) {
             case 0: /* OP Ev, Gv */
@@ -4865,19 +4577,19 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
                     /* xor reg, reg optimisation */
                     set_cc_op(s, CC_OP_CLR);
-                    gen_op_movl_T0_0();
-                    gen_op_mov_reg_T0(ot, reg);
+                    tcg_gen_movi_tl(cpu_T[0], 0);
+                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                     break;
                 } else {
                     opreg = rm;
                 }
-                gen_op_mov_TN_reg(ot, 1, reg);
+                gen_op_mov_v_reg(ot, cpu_T[1], reg);
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
@@ -4886,18 +4598,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                    gen_op_ld_T1_A0(ot + s->mem_index);
+                    gen_lea_modrm(env, s, modrm);
+                    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
-                    gen_op_mov_TN_reg(ot, 1, rm);
+                    gen_op_mov_v_reg(ot, cpu_T[1], rm);
                 }
                 gen_op(s, op, ot, reg);
                 break;
             case 2: /* OP A, Iv */
                 val = insn_get(env, s, ot);
-                gen_op_movl_T1_im(val);
+                tcg_gen_movi_tl(cpu_T[1], val);
                 gen_op(s, op, ot, OR_EAX);
                 break;
             }
@@ -4913,10 +4625,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         {
             int val;
 
-            if ((b & 1) == 0)
-                ot = OT_BYTE;
-            else
-                ot = dflag + OT_WORD;
+            ot = mo_b_d(b, dflag);
 
             modrm = cpu_ldub_code(env, s->pc++);
             mod = (modrm >> 6) & 3;
@@ -4928,7 +4637,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4942,10 +4651,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 val = insn_get(env, s, ot);
                 break;
             case 0x83:
-                val = (int8_t)insn_get(env, s, OT_BYTE);
+                val = (int8_t)insn_get(env, s, MO_8);
                 break;
             }
-            gen_op_movl_T1_im(val);
+            tcg_gen_movi_tl(cpu_T[1], val);
             gen_op(s, op, ot, opreg);
         }
         break;
@@ -4953,19 +4662,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         /**************************/
         /* inc, dec, and other misc arith */
     case 0x40 ... 0x47: /* inc Gv */
-        ot = dflag ? OT_LONG : OT_WORD;
+        ot = dflag;
         gen_inc(s, ot, OR_EAX + (b & 7), 1);
         break;
     case 0x48 ... 0x4f: /* dec Gv */
-        ot = dflag ? OT_LONG : OT_WORD;
+        ot = dflag;
         gen_inc(s, ot, OR_EAX + (b & 7), -1);
         break;
     case 0xf6: /* GRP3 */
     case 0xf7:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
 
         modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
@@ -4974,65 +4680,65 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-            gen_op_ld_T0_A0(ot + s->mem_index);
+            gen_lea_modrm(env, s, modrm);
+            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
-            gen_op_mov_TN_reg(ot, 0, rm);
+            gen_op_mov_v_reg(ot, cpu_T[0], rm);
         }
 
         switch(op) {
         case 0: /* test */
             val = insn_get(env, s, ot);
-            gen_op_movl_T1_im(val);
+            tcg_gen_movi_tl(cpu_T[1], val);
             gen_op_testl_T0_T1_cc();
             set_cc_op(s, CC_OP_LOGICB + ot);
             break;
         case 2: /* not */
             tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
             if (mod != 3) {
-                gen_op_st_T0_A0(ot + s->mem_index);
+                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
             } else {
-                gen_op_mov_reg_T0(ot, rm);
+                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             }
             break;
         case 3: /* neg */
             tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
             if (mod != 3) {
-                gen_op_st_T0_A0(ot + s->mem_index);
+                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
             } else {
-                gen_op_mov_reg_T0(ot, rm);
+                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             }
             gen_op_update_neg_cc();
             set_cc_op(s, CC_OP_SUBB + ot);
             break;
         case 4: /* mul */
             switch(ot) {
-            case OT_BYTE:
-                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+            case MO_8:
+                gen_op_mov_v_reg(MO_8, cpu_T[1], R_EAX);
                 tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
                 tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                 tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
                 set_cc_op(s, CC_OP_MULB);
                 break;
-            case OT_WORD:
-                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+            case MO_16:
+                gen_op_mov_v_reg(MO_16, cpu_T[1], R_EAX);
                 tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
                 tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
-                gen_op_mov_reg_T0(OT_WORD, R_EDX);
+                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
-            case OT_LONG:
+            case MO_32:
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                 tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
@@ -5044,7 +4750,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
-            case OT_QUAD:
+            case MO_64:
                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
                                   cpu_T[0], cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
@@ -5056,34 +4762,34 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             break;
         case 5: /* imul */
             switch(ot) {
-            case OT_BYTE:
-                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+            case MO_8:
+                gen_op_mov_v_reg(MO_8, cpu_T[1], R_EAX);
                 tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
                 tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                 tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
                 tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                 set_cc_op(s, CC_OP_MULB);
                 break;
-            case OT_WORD:
-                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+            case MO_16:
+                gen_op_mov_v_reg(MO_16, cpu_T[1], R_EAX);
                 tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
                 tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                 tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
                 tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
-                gen_op_mov_reg_T0(OT_WORD, R_EDX);
+                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
-            case OT_LONG:
+            case MO_32:
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                 tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
@@ -5097,7 +4803,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
-            case OT_QUAD:
+            case MO_64:
                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
                                   cpu_T[0], cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
@@ -5110,21 +4816,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             break;
         case 6: /* div */
             switch(ot) {
-            case OT_BYTE:
+            case MO_8:
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_divb_AL(cpu_env, cpu_T[0]);
                 break;
-            case OT_WORD:
+            case MO_16:
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_divw_AX(cpu_env, cpu_T[0]);
                 break;
             default:
-            case OT_LONG:
+            case MO_32:
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_divl_EAX(cpu_env, cpu_T[0]);
                 break;
 #ifdef TARGET_X86_64
-            case OT_QUAD:
+            case MO_64:
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_divq_EAX(cpu_env, cpu_T[0]);
                 break;
@@ -5133,21 +4839,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             break;
         case 7: /* idiv */
             switch(ot) {
-            case OT_BYTE:
+            case MO_8:
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_idivb_AL(cpu_env, cpu_T[0]);
                 break;
-            case OT_WORD:
+            case MO_16:
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_idivw_AX(cpu_env, cpu_T[0]);
                 break;
             default:
-            case OT_LONG:
+            case MO_32:
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_idivl_EAX(cpu_env, cpu_T[0]);
                 break;
 #ifdef TARGET_X86_64
-            case OT_QUAD:
+            case MO_64:
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_idivq_EAX(cpu_env, cpu_T[0]);
                 break;
@@ -5161,10 +4867,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
     case 0xfe: /* GRP4 */
     case 0xff: /* GRP5 */
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
 
         modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
@@ -5176,20 +4879,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (CODE64(s)) {
             if (op == 2 || op == 4) {
                 /* operand size for jumps is 64 bit */
-                ot = OT_QUAD;
+                ot = MO_64;
             } else if (op == 3 || op == 5) {
-                ot = dflag ? OT_LONG + (rex_w == 1) : OT_WORD;
+                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
             } else if (op == 6) {
                 /* default push size is 64 bit */
-                ot = dflag ? OT_QUAD : OT_WORD;
+                ot = mo_pushpop(s, dflag);
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             if (op >= 2 && op != 3 && op != 5)
-                gen_op_ld_T0_A0(ot + s->mem_index);
+                gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
-            gen_op_mov_TN_reg(ot, 0, rm);
+            gen_op_mov_v_reg(ot, cpu_T[0], rm);
         }
 
         switch(op) {
@@ -5209,44 +4912,46 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             break;
         case 2: /* call Ev */
             /* XXX: optimize if memory (no 'and' is necessary) */
-            if (s->dflag == 0)
-                gen_op_andl_T0_ffff();
+            if (dflag == MO_16) {
+                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+            }
             next_eip = s->pc - s->cs_base;
-            gen_movtl_T1_im(next_eip);
-            gen_push_T1(s);
-            gen_op_jmp_T0();
+            tcg_gen_movi_tl(cpu_T[1], next_eip);
+            gen_push_v(s, cpu_T[1]);
+            gen_op_jmp_v(cpu_T[0]);
             gen_eob(s);
             break;
         case 3: /* lcall Ev */
-            gen_op_ld_T1_A0(ot + s->mem_index);
-            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
-            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+            gen_add_A0_im(s, 1 << ot);
+            gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
         do_lcall:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
-                                           tcg_const_i32(dflag),
+                                           tcg_const_i32(dflag - 1),
                                            tcg_const_i32(s->pc - pc_start));
             } else {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T[1],
-                                      tcg_const_i32(dflag),
+                                      tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
             gen_eob(s);
             break;
         case 4: /* jmp Ev */
-            if (s->dflag == 0)
-                gen_op_andl_T0_ffff();
-            gen_op_jmp_T0();
+            if (dflag == MO_16) {
+                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+            }
+            gen_op_jmp_v(cpu_T[0]);
             gen_eob(s);
             break;
         case 5: /* ljmp Ev */
-            gen_op_ld_T1_A0(ot + s->mem_index);
-            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
-            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+            gen_add_A0_im(s, 1 << ot);
+            gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
@@ -5256,13 +4961,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                                           tcg_const_i32(s->pc - pc_start));
             } else {
                 gen_op_movl_seg_T0_vm(R_CS);
-                gen_op_movl_T0_T1();
-                gen_op_jmp_T0();
+                gen_op_jmp_v(cpu_T[1]);
             }
             gen_eob(s);
             break;
         case 6: /* push Ev */
-            gen_push_T0(s);
+            gen_push_v(s, cpu_T[0]);
             break;
         default:
             goto illegal_op;
@@ -5271,76 +4975,80 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
     case 0x84: /* test Ev, Gv */
     case 0x85:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
 
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_TN_reg(ot, 1, reg);
+        gen_op_mov_v_reg(ot, cpu_T[1], reg);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
     case 0xa8: /* test eAX, Iv */
     case 0xa9:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
         val = insn_get(env, s, ot);
 
-        gen_op_mov_TN_reg(ot, 0, OR_EAX);
-        gen_op_movl_T1_im(val);
+        gen_op_mov_v_reg(ot, cpu_T[0], OR_EAX);
+        tcg_gen_movi_tl(cpu_T[1], val);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
     case 0x98: /* CWDE/CBW */
+        switch (dflag) {
 #ifdef TARGET_X86_64
-        if (dflag == 2) {
-            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+        case MO_64:
+            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
             tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_T0(OT_QUAD, R_EAX);
-        } else
+            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T[0]);
+            break;
 #endif
-        if (dflag == 1) {
-            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
+        case MO_32:
+            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EAX);
             tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_T0(OT_LONG, R_EAX);
-        } else {
-            gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX);
+            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T[0]);
+            break;
+        case MO_16:
+            gen_op_mov_v_reg(MO_8, cpu_T[0], R_EAX);
             tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_T0(OT_WORD, R_EAX);
+            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
+            break;
+        default:
+            tcg_abort();
         }
         break;
     case 0x99: /* CDQ/CWD */
+        switch (dflag) {
 #ifdef TARGET_X86_64
-        if (dflag == 2) {
-            gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
+        case MO_64:
+            gen_op_mov_v_reg(MO_64, cpu_T[0], R_EAX);
             tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
-            gen_op_mov_reg_T0(OT_QUAD, R_EDX);
-        } else
+            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T[0]);
+            break;
 #endif
-        if (dflag == 1) {
-            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+        case MO_32:
+            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
             tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
             tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
-            gen_op_mov_reg_T0(OT_LONG, R_EDX);
-        } else {
-            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
+            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T[0]);
+            break;
+        case MO_16:
+            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EAX);
             tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
             tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
-            gen_op_mov_reg_T0(OT_WORD, R_EDX);
+            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
+            break;
+        default:
+            tcg_abort();
         }
         break;
     case 0x1af: /* imul Gv, Ev */
     case 0x69: /* imul Gv, Ev, I */
     case 0x6b:
-        ot = dflag + OT_WORD;
+        ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (b == 0x69)
@@ -5350,23 +5058,23 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
-            gen_op_movl_T1_im(val);
+            tcg_gen_movi_tl(cpu_T[1], val);
         } else if (b == 0x6b) {
-            val = (int8_t)insn_get(env, s, OT_BYTE);
-            gen_op_movl_T1_im(val);
+            val = (int8_t)insn_get(env, s, MO_8);
+            tcg_gen_movi_tl(cpu_T[1], val);
         } else {
-            gen_op_mov_TN_reg(ot, 1, reg);
+            gen_op_mov_v_reg(ot, cpu_T[1], reg);
         }
         switch (ot) {
 #ifdef TARGET_X86_64
-        case OT_QUAD:
+        case MO_64:
             tcg_gen_muls2_i64(cpu_regs[reg], cpu_T[1], cpu_T[0], cpu_T[1]);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T[1]);
             break;
 #endif
-        case OT_LONG:
+        case MO_32:
             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
             tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
             tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
@@ -5385,34 +5093,31 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
             tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
             tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-            gen_op_mov_reg_T0(ot, reg);
+            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
             break;
         }
         set_cc_op(s, CC_OP_MULB + ot);
         break;
     case 0x1c0:
     case 0x1c1: /* xadd Ev, Gv */
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_TN_reg(ot, 0, reg);
-            gen_op_mov_TN_reg(ot, 1, rm);
-            gen_op_addl_T0_T1();
-            gen_op_mov_reg_T1(ot, reg);
-            gen_op_mov_reg_T0(ot, rm);
+            gen_op_mov_v_reg(ot, cpu_T[0], reg);
+            gen_op_mov_v_reg(ot, cpu_T[1], rm);
+            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-            gen_op_mov_TN_reg(ot, 0, reg);
-            gen_op_ld_T1_A0(ot + s->mem_index);
-            gen_op_addl_T0_T1();
-            gen_op_st_T0_A0(ot + s->mem_index);
-            gen_op_mov_reg_T1(ot, reg);
+            gen_lea_modrm(env, s, modrm);
+            gen_op_mov_v_reg(ot, cpu_T[0], reg);
+            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         }
         gen_op_update2_cc();
         set_cc_op(s, CC_OP_ADDB + ot);
@@ -5423,10 +5128,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             int label1, label2;
             TCGv t0, t1, t2, a0;
 
-            if ((b & 1) == 0)
-                ot = OT_BYTE;
-            else
-                ot = dflag + OT_WORD;
+            ot = mo_b_d(b, dflag);
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
@@ -5439,9 +5141,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 tcg_gen_mov_tl(a0, cpu_A0);
-                gen_op_ld_v(ot + s->mem_index, t0, a0);
+                gen_op_ld_v(s, ot, t0, a0);
                 rm = 0; /* avoid warning */
             }
             label1 = gen_new_label();
@@ -5459,11 +5161,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 /* perform no-op store cycle like physical cpu; must be
                    before changing accumulator to ensure idempotency if
                    the store faults and the instruction is restarted */
-                gen_op_st_v(ot + s->mem_index, t0, a0);
+                gen_op_st_v(s, ot, t0, a0);
                 gen_op_mov_reg_v(ot, R_EAX, t0);
                 tcg_gen_br(label2);
                 gen_set_label(label1);
-                gen_op_st_v(ot + s->mem_index, t1, a0);
+                gen_op_st_v(s, ot, t1, a0);
             }
             gen_set_label(label2);
             tcg_gen_mov_tl(cpu_cc_src, t0);
@@ -5482,12 +5184,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if ((mod == 3) || ((modrm & 0x38) != 0x8))
             goto illegal_op;
 #ifdef TARGET_X86_64
-        if (dflag == 2) {
+        if (dflag == MO_64) {
             if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
 #endif        
@@ -5496,7 +5198,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5505,19 +5207,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         /**************************/
         /* push/pop */
     case 0x50 ... 0x57: /* push */
-        gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s));
-        gen_push_T0(s);
+        gen_op_mov_v_reg(MO_32, cpu_T[0], (b & 7) | REX_B(s));
+        gen_push_v(s, cpu_T[0]);
         break;
     case 0x58 ... 0x5f: /* pop */
-        if (CODE64(s)) {
-            ot = dflag ? OT_QUAD : OT_WORD;
-        } else {
-            ot = dflag + OT_WORD;
-        }
-        gen_pop_T0(s);
+        ot = gen_pop_T0(s);
         /* NOTE: order is important for pop %sp */
-        gen_pop_update(s);
-        gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s));
+        gen_pop_update(s, ot);
+        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T[0]);
         break;
     case 0x60: /* pusha */
         if (CODE64(s))
@@ -5531,38 +5228,29 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0x68: /* push Iv */
     case 0x6a:
-        if (CODE64(s)) {
-            ot = dflag ? OT_QUAD : OT_WORD;
-        } else {
-            ot = dflag + OT_WORD;
-        }
+        ot = mo_pushpop(s, dflag);
         if (b == 0x68)
             val = insn_get(env, s, ot);
         else
-            val = (int8_t)insn_get(env, s, OT_BYTE);
-        gen_op_movl_T0_im(val);
-        gen_push_T0(s);
+            val = (int8_t)insn_get(env, s, MO_8);
+        tcg_gen_movi_tl(cpu_T[0], val);
+        gen_push_v(s, cpu_T[0]);
         break;
     case 0x8f: /* pop Ev */
-        if (CODE64(s)) {
-            ot = dflag ? OT_QUAD : OT_WORD;
-        } else {
-            ot = dflag + OT_WORD;
-        }
         modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
-        gen_pop_T0(s);
+        ot = gen_pop_T0(s);
         if (mod == 3) {
             /* NOTE: order is important for pop %sp */
-            gen_pop_update(s);
+            gen_pop_update(s, ot);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_T0(ot, rm);
+            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             s->popl_esp_hack = 0;
-            gen_pop_update(s);
+            gen_pop_update(s, ot);
         }
         break;
     case 0xc8: /* enter */
@@ -5577,23 +5265,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xc9: /* leave */
         /* XXX: exception not precise (ESP is updated before potential exception) */
         if (CODE64(s)) {
-            gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP);
-            gen_op_mov_reg_T0(OT_QUAD, R_ESP);
+            gen_op_mov_v_reg(MO_64, cpu_T[0], R_EBP);
+            gen_op_mov_reg_v(MO_64, R_ESP, cpu_T[0]);
         } else if (s->ss32) {
-            gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
-            gen_op_mov_reg_T0(OT_LONG, R_ESP);
-        } else {
-            gen_op_mov_TN_reg(OT_WORD, 0, R_EBP);
-            gen_op_mov_reg_T0(OT_WORD, R_ESP);
-        }
-        gen_pop_T0(s);
-        if (CODE64(s)) {
-            ot = dflag ? OT_QUAD : OT_WORD;
+            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
+            gen_op_mov_reg_v(MO_32, R_ESP, cpu_T[0]);
         } else {
-            ot = dflag + OT_WORD;
+            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EBP);
+            gen_op_mov_reg_v(MO_16, R_ESP, cpu_T[0]);
         }
-        gen_op_mov_reg_T0(ot, R_EBP);
-        gen_pop_update(s);
+        ot = gen_pop_T0(s);
+        gen_op_mov_reg_v(ot, R_EBP, cpu_T[0]);
+        gen_pop_update(s, ot);
         break;
     case 0x06: /* push es */
     case 0x0e: /* push cs */
@@ -5602,12 +5285,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (CODE64(s))
             goto illegal_op;
         gen_op_movl_T0_seg(b >> 3);
-        gen_push_T0(s);
+        gen_push_v(s, cpu_T[0]);
         break;
     case 0x1a0: /* push fs */
     case 0x1a8: /* push gs */
         gen_op_movl_T0_seg((b >> 3) & 7);
-        gen_push_T0(s);
+        gen_push_v(s, cpu_T[0]);
         break;
     case 0x07: /* pop es */
     case 0x17: /* pop ss */
@@ -5615,9 +5298,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (CODE64(s))
             goto illegal_op;
         reg = b >> 3;
-        gen_pop_T0(s);
+        ot = gen_pop_T0(s);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
-        gen_pop_update(s);
+        gen_pop_update(s, ot);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace. */
             /* If several instructions disable interrupts, only the
@@ -5633,9 +5316,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0x1a1: /* pop fs */
     case 0x1a9: /* pop gs */
-        gen_pop_T0(s);
+        ot = gen_pop_T0(s);
         gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base);
-        gen_pop_update(s);
+        gen_pop_update(s, ot);
         if (s->is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -5646,10 +5329,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         /* mov */
     case 0x88:
     case 0x89: /* mov Gv, Ev */
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
 
@@ -5658,41 +5338,36 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
         modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
         }
         val = insn_get(env, s, ot);
-        gen_op_movl_T0_im(val);
-        if (mod != 3)
-            gen_op_st_T0_A0(ot + s->mem_index);
-        else
-            gen_op_mov_reg_T0(ot, (modrm & 7) | REX_B(s));
+        tcg_gen_movi_tl(cpu_T[0], val);
+        if (mod != 3) {
+            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+        } else {
+            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T[0]);
+        }
         break;
     case 0x8a:
     case 0x8b: /* mov Ev, Gv */
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = OT_WORD + dflag;
+        ot = mo_b_d(b, dflag);
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_reg_T0(ot, reg);
+        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         break;
     case 0x8e: /* mov seg, Gv */
         modrm = cpu_ldub_code(env, s->pc++);
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
@@ -5714,10 +5389,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (reg >= 6)
             goto illegal_op;
         gen_op_movl_T0_seg(reg);
-        if (mod == 3)
-            ot = OT_WORD + dflag;
-        else
-            ot = OT_WORD;
+        ot = mod == 3 ? dflag : MO_16;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
         break;
 
@@ -5726,48 +5398,49 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x1be: /* movsbS Gv, Eb */
     case 0x1bf: /* movswS Gv, Eb */
         {
-            int d_ot;
+            TCGMemOp d_ot;
+            TCGMemOp s_ot;
+
             /* d_ot is the size of destination */
-            d_ot = dflag + OT_WORD;
+            d_ot = dflag;
             /* ot is the size of source */
-            ot = (b & 1) + OT_BYTE;
+            ot = (b & 1) + MO_8;
+            /* s_ot is the sign+size of source */
+            s_ot = b & 8 ? MO_SIGN | ot : ot;
+
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_TN_reg(ot, 0, rm);
-                switch(ot | (b & 8)) {
-                case OT_BYTE:
+                gen_op_mov_v_reg(ot, cpu_T[0], rm);
+                switch (s_ot) {
+                case MO_UB:
                     tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
                     break;
-                case OT_BYTE | 8:
+                case MO_SB:
                     tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
                     break;
-                case OT_WORD:
+                case MO_UW:
                     tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
                     break;
                 default:
-                case OT_WORD | 8:
+                case MO_SW:
                     tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
                     break;
                 }
-                gen_op_mov_reg_T0(d_ot, reg);
+                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                if (b & 8) {
-                    gen_op_lds_T0_A0(ot + s->mem_index);
-                } else {
-                    gen_op_ldu_T0_A0(ot + s->mem_index);
-                }
-                gen_op_mov_reg_T0(d_ot, reg);
+                gen_lea_modrm(env, s, modrm);
+                gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
+                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
         }
         break;
 
     case 0x8d: /* lea */
-        ot = dflag + OT_WORD;
+        ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
@@ -5777,9 +5450,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm);
         s->addseg = val;
-        gen_op_mov_reg_A0(ot - OT_WORD, reg);
+        gen_op_mov_reg_v(ot, reg, cpu_A0);
         break;
 
     case 0xa0: /* mov EAX, Ov */
@@ -5789,117 +5462,94 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         {
             target_ulong offset_addr;
 
-            if ((b & 1) == 0)
-                ot = OT_BYTE;
-            else
-                ot = dflag + OT_WORD;
+            ot = mo_b_d(b, dflag);
+            switch (s->aflag) {
 #ifdef TARGET_X86_64
-            if (s->aflag == 2) {
+            case MO_64:
                 offset_addr = cpu_ldq_code(env, s->pc);
                 s->pc += 8;
-                gen_op_movq_A0_im(offset_addr);
-            } else
+                break;
 #endif
-            {
-                if (s->aflag) {
-                    offset_addr = insn_get(env, s, OT_LONG);
-                } else {
-                    offset_addr = insn_get(env, s, OT_WORD);
-                }
-                gen_op_movl_A0_im(offset_addr);
+            default:
+                offset_addr = insn_get(env, s, s->aflag);
+                break;
             }
+            tcg_gen_movi_tl(cpu_A0, offset_addr);
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
-                gen_op_ld_T0_A0(ot + s->mem_index);
-                gen_op_mov_reg_T0(ot, R_EAX);
+                gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+                gen_op_mov_reg_v(ot, R_EAX, cpu_T[0]);
             } else {
-                gen_op_mov_TN_reg(ot, 0, R_EAX);
-                gen_op_st_T0_A0(ot + s->mem_index);
+                gen_op_mov_v_reg(ot, cpu_T[0], R_EAX);
+                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
             }
         }
         break;
     case 0xd7: /* xlat */
-#ifdef TARGET_X86_64
-        if (s->aflag == 2) {
-            gen_op_movq_A0_reg(R_EBX);
-            gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
-            tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
-            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
-        } else
-#endif
-        {
-            gen_op_movl_A0_reg(R_EBX);
-            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
-            tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
-            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
-            if (s->aflag == 0)
-                gen_op_andl_A0_ffff();
-            else
-                tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
-        }
+        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
+        tcg_gen_ext8u_tl(cpu_T[0], cpu_regs[R_EAX]);
+        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
+        gen_extu(s->aflag, cpu_A0);
         gen_add_A0_ds_seg(s);
-        gen_op_ldu_T0_A0(OT_BYTE + s->mem_index);
-        gen_op_mov_reg_T0(OT_BYTE, R_EAX);
+        gen_op_ld_v(s, MO_8, cpu_T[0], cpu_A0);
+        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T[0]);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
-        val = insn_get(env, s, OT_BYTE);
-        gen_op_movl_T0_im(val);
-        gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s));
+        val = insn_get(env, s, MO_8);
+        tcg_gen_movi_tl(cpu_T[0], val);
+        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T[0]);
         break;
     case 0xb8 ... 0xbf: /* mov R, Iv */
 #ifdef TARGET_X86_64
-        if (dflag == 2) {
+        if (dflag == MO_64) {
             uint64_t tmp;
             /* 64 bit case */
             tmp = cpu_ldq_code(env, s->pc);
             s->pc += 8;
             reg = (b & 7) | REX_B(s);
-            gen_movtl_T0_im(tmp);
-            gen_op_mov_reg_T0(OT_QUAD, reg);
+            tcg_gen_movi_tl(cpu_T[0], tmp);
+            gen_op_mov_reg_v(MO_64, reg, cpu_T[0]);
         } else
 #endif
         {
-            ot = dflag ? OT_LONG : OT_WORD;
+            ot = dflag;
             val = insn_get(env, s, ot);
             reg = (b & 7) | REX_B(s);
-            gen_op_movl_T0_im(val);
-            gen_op_mov_reg_T0(ot, reg);
+            tcg_gen_movi_tl(cpu_T[0], val);
+            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
         break;
 
     case 0x91 ... 0x97: /* xchg R, EAX */
     do_xchg_reg_eax:
-        ot = dflag + OT_WORD;
+        ot = dflag;
         reg = (b & 7) | REX_B(s);
         rm = R_EAX;
         goto do_xchg_reg;
     case 0x86:
     case 0x87: /* xchg Ev, Gv */
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
-            gen_op_mov_TN_reg(ot, 0, reg);
-            gen_op_mov_TN_reg(ot, 1, rm);
-            gen_op_mov_reg_T0(ot, rm);
-            gen_op_mov_reg_T1(ot, reg);
+            gen_op_mov_v_reg(ot, cpu_T[0], reg);
+            gen_op_mov_v_reg(ot, cpu_T[1], rm);
+            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         } else {
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-            gen_op_mov_TN_reg(ot, 0, reg);
+            gen_lea_modrm(env, s, modrm);
+            gen_op_mov_v_reg(ot, cpu_T[0], reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
                 gen_helper_lock();
-            gen_op_ld_T1_A0(ot + s->mem_index);
-            gen_op_st_T0_A0(ot + s->mem_index);
+            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
             if (!(prefixes & PREFIX_LOCK))
                 gen_helper_unlock();
-            gen_op_mov_reg_T1(ot, reg);
+            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         }
         break;
     case 0xc4: /* les Gv */
@@ -5919,20 +5569,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x1b5: /* lgs Gv */
         op = R_GS;
     do_lxx:
-        ot = dflag ? OT_LONG : OT_WORD;
+        ot = dflag != MO_16 ? MO_32 : MO_16;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-        gen_op_ld_T1_A0(ot + s->mem_index);
-        gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+        gen_lea_modrm(env, s, modrm);
+        gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+        gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
-        gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+        gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
         gen_movl_seg_T0(s, op, pc_start - s->cs_base);
         /* then put the data */
-        gen_op_mov_reg_T1(ot, reg);
+        gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         if (s->is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -5947,11 +5597,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         shift = 2;
     grp2:
         {
-            if ((b & 1) == 0)
-                ot = OT_BYTE;
-            else
-                ot = dflag + OT_WORD;
-
+            ot = mo_b_d(b, dflag);
             modrm = cpu_ldub_code(env, s->pc++);
             mod = (modrm >> 6) & 3;
             op = (modrm >> 3) & 7;
@@ -5960,7 +5606,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -6004,18 +5650,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         op = 1;
         shift = 0;
     do_shiftd:
-        ot = dflag + OT_WORD;
+        ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
         }
-        gen_op_mov_TN_reg(ot, 1, reg);
+        gen_op_mov_v_reg(ot, cpu_T[1], reg);
 
         if (shift) {
             TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
@@ -6041,7 +5687,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -6053,24 +5699,24 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
                     switch(op >> 4) {
                     case 0:
-                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUL);
                         gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
-                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUL);
                         gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
-                                          (s->mem_index >> 2) - 1);
+                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                                            s->mem_index, MO_LEQ);
                         gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
-                        gen_op_lds_T0_A0(OT_WORD + s->mem_index);
-                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LESW);
                         gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     }
@@ -6092,24 +5738,24 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 case 0:
                     switch(op >> 4) {
                     case 0:
-                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUL);
                         gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
-                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUL);
                         gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
-                                          (s->mem_index >> 2) - 1);
+                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                                            s->mem_index, MO_LEQ);
                         gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
-                        gen_op_lds_T0_A0(OT_WORD + s->mem_index);
-                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LESW);
                         gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     }
@@ -6119,19 +5765,19 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     switch(op >> 4) {
                     case 1:
                         gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                        gen_op_st_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUL);
                         break;
                     case 2:
                         gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
-                                          (s->mem_index >> 2) - 1);
+                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                                            s->mem_index, MO_LEQ);
                         break;
                     case 3:
                     default:
                         gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                        gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUW);
                         break;
                     }
                     gen_helper_fpop(cpu_env);
@@ -6140,24 +5786,24 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     switch(op >> 4) {
                     case 0:
                         gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                        gen_op_st_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUL);
                         break;
                     case 1:
                         gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                        gen_op_st_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUL);
                         break;
                     case 2:
                         gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
-                                          (s->mem_index >> 2) - 1);
+                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                                            s->mem_index, MO_LEQ);
                         break;
                     case 3:
                     default:
                         gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                        gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                                            s->mem_index, MO_LEUW);
                         break;
                     }
                     if ((op & 7) == 3)
@@ -6168,22 +5814,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x0c: /* fldenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0d: /* fldcw mem */
-                gen_op_ld_T0_A0(OT_WORD + s->mem_index);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                    s->mem_index, MO_LEUW);
                 gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                                    s->mem_index, MO_LEUW);
                 break;
             case 0x1d: /* fldt mem */
                 gen_update_cc_op(s);
@@ -6199,17 +5845,17 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x2c: /* frstor mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2e: /* fnsave mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                                    s->mem_index, MO_LEUW);
                 break;
             case 0x3c: /* fbld */
                 gen_update_cc_op(s);
@@ -6223,14 +5869,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x3d: /* fildll */
-                tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
-                                  (s->mem_index >> 2) - 1);
+                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
                 gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
                 break;
             case 0x3f: /* fistpll */
                 gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
-                tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
-                                  (s->mem_index >> 2) - 1);
+                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
                 gen_helper_fpop(cpu_env);
                 break;
             default:
@@ -6496,7 +6140,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 case 0:
                     gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
                     tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                    gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
                     break;
                 default:
                     goto illegal_op;
@@ -6553,11 +6197,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
     case 0xa4: /* movsS */
     case 0xa5:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
-
+        ot = mo_b_d(b, dflag);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
         } else {
@@ -6567,11 +6207,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
     case 0xaa: /* stosS */
     case 0xab:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
-
+        ot = mo_b_d(b, dflag);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
         } else {
@@ -6580,10 +6216,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0xac: /* lodsS */
     case 0xad:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
         } else {
@@ -6592,10 +6225,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0xae: /* scasS */
     case 0xaf:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
         if (prefixes & PREFIX_REPNZ) {
             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
         } else if (prefixes & PREFIX_REPZ) {
@@ -6607,10 +6237,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
     case 0xa6: /* cmpsS */
     case 0xa7:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag + OT_WORD;
+        ot = mo_b_d(b, dflag);
         if (prefixes & PREFIX_REPNZ) {
             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
         } else if (prefixes & PREFIX_REPZ) {
@@ -6621,12 +6248,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0x6c: /* insS */
     case 0x6d:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag ? OT_LONG : OT_WORD;
-        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
-        gen_op_andl_T0_ffff();
+        ot = mo_b_d32(b, dflag);
+        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base, 
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6640,12 +6263,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0x6e: /* outsS */
     case 0x6f:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag ? OT_LONG : OT_WORD;
-        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
-        gen_op_andl_T0_ffff();
+        ot = mo_b_d32(b, dflag);
+        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6663,19 +6282,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
     case 0xe4:
     case 0xe5:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag ? OT_LONG : OT_WORD;
+        ot = mo_b_d32(b, dflag);
         val = cpu_ldub_code(env, s->pc++);
-        gen_op_movl_T0_im(val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (use_icount)
             gen_io_start();
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        tcg_gen_movi_i32(cpu_tmp2_i32, val);
         gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
-        gen_op_mov_reg_T1(ot, R_EAX);
+        gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
         if (use_icount) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6683,19 +6298,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0xe6:
     case 0xe7:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag ? OT_LONG : OT_WORD;
+        ot = mo_b_d32(b, dflag);
         val = cpu_ldub_code(env, s->pc++);
-        gen_op_movl_T0_im(val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_TN_reg(ot, 1, R_EAX);
+        gen_op_mov_v_reg(ot, cpu_T[1], R_EAX);
 
         if (use_icount)
             gen_io_start();
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        tcg_gen_movi_i32(cpu_tmp2_i32, val);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
         if (use_icount) {
@@ -6705,19 +6316,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0xec:
     case 0xed:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag ? OT_LONG : OT_WORD;
-        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
-        gen_op_andl_T0_ffff();
+        ot = mo_b_d32(b, dflag);
+        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (use_icount)
             gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
-        gen_op_mov_reg_T1(ot, R_EAX);
+        gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
         if (use_icount) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6725,15 +6332,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0xee:
     case 0xef:
-        if ((b & 1) == 0)
-            ot = OT_BYTE;
-        else
-            ot = dflag ? OT_LONG : OT_WORD;
-        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
-        gen_op_andl_T0_ffff();
+        ot = mo_b_d32(b, dflag);
+        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_TN_reg(ot, 1, R_EAX);
+        gen_op_mov_v_reg(ot, cpu_T[1], R_EAX);
 
         if (use_icount)
             gen_io_start();
@@ -6751,21 +6354,17 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xc2: /* ret im */
         val = cpu_ldsw_code(env, s->pc);
         s->pc += 2;
-        gen_pop_T0(s);
-        if (CODE64(s) && s->dflag)
-            s->dflag = 2;
-        gen_stack_update(s, val + (2 << s->dflag));
-        if (s->dflag == 0)
-            gen_op_andl_T0_ffff();
-        gen_op_jmp_T0();
+        ot = gen_pop_T0(s);
+        gen_stack_update(s, val + (1 << ot));
+        /* Note that gen_pop_T0 uses a zero-extending load.  */
+        gen_op_jmp_v(cpu_T[0]);
         gen_eob(s);
         break;
     case 0xc3: /* ret */
-        gen_pop_T0(s);
-        gen_pop_update(s);
-        if (s->dflag == 0)
-            gen_op_andl_T0_ffff();
-        gen_op_jmp_T0();
+        ot = gen_pop_T0(s);
+        gen_pop_update(s, ot);
+        /* Note that gen_pop_T0 uses a zero-extending load.  */
+        gen_op_jmp_v(cpu_T[0]);
         gen_eob(s);
         break;
     case 0xca: /* lret im */
@@ -6775,23 +6374,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->pe && !s->vm86) {
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_lret_protected(cpu_env, tcg_const_i32(s->dflag),
+            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(val));
         } else {
             gen_stack_A0(s);
             /* pop offset */
-            gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
-            if (s->dflag == 0)
-                gen_op_andl_T0_ffff();
+            gen_op_ld_v(s, dflag, cpu_T[0], cpu_A0);
             /* NOTE: keeping EIP updated is not a problem in case of
                exception */
-            gen_op_jmp_T0();
+            gen_op_jmp_v(cpu_T[0]);
             /* pop selector */
-            gen_op_addl_A0_im(2 << s->dflag);
-            gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
+            gen_op_addl_A0_im(1 << dflag);
+            gen_op_ld_v(s, dflag, cpu_T[0], cpu_A0);
             gen_op_movl_seg_T0_vm(R_CS);
             /* add stack offset */
-            gen_stack_update(s, val + (4 << s->dflag));
+            gen_stack_update(s, val + (2 << dflag));
         }
         gen_eob(s);
         break;
@@ -6802,19 +6399,19 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
         if (!s->pe) {
             /* real mode */
-            gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
+            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
             set_cc_op(s, CC_OP_EFLAGS);
         } else if (s->vm86) {
             if (s->iopl != 3) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
-                gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
+                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
                 set_cc_op(s, CC_OP_EFLAGS);
             }
         } else {
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_iret_protected(cpu_env, tcg_const_i32(s->dflag),
+            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             set_cc_op(s, CC_OP_EFLAGS);
         }
@@ -6822,18 +6419,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0xe8: /* call im */
         {
-            if (dflag)
-                tval = (int32_t)insn_get(env, s, OT_LONG);
-            else
-                tval = (int16_t)insn_get(env, s, OT_WORD);
+            if (dflag != MO_16) {
+                tval = (int32_t)insn_get(env, s, MO_32);
+            } else {
+                tval = (int16_t)insn_get(env, s, MO_16);
+            }
             next_eip = s->pc - s->cs_base;
             tval += next_eip;
-            if (s->dflag == 0)
+            if (dflag == MO_16) {
                 tval &= 0xffff;
-            else if(!CODE64(s))
+            } else if (!CODE64(s)) {
                 tval &= 0xffffffff;
-            gen_movtl_T0_im(next_eip);
-            gen_push_T0(s);
+            }
+            tcg_gen_movi_tl(cpu_T[0], next_eip);
+            gen_push_v(s, cpu_T[0]);
             gen_jmp(s, tval);
         }
         break;
@@ -6843,24 +6442,26 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
             if (CODE64(s))
                 goto illegal_op;
-            ot = dflag ? OT_LONG : OT_WORD;
+            ot = dflag;
             offset = insn_get(env, s, ot);
-            selector = insn_get(env, s, OT_WORD);
+            selector = insn_get(env, s, MO_16);
 
-            gen_op_movl_T0_im(selector);
-            gen_op_movl_T1_imu(offset);
+            tcg_gen_movi_tl(cpu_T[0], selector);
+            tcg_gen_movi_tl(cpu_T[1], offset);
         }
         goto do_lcall;
     case 0xe9: /* jmp im */
-        if (dflag)
-            tval = (int32_t)insn_get(env, s, OT_LONG);
-        else
-            tval = (int16_t)insn_get(env, s, OT_WORD);
+        if (dflag != MO_16) {
+            tval = (int32_t)insn_get(env, s, MO_32);
+        } else {
+            tval = (int16_t)insn_get(env, s, MO_16);
+        }
         tval += s->pc - s->cs_base;
-        if (s->dflag == 0)
+        if (dflag == MO_16) {
             tval &= 0xffff;
-        else if(!CODE64(s))
+        } else if (!CODE64(s)) {
             tval &= 0xffffffff;
+        }
         gen_jmp(s, tval);
         break;
     case 0xea: /* ljmp im */
@@ -6869,48 +6470,50 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
             if (CODE64(s))
                 goto illegal_op;
-            ot = dflag ? OT_LONG : OT_WORD;
+            ot = dflag;
             offset = insn_get(env, s, ot);
-            selector = insn_get(env, s, OT_WORD);
+            selector = insn_get(env, s, MO_16);
 
-            gen_op_movl_T0_im(selector);
-            gen_op_movl_T1_imu(offset);
+            tcg_gen_movi_tl(cpu_T[0], selector);
+            tcg_gen_movi_tl(cpu_T[1], offset);
         }
         goto do_ljmp;
     case 0xeb: /* jmp Jb */
-        tval = (int8_t)insn_get(env, s, OT_BYTE);
+        tval = (int8_t)insn_get(env, s, MO_8);
         tval += s->pc - s->cs_base;
-        if (s->dflag == 0)
+        if (dflag == MO_16) {
             tval &= 0xffff;
+        }
         gen_jmp(s, tval);
         break;
     case 0x70 ... 0x7f: /* jcc Jb */
-        tval = (int8_t)insn_get(env, s, OT_BYTE);
+        tval = (int8_t)insn_get(env, s, MO_8);
         goto do_jcc;
     case 0x180 ... 0x18f: /* jcc Jv */
-        if (dflag) {
-            tval = (int32_t)insn_get(env, s, OT_LONG);
+        if (dflag != MO_16) {
+            tval = (int32_t)insn_get(env, s, MO_32);
         } else {
-            tval = (int16_t)insn_get(env, s, OT_WORD);
+            tval = (int16_t)insn_get(env, s, MO_16);
         }
     do_jcc:
         next_eip = s->pc - s->cs_base;
         tval += next_eip;
-        if (s->dflag == 0)
+        if (dflag == MO_16) {
             tval &= 0xffff;
+        }
         gen_jcc(s, b, tval, next_eip);
         break;
 
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc1(s, b, cpu_T[0]);
-        gen_ldst_modrm(env, s, modrm, OT_BYTE, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         if (!(s->cpuid_features & CPUID_CMOV)) {
             goto illegal_op;
         }
-        ot = dflag + OT_WORD;
+        ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         gen_cmovcc1(env, s, ot, b, modrm, reg);
@@ -6925,7 +6528,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             gen_update_cc_op(s);
             gen_helper_read_eflags(cpu_T[0], cpu_env);
-            gen_push_T0(s);
+            gen_push_v(s, cpu_T[0]);
         }
         break;
     case 0x9d: /* popf */
@@ -6933,9 +6536,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->vm86 && s->iopl != 3) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            gen_pop_T0(s);
+            ot = gen_pop_T0(s);
             if (s->cpl == 0) {
-                if (s->dflag) {
+                if (dflag != MO_16) {
                     gen_helper_write_eflags(cpu_env, cpu_T[0],
                                             tcg_const_i32((TF_MASK | AC_MASK |
                                                            ID_MASK | NT_MASK |
@@ -6950,7 +6553,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
             } else {
                 if (s->cpl <= s->iopl) {
-                    if (s->dflag) {
+                    if (dflag != MO_16) {
                         gen_helper_write_eflags(cpu_env, cpu_T[0],
                                                 tcg_const_i32((TF_MASK |
                                                                AC_MASK |
@@ -6967,7 +6570,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                                                               & 0xffff));
                     }
                 } else {
-                    if (s->dflag) {
+                    if (dflag != MO_16) {
                         gen_helper_write_eflags(cpu_env, cpu_T[0],
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK)));
@@ -6979,7 +6582,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     }
                 }
             }
-            gen_pop_update(s);
+            gen_pop_update(s, ot);
             set_cc_op(s, CC_OP_EFLAGS);
             /* abort translation because TF/AC flag may change */
             gen_jmp_im(s->pc - s->cs_base);
@@ -6989,7 +6592,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x9e: /* sahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
+        gen_op_mov_v_reg(MO_8, cpu_T[0], R_AH);
         gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
@@ -7001,7 +6604,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
         tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
-        gen_op_mov_reg_T0(OT_BYTE, R_AH);
+        gen_op_mov_reg_v(MO_8, R_AH, cpu_T[0]);
         break;
     case 0xf5: /* cmc */
         gen_compute_eflags(s);
@@ -7027,21 +6630,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         /************************/
         /* bit operations */
     case 0x1ba: /* bt/bts/btr/btc Gv, im */
-        ot = dflag + OT_WORD;
+        ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         op = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-            gen_op_ld_T0_A0(ot + s->mem_index);
+            gen_lea_modrm(env, s, modrm);
+            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
-            gen_op_mov_TN_reg(ot, 0, rm);
+            gen_op_mov_v_reg(ot, cpu_T[0], rm);
         }
         /* load shift */
         val = cpu_ldub_code(env, s->pc++);
-        gen_op_movl_T1_im(val);
+        tcg_gen_movi_tl(cpu_T[1], val);
         if (op < 4)
             goto illegal_op;
         op -= 4;
@@ -7058,22 +6661,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x1bb: /* btc */
         op = 3;
     do_btx:
-        ot = dflag + OT_WORD;
+        ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_TN_reg(OT_LONG, 1, reg);
+        gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
             tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-            gen_op_ld_T0_A0(ot + s->mem_index);
+            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
-            gen_op_mov_TN_reg(ot, 0, rm);
+            gen_op_mov_v_reg(ot, cpu_T[0], rm);
         }
     bt_op:
         tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
@@ -7105,17 +6708,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
         set_cc_op(s, CC_OP_SARB + ot);
         if (op != 0) {
-            if (mod != 3)
-                gen_op_st_T0_A0(ot + s->mem_index);
-            else
-                gen_op_mov_reg_T0(ot, rm);
+            if (mod != 3) {
+                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+            } else {
+                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+            }
             tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
             tcg_gen_movi_tl(cpu_cc_dst, 0);
         }
         break;
     case 0x1bc: /* bsf / tzcnt */
     case 0x1bd: /* bsr / lzcnt */
-        ot = dflag + OT_WORD;
+        ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
@@ -7164,7 +6768,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0,
                                cpu_regs[reg], cpu_T[0]);
         }
-        gen_op_mov_reg_T0(ot, reg);
+        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         break;
         /************************/
         /* bcd */
@@ -7314,17 +6918,17 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x62: /* bound */
         if (CODE64(s))
             goto illegal_op;
-        ot = dflag ? OT_LONG : OT_WORD;
+        ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_op_mov_TN_reg(ot, 0, reg);
-        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+        gen_op_mov_v_reg(ot, cpu_T[0], reg);
+        gen_lea_modrm(env, s, modrm);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        if (ot == OT_WORD) {
+        if (ot == MO_16) {
             gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
         } else {
             gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
@@ -7333,17 +6937,17 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x1c8 ... 0x1cf: /* bswap reg */
         reg = (b & 7) | REX_B(s);
 #ifdef TARGET_X86_64
-        if (dflag == 2) {
-            gen_op_mov_TN_reg(OT_QUAD, 0, reg);
+        if (dflag == MO_64) {
+            gen_op_mov_v_reg(MO_64, cpu_T[0], reg);
             tcg_gen_bswap64_i64(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_T0(OT_QUAD, reg);
+            gen_op_mov_reg_v(MO_64, reg, cpu_T[0]);
         } else
 #endif
         {
-            gen_op_mov_TN_reg(OT_LONG, 0, reg);
+            gen_op_mov_v_reg(MO_32, cpu_T[0], reg);
             tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
             tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_T0(OT_LONG, reg);
+            gen_op_mov_reg_v(MO_32, reg, cpu_T[0]);
         }
         break;
     case 0xd6: /* salc */
@@ -7351,7 +6955,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             goto illegal_op;
         gen_compute_eflags_c(s, cpu_T[0]);
         tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
-        gen_op_mov_reg_T0(OT_BYTE, R_EAX);
+        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T[0]);
         break;
     case 0xe0: /* loopnz */
     case 0xe1: /* loopz */
@@ -7360,11 +6964,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         {
             int l1, l2, l3;
 
-            tval = (int8_t)insn_get(env, s, OT_BYTE);
+            tval = (int8_t)insn_get(env, s, MO_8);
             next_eip = s->pc - s->cs_base;
             tval += next_eip;
-            if (s->dflag == 0)
+            if (dflag == MO_16) {
                 tval &= 0xffff;
+            }
 
             l1 = gen_new_label();
             l2 = gen_new_label();
@@ -7449,7 +7054,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag));
+            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
             gen_eob(s);
         }
         break;
@@ -7467,7 +7072,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_sysret(cpu_env, tcg_const_i32(s->dflag));
+            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
             /* condition codes are modified only in long mode */
             if (s->lma) {
                 set_cc_op(s, CC_OP_EFLAGS);
@@ -7501,9 +7106,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
-            ot = OT_WORD;
-            if (mod == 3)
-                ot += s->dflag;
+            ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             break;
         case 2: /* lldt */
@@ -7513,7 +7116,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
@@ -7524,9 +7127,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
-            ot = OT_WORD;
-            if (mod == 3)
-                ot += s->dflag;
+            ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             break;
         case 3: /* ltr */
@@ -7536,7 +7137,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
@@ -7546,7 +7147,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(cpu_env, cpu_T[0]);
@@ -7569,14 +7170,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
-            gen_op_st_T0_A0(OT_WORD + s->mem_index);
+            gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
             gen_add_A0_im(s, 2);
             tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base));
-            if (!s->dflag)
-                gen_op_andl_T0_im(0xffffff);
-            gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+            if (dflag == MO_16) {
+                tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
+            }
+            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
             break;
         case 1:
             if (mod == 3) {
@@ -7587,16 +7189,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                         goto illegal_op;
                     gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
-#ifdef TARGET_X86_64
-                    if (s->aflag == 2) {
-                        gen_op_movq_A0_reg(R_EAX);
-                    } else
-#endif
-                    {
-                        gen_op_movl_A0_reg(R_EAX);
-                        if (s->aflag == 0)
-                            gen_op_andl_A0_ffff();
-                    }
+                    tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
+                    gen_extu(s->aflag, cpu_A0);
                     gen_add_A0_ds_seg(s);
                     gen_helper_monitor(cpu_env, cpu_A0);
                     break;
@@ -7632,14 +7226,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
-                gen_op_st_T0_A0(OT_WORD + s->mem_index);
+                gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
                 gen_add_A0_im(s, 2);
                 tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base));
-                if (!s->dflag)
-                    gen_op_andl_T0_im(0xffffff);
-                gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+                if (dflag == MO_16) {
+                    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
+                }
+                gen_op_st_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
             }
             break;
         case 2: /* lgdt */
@@ -7655,7 +7250,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag),
+                        gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
                                          tcg_const_i32(s->pc - pc_start));
                         tcg_gen_exit_tb(0);
                         s->is_jmp = DISAS_TB_JUMP;
@@ -7673,7 +7268,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag));
+                        gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
                     }
                     break;
                 case 3: /* VMSAVE */
@@ -7683,7 +7278,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag));
+                        gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
                     }
                     break;
                 case 4: /* STGI */
@@ -7722,7 +7317,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag));
+                        gen_helper_invlpga(cpu_env,
+                                           tcg_const_i32(s->aflag - 1));
                     }
                     break;
                 default:
@@ -7733,12 +7329,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_T1_A0(OT_WORD + s->mem_index);
+                gen_lea_modrm(env, s, modrm);
+                gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
                 gen_add_A0_im(s, 2);
-                gen_op_ld_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
-                if (!s->dflag)
-                    gen_op_andl_T0_im(0xffffff);
+                gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
+                if (dflag == MO_16) {
+                    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
+                }
                 if (op == 2) {
                     tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,gdt.base));
                     tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,gdt.limit));
@@ -7755,14 +7352,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                 gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -7775,7 +7372,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 } else {
                     gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm);
                     gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
@@ -7839,7 +7436,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (CODE64(s)) {
             int d_ot;
             /* d_ot is the size of destination */
-            d_ot = dflag + OT_WORD;
+            d_ot = dflag;
 
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
@@ -7847,19 +7444,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_TN_reg(OT_LONG, 0, rm);
+                gen_op_mov_v_reg(MO_32, cpu_T[0], rm);
                 /* sign extend */
-                if (d_ot == OT_QUAD)
+                if (d_ot == MO_64) {
                     tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
-                gen_op_mov_reg_T0(d_ot, reg);
-            } else {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                if (d_ot == OT_QUAD) {
-                    gen_op_lds_T0_A0(OT_LONG + s->mem_index);
-                } else {
-                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                 }
-                gen_op_mov_reg_T0(d_ot, reg);
+                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
+            } else {
+                gen_lea_modrm(env, s, modrm);
+                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
+                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
         } else
 #endif
@@ -7872,14 +7466,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             t0 = tcg_temp_local_new();
             t1 = tcg_temp_local_new();
             t2 = tcg_temp_local_new();
-            ot = OT_WORD;
+            ot = MO_16;
             modrm = cpu_ldub_code(env, s->pc++);
             reg = (modrm >> 3) & 7;
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
+                gen_lea_modrm(env, s, modrm);
+                gen_op_ld_v(s, ot, t0, cpu_A0);
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, cpu_A0);
             } else {
@@ -7897,7 +7491,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_movi_tl(t2, CC_Z);
             gen_set_label(label1);
             if (mod != 3) {
-                gen_op_st_v(ot + s->mem_index, t0, a0);
+                gen_op_st_v(s, ot, t0, a0);
                 tcg_temp_free(a0);
            } else {
                 gen_op_mov_reg_v(ot, rm, t0);
@@ -7917,10 +7511,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             TCGv t0;
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            ot = dflag ? OT_LONG : OT_WORD;
+            ot = dflag != MO_16 ? MO_32 : MO_16;
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
@@ -7948,7 +7542,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7974,9 +7568,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             rm = (modrm & 7) | REX_B(s);
             reg = ((modrm >> 3) & 7) | rex_r;
             if (CODE64(s))
-                ot = OT_QUAD;
+                ot = MO_64;
             else
-                ot = OT_LONG;
+                ot = MO_32;
             if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
                 reg = 8;
@@ -7990,14 +7584,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
-                    gen_op_mov_TN_reg(ot, 0, rm);
+                    gen_op_mov_v_reg(ot, cpu_T[0], rm);
                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
                                          cpu_T[0]);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
                 } else {
                     gen_helper_read_crN(cpu_T[0], cpu_env, tcg_const_i32(reg));
-                    gen_op_mov_reg_T0(ot, rm);
+                    gen_op_mov_reg_v(ot, rm, cpu_T[0]);
                 }
                 break;
             default:
@@ -8019,22 +7613,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             rm = (modrm & 7) | REX_B(s);
             reg = ((modrm >> 3) & 7) | rex_r;
             if (CODE64(s))
-                ot = OT_QUAD;
+                ot = MO_64;
             else
-                ot = OT_LONG;
+                ot = MO_32;
             /* XXX: do it dynamically with CR4.DE bit */
             if (reg == 4 || reg == 5 || reg >= 8)
                 goto illegal_op;
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
-                gen_op_mov_TN_reg(ot, 0, rm);
+                gen_op_mov_v_reg(ot, cpu_T[0], rm);
                 gen_helper_movl_drN_T0(cpu_env, tcg_const_i32(reg), cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
                 tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg]));
-                gen_op_mov_reg_T0(ot, rm);
+                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             }
         }
         break;
@@ -8053,7 +7647,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x1c3: /* MOVNTI reg, mem */
         if (!(s->cpuid_features & CPUID_SSE2))
             goto illegal_op;
-        ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+        ot = mo_64_32(dflag);
         modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
@@ -8075,10 +7669,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2)));
+            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -8088,11 +7682,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_env, cpu_A0,
-                               tcg_const_i32((s->dflag == 2)));
+            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -8103,14 +7696,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm);
             if (op == 2) {
-                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                                    s->mem_index, MO_LEUL);
                 gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
             } else {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
-                gen_op_st_T0_A0(OT_LONG + s->mem_index);
+                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             }
             break;
         case 5: /* lfence */
@@ -8128,7 +7721,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm);
             }
             break;
         default:
@@ -8140,7 +7733,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
@@ -8162,16 +7755,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
 
-        if (s->prefix & PREFIX_DATA)
-            ot = OT_WORD;
-        else if (s->dflag != 2)
-            ot = OT_LONG;
-        else
-            ot = OT_QUAD;
+        if (s->prefix & PREFIX_DATA) {
+            ot = MO_16;
+        } else {
+            ot = mo_64_32(dflag);
+        }
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
-        gen_op_mov_reg_T0(ot, reg);
+        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
 
         set_cc_op(s, CC_OP_EFLAGS);
         break;
@@ -8205,6 +7797,37 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
 void optimize_flags_init(void)
 {
+    static const char reg_names[CPU_NB_REGS][4] = {
+#ifdef TARGET_X86_64
+        [R_EAX] = "rax",
+        [R_EBX] = "rbx",
+        [R_ECX] = "rcx",
+        [R_EDX] = "rdx",
+        [R_ESI] = "rsi",
+        [R_EDI] = "rdi",
+        [R_EBP] = "rbp",
+        [R_ESP] = "rsp",
+        [8]  = "r8",
+        [9]  = "r9",
+        [10] = "r10",
+        [11] = "r11",
+        [12] = "r12",
+        [13] = "r13",
+        [14] = "r14",
+        [15] = "r15",
+#else
+        [R_EAX] = "eax",
+        [R_EBX] = "ebx",
+        [R_ECX] = "ecx",
+        [R_EDX] = "edx",
+        [R_ESI] = "esi",
+        [R_EDI] = "edi",
+        [R_EBP] = "ebp",
+        [R_ESP] = "esp",
+#endif
+    };
+    int i;
+
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
     cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0,
                                        offsetof(CPUX86State, cc_op), "cc_op");
@@ -8215,57 +7838,11 @@ void optimize_flags_init(void)
     cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
                                      "cc_src2");
 
-#ifdef TARGET_X86_64
-    cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EAX]), "rax");
-    cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_ECX]), "rcx");
-    cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EDX]), "rdx");
-    cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EBX]), "rbx");
-    cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_ESP]), "rsp");
-    cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EBP]), "rbp");
-    cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_ESI]), "rsi");
-    cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EDI]), "rdi");
-    cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0,
-                                         offsetof(CPUX86State, regs[8]), "r8");
-    cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUX86State, regs[9]), "r9");
-    cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUX86State, regs[10]), "r10");
-    cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUX86State, regs[11]), "r11");
-    cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUX86State, regs[12]), "r12");
-    cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUX86State, regs[13]), "r13");
-    cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUX86State, regs[14]), "r14");
-    cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUX86State, regs[15]), "r15");
-#else
-    cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EAX]), "eax");
-    cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_ECX]), "ecx");
-    cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EDX]), "edx");
-    cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EBX]), "ebx");
-    cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_ESP]), "esp");
-    cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EBP]), "ebp");
-    cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_ESI]), "esi");
-    cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUX86State, regs[R_EDI]), "edi");
-#endif
+    for (i = 0; i < CPU_NB_REGS; ++i) {
+        cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
+                                         offsetof(CPUX86State, regs[i]),
+                                         reg_names[i]);
+    }
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
@@ -8311,7 +7888,7 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
     /* select memory access functions */
     dc->mem_index = 0;
     if (flags & HF_SOFTMMU_MASK) {
-        dc->mem_index = (cpu_mmu_index(env) + 1) << 2;
+        dc->mem_index = cpu_mmu_index(env);
     }
     dc->cpuid_features = env->features[FEAT_1_EDX];
     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
diff --git a/target-microblaze/cpu.c b/target-microblaze/cpu.c
index 0ef9aa4b74..f108c0b521 100644
--- a/target-microblaze/cpu.c
+++ b/target-microblaze/cpu.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2009 Edgar E. Iglesias
  * Copyright (c) 2009-2012 PetaLogix Qld Pty Ltd.
  * Copyright (c) 2012 SUSE LINUX Products GmbH
+ * Copyright (c) 2009 Edgar E. Iglesias, Axis Communications AB.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -33,6 +34,21 @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.sregs[SR_PC] = value;
 }
 
+#ifndef CONFIG_USER_ONLY
+static void microblaze_cpu_set_irq(void *opaque, int irq, int level)
+{
+    MicroBlazeCPU *cpu = opaque;
+    CPUState *cs = CPU(cpu);
+    int type = irq ? CPU_INTERRUPT_NMI : CPU_INTERRUPT_HARD;
+    /* line 0 -> CPU_INTERRUPT_HARD, any other line -> CPU_INTERRUPT_NMI */
+    if (level) {
+        cpu_interrupt(cs, type);
+    } else {
+        cpu_reset_interrupt(cs, type);
+    }
+}
+#endif
+
 /* CPUClass::reset() */
 static void mb_cpu_reset(CPUState *s)
 {
@@ -111,6 +127,11 @@ static void mb_cpu_initfn(Object *obj)
 
     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
 
+#ifndef CONFIG_USER_ONLY
+    /* Inbound IRQ and FIR lines */
+    qdev_init_gpio_in(DEVICE(cpu), microblaze_cpu_set_irq, 2);
+#endif
+
     if (tcg_enabled() && !tcg_initialized) {
         tcg_initialized = true;
         mb_tcg_init();
diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h
index e1415f043c..1df014e92e 100644
--- a/target-microblaze/cpu.h
+++ b/target-microblaze/cpu.h
@@ -48,6 +48,10 @@ typedef struct CPUMBState CPUMBState;
 /* MicroBlaze-specific interrupt pending bits.  */
 #define CPU_INTERRUPT_NMI       CPU_INTERRUPT_TGT_EXT_3
 
+/* Meanings of the MBCPU object's two inbound GPIO lines */
+#define MB_CPU_IRQ 0
+#define MB_CPU_FIR 1
+
 /* Register aliases. R0 - R15 */
 #define R_SP     1
 #define SR_PC    0
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 91c60ebaae..b381477d29 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -112,7 +112,7 @@ void openrisc_translate_init(void)
     }
 }
 
-/* Writeback SR_F transaltion-space to execution-space.  */
+/* Writeback SR_F translation space to execution space.  */
 static inline void wb_SR_F(void)
 {
     int label;
diff --git a/tests/.gitignore b/tests/.gitignore
index 425757cfe1..1aed2249ff 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -20,6 +20,7 @@ test-qmp-commands
 test-qmp-input-strict
 test-qmp-marshal.c
 test-thread-pool
+test-vmstate
 test-x86-cpuid
 test-xbzrle
 *-test
diff --git a/tests/Makefile b/tests/Makefile
index 8d258781b7..0aaf657be5 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -52,6 +52,9 @@ check-unit-y += tests/test-int128$(EXESUF)
 gcov-files-test-int128-y =
 check-unit-y += tests/test-bitops$(EXESUF)
 check-unit-y += tests/test-qdev-global-props$(EXESUF)
+check-unit-y += tests/check-qom-interface$(EXESUF)
+gcov-files-check-qom-interface-y = qom/object.c
+check-unit-y += tests/test-vmstate$(EXESUF)
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -138,6 +141,7 @@ test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o
 
 $(test-obj-y): QEMU_INCLUDES += -Itests
 QEMU_CFLAGS += -I$(SRC_PATH)/tests
+qom-core-obj = qom/object.o qom/qom-qobject.o qom/container.o
 
 tests/test-x86-cpuid.o: QEMU_INCLUDES += -I$(SRC_PATH)/target-i386
 
@@ -147,6 +151,7 @@ tests/check-qdict$(EXESUF): tests/check-qdict.o libqemuutil.a
 tests/check-qlist$(EXESUF): tests/check-qlist.o libqemuutil.a
 tests/check-qfloat$(EXESUF): tests/check-qfloat.o libqemuutil.a
 tests/check-qjson$(EXESUF): tests/check-qjson.o libqemuutil.a libqemustub.a
+tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(qom-core-obj) libqemuutil.a
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) libqemuutil.a libqemustub.a
@@ -160,9 +165,12 @@ tests/test-int128$(EXESUF): tests/test-int128.o
 tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
 	hw/core/qdev.o hw/core/qdev-properties.o \
 	hw/core/irq.o \
-	qom/object.o qom/container.o qom/qom-qobject.o \
+	$(qom-core-obj) \
 	$(test-qapi-obj-y) \
 	libqemuutil.a libqemustub.a
+tests/test-vmstate$(EXESUF): tests/test-vmstate.o \
+	vmstate.o qemu-file.o \
+	libqemuutil.a
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
 $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py
diff --git a/tests/acpi-test.c b/tests/acpi-test.c
index ca83b1d6b6..df1af83158 100644
--- a/tests/acpi-test.c
+++ b/tests/acpi-test.c
@@ -382,6 +382,7 @@ int main(int argc, char *argv[])
 {
     const char *arch = qtest_get_arch();
     FILE *f = fopen(disk, "w");
+    int ret;
     fwrite(boot_sector, 1, sizeof boot_sector, f);
     fclose(f);
 
@@ -390,5 +391,7 @@ int main(int argc, char *argv[])
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
         qtest_add_func("acpi/tcg", test_acpi_tcg);
     }
-    return g_test_run();
+    ret = g_test_run();
+    unlink(disk);
+    return ret;
 }
diff --git a/tests/check-qom-interface.c b/tests/check-qom-interface.c
new file mode 100644
index 0000000000..f06380ef14
--- /dev/null
+++ b/tests/check-qom-interface.c
@@ -0,0 +1,105 @@
+/*
+ * QOM interface test.
+ *
+ * Copyright (C) 2013 Red Hat Inc.
+ *
+ * Authors:
+ *  Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#include <glib.h>
+
+#include "qom/object.h"
+#include "qemu/module.h"
+
+
+#define TYPE_TEST_IF "test-interface"
+#define TEST_IF_CLASS(klass) \
+     OBJECT_CLASS_CHECK(TestIfClass, (klass), TYPE_TEST_IF)
+#define TEST_IF_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(TestIfClass, (obj), TYPE_TEST_IF)
+#define TEST_IF(obj) \
+     INTERFACE_CHECK(TestIf, (obj), TYPE_TEST_IF)
+
+typedef struct TestIf {
+    Object parent_obj;
+} TestIf;
+
+typedef struct TestIfClass {
+    InterfaceClass parent_class;
+
+    uint32_t test;
+} TestIfClass;
+
+static const TypeInfo test_if_info = {
+    .name          = TYPE_TEST_IF,
+    .parent        = TYPE_INTERFACE,
+    .class_size = sizeof(TestIfClass),
+};
+
+#define PATTERN 0xFAFBFCFD
+
+static void test_class_init(ObjectClass *oc, void *data)
+{
+    TestIfClass *tc = TEST_IF_CLASS(oc);
+
+    g_assert(tc);
+    tc->test = PATTERN;
+}
+
+#define TYPE_DIRECT_IMPL "direct-impl"
+
+static const TypeInfo direct_impl_info = {
+    .name = TYPE_DIRECT_IMPL,
+    .parent = TYPE_OBJECT,
+    .class_init = test_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_TEST_IF },
+        { }
+    }
+};
+
+#define TYPE_INTERMEDIATE_IMPL "intermediate-impl"
+
+static const TypeInfo intermediate_impl_info = {
+    .name = TYPE_INTERMEDIATE_IMPL,
+    .parent = TYPE_DIRECT_IMPL,
+};
+
+static void test_interface_impl(const char *type)
+{
+    Object *obj = object_new(type); /* this test owns the only reference */
+    TestIf *iobj = TEST_IF(obj);
+    TestIfClass *ioc = TEST_IF_GET_CLASS(iobj);
+    /* test_class_init() must have stamped PATTERN into the interface class */
+    g_assert(iobj && ioc->test == PATTERN);
+    object_unref(obj); /* release it so the test does not leak the object */
+}
+
+static void interface_direct_test(void)
+{
+    test_interface_impl(TYPE_DIRECT_IMPL);
+}
+
+static void interface_intermediate_test(void)
+{
+    test_interface_impl(TYPE_INTERMEDIATE_IMPL);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    module_call_init(MODULE_INIT_QOM);
+    type_register_static(&test_if_info);
+    type_register_static(&direct_impl_info);
+    type_register_static(&intermediate_impl_info);
+
+    g_test_add_func("/qom/interface/direct_impl", interface_direct_test);
+    g_test_add_func("/qom/interface/intermediate_impl",
+                    interface_intermediate_test);
+
+    return g_test_run();
+}
diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c
index 65c786ca1e..fa3e3d6b87 100644
--- a/tests/i440fx-test.c
+++ b/tests/i440fx-test.c
@@ -2,9 +2,11 @@
  * qtest I440FX test case
  *
  * Copyright IBM, Corp. 2012-2013
+ * Copyright Red Hat, Inc. 2013
  *
  * Authors:
  *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Laszlo Ersek      <lersek@redhat.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -18,6 +20,11 @@
 
 #include <glib.h>
 #include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <stdlib.h>
 
 #define BROKEN 1
 
@@ -26,16 +33,32 @@
 typedef struct TestData
 {
     int num_cpus;
-    QPCIBus *bus;
 } TestData;
 
+typedef struct FirmwareTestFixture {
+    /* decides whether we're testing -bios or -pflash */
+    bool is_bios;
+} FirmwareTestFixture;
+
+static QPCIBus *test_start_get_bus(const TestData *s)
+{
+    char *cmdline;
+    /* run qtest_start() with "-smp <num_cpus>", then return qpci_init_pc() */
+    cmdline = g_strdup_printf("-smp %d", s->num_cpus);
+    qtest_start(cmdline);
+    g_free(cmdline);
+    return qpci_init_pc();
+}
+
 static void test_i440fx_defaults(gconstpointer opaque)
 {
     const TestData *s = opaque;
+    QPCIBus *bus;
     QPCIDevice *dev;
     uint32_t value;
 
-    dev = qpci_device_find(s->bus, QPCI_DEVFN(0, 0));
+    bus = test_start_get_bus(s);
+    dev = qpci_device_find(bus, QPCI_DEVFN(0, 0));
     g_assert(dev != NULL);
 
     /* 3.2.2 */
@@ -119,6 +142,8 @@ static void test_i440fx_defaults(gconstpointer opaque)
     g_assert_cmpint(qpci_config_readb(dev, 0x91), ==, 0x00); /* ERRSTS */
     /* 3.2.26 */
     g_assert_cmpint(qpci_config_readb(dev, 0x93), ==, 0x00); /* TRC */
+
+    qtest_end();
 }
 
 #define PAM_RE 1
@@ -177,6 +202,7 @@ static void write_area(uint32_t start, uint32_t end, uint8_t value)
 static void test_i440fx_pam(gconstpointer opaque)
 {
     const TestData *s = opaque;
+    QPCIBus *bus;
     QPCIDevice *dev;
     int i;
     static struct {
@@ -199,7 +225,8 @@ static void test_i440fx_pam(gconstpointer opaque)
         { 0xEC000, 0xEFFFF }, /* BIOS Extension */
     };
 
-    dev = qpci_device_find(s->bus, QPCI_DEVFN(0, 0));
+    bus = test_start_get_bus(s);
+    dev = qpci_device_find(bus, QPCI_DEVFN(0, 0));
     g_assert(dev != NULL);
 
     for (i = 0; i < ARRAY_SIZE(pam_area); i++) {
@@ -252,34 +279,140 @@ static void test_i440fx_pam(gconstpointer opaque)
         /* Verify the area is not our new mask */
         g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x82));
     }
+    qtest_end();
+}
+
+#define BLOB_SIZE ((size_t)65536)
+#define ISA_BIOS_MAXSZ ((size_t)(128 * 1024))
+
+/* Build a temporary firmware blob of BLOB_SIZE bytes in which byte i holds
+ * the low eight bits of i.
+ * Returns the pathname reported by g_file_open_tmp() as a string the caller
+ * must g_free(); the descriptor is always closed before returning.
+ * On failure a diagnostic is printed to stderr, any file that was created is
+ * unlinked, and NULL is returned.
+ */
+static char *create_blob_file(void)
+{
+    GError *error = NULL;
+    char *pathname;
+    bool filled = false;
+    uint8_t *bytes;
+    void *map;
+    size_t i;
+    int fd;
+
+    fd = g_file_open_tmp("blob_XXXXXX", &pathname, &error);
+    if (fd == -1) {
+        fprintf(stderr, "unable to create blob file: %s\n", error->message);
+        g_error_free(error);
+        return NULL;
+    }
+
+    /* Grow the empty temporary file to its final size before mapping it */
+    if (ftruncate(fd, BLOB_SIZE) == -1) {
+        fprintf(stderr, "ftruncate(\"%s\", %zu): %s\n", pathname,
+                BLOB_SIZE, strerror(errno));
+        goto out_close;
+    }
+
+    map = mmap(NULL, BLOB_SIZE, PROT_WRITE, MAP_SHARED, fd, 0);
+    if (map == MAP_FAILED) {
+        fprintf(stderr, "mmap(\"%s\", %zu): %s\n", pathname, BLOB_SIZE,
+                strerror(errno));
+        goto out_close;
+    }
+
+    /* The conversion to uint8_t keeps only the low byte of the index */
+    bytes = map;
+    for (i = 0; i < BLOB_SIZE; ++i) {
+        bytes[i] = i;
+    }
+    munmap(map, BLOB_SIZE);
+    filled = true;
+
+out_close:
+    close(fd);
+    if (!filled) {
+        /* Do not leave a half-built blob behind */
+        unlink(pathname);
+        g_free(pathname);
+        return NULL;
+    }
+
+    return pathname;
+}
+
+/* Start QEMU with -S and a freshly created blob passed as -bios or -pflash
+ * (chosen by fixture->is_bios), then read guest memory with memread() and
+ * check the blob bytes in the BLOB_SIZE range ending at 4G and in the range
+ * ending at 1M.  @user_data is not used.
+ */
+static void test_i440fx_firmware(FirmwareTestFixture *fixture,
+                                 gconstpointer user_data)
+{
+    char *fw_pathname, *cmdline;
+    uint8_t *buf;
+    size_t i, isa_bios_size;
+
+    fw_pathname = create_blob_file();
+    g_assert(fw_pathname != NULL);
+
+    /* Better hope the user didn't put metacharacters in TMPDIR and co. */
+    cmdline = g_strdup_printf("-S %s %s",
+                              fixture->is_bios ? "-bios" : "-pflash",
+                              fw_pathname);
+    g_test_message("qemu cmdline: %s", cmdline);
+    qtest_start(cmdline);
+    g_free(cmdline);
+
+    /* Qemu has loaded the firmware (because qtest_start() only returns after
+     * the QMP handshake completes). We must unlink the firmware blob right
+     * here, because any assertion firing below would leak it in the
+     * filesystem. This is also the reason why we recreate the blob every time
+     * this function is invoked.
+     */
+    unlink(fw_pathname);
+    g_free(fw_pathname);
+
+    /* check below 4G */
+    buf = g_malloc0(BLOB_SIZE);
+    memread(0x100000000ULL - BLOB_SIZE, buf, BLOB_SIZE);
+    for (i = 0; i < BLOB_SIZE; ++i) {
+        g_assert_cmphex(buf[i], ==, (uint8_t)i);
+    }
+
+    /* check in ISA space too */
+    memset(buf, 0, BLOB_SIZE);
+    isa_bios_size = ISA_BIOS_MAXSZ < BLOB_SIZE ? ISA_BIOS_MAXSZ : BLOB_SIZE;
+    memread(0x100000 - isa_bios_size, buf, isa_bios_size);
+    /* the window ending at 1M is expected to show the tail of the blob */
+    for (i = 0; i < isa_bios_size; ++i) {
+        g_assert_cmphex(buf[i], ==,
+                        (uint8_t)((BLOB_SIZE - isa_bios_size) + i));
+    }
+
+    g_free(buf);
+    qtest_end();
+}
+
+/* Register test_i440fx_firmware under @testpath with a FirmwareTestFixture.
+ * @setup_fixture is installed as the fixture setup hook; no test data and no
+ * teardown hook are passed to g_test_add().
+ */
+static void add_firmware_test(const char *testpath,
+                              void (*setup_fixture)(FirmwareTestFixture *f,
+                                                    gconstpointer test_data))
+{
+    g_test_add(testpath, FirmwareTestFixture, NULL, setup_fixture,
+               test_i440fx_firmware, NULL);
+}
+
+/* Fixture setup hook: make the firmware test use -bios.  @user_data is unused. */
+static void request_bios(FirmwareTestFixture *fixture,
+                         gconstpointer user_data)
+{
+    fixture->is_bios = true;
+}
+
+/* Fixture setup hook: make the firmware test use -pflash.  @user_data is
+ * unused.
+ */
+static void request_pflash(FirmwareTestFixture *fixture,
+                           gconstpointer user_data)
+{
+    fixture->is_bios = false;
 }
 
 int main(int argc, char **argv)
 {
-    QTestState *s;
     TestData data;
-    char *cmdline;
     int ret;
 
     g_test_init(&argc, &argv, NULL);
 
     data.num_cpus = 1;
 
-    cmdline = g_strdup_printf("-smp %d", data.num_cpus);
-    s = qtest_start(cmdline);
-    g_free(cmdline);
-
-    data.bus = qpci_init_pc();
-
     g_test_add_data_func("/i440fx/defaults", &data, test_i440fx_defaults);
     g_test_add_data_func("/i440fx/pam", &data, test_i440fx_pam);
-    
+    add_firmware_test("/i440fx/firmware/bios", request_bios);
+    add_firmware_test("/i440fx/firmware/pflash", request_pflash);
 
     ret = g_test_run();
-
-    if (s) {
-        qtest_quit(s);
-    }
-
     return ret;
 }
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 18dcd61ef2..72eaad5b08 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -39,6 +39,29 @@ class ImageCommitTestCase(iotests.QMPTestCase):
         result = self.vm.qmp('query-block-jobs')
         self.assert_qmp(result, 'return', [])
 
+    def run_commit_test(self, top, base):
+        """Run a block-commit of top into base on drive0 to completion.
+
+        Issues 'block-commit' and expects an empty success reply, then polls
+        QMP events.  On BLOCK_JOB_READY the event's type, device and len are
+        checked and 'block-job-complete' is sent; on BLOCK_JOB_COMPLETED the
+        type, device, offset and len are checked (offset and len must both
+        equal self.image_len) and the loop ends.  Finally calls
+        assert_no_active_commit() and shuts the VM down.
+        """
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top=top, base=base)
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assert_qmp(event, 'data/type', 'commit')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+                elif event['event'] == 'BLOCK_JOB_READY':
+                    self.assert_qmp(event, 'data/type', 'commit')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    self.vm.qmp('block-job-complete', device='drive0')
+
+        self.assert_no_active_commit()
+        self.vm.shutdown()
+
 class TestSingleDrive(ImageCommitTestCase):
     image_len = 1 * 1024 * 1024
     test_len = 1 * 1024 * 256
@@ -59,23 +82,7 @@ class TestSingleDrive(ImageCommitTestCase):
         os.remove(backing_img)
 
     def test_commit(self):
-        self.assert_no_active_commit()
-        result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img)
-        self.assert_qmp(result, 'return', {})
-
-        completed = False
-        while not completed:
-            for event in self.vm.get_qmp_events(wait=True):
-                if event['event'] == 'BLOCK_JOB_COMPLETED':
-                    self.assert_qmp(event, 'data/type', 'commit')
-                    self.assert_qmp(event, 'data/device', 'drive0')
-                    self.assert_qmp(event, 'data/offset', self.image_len)
-                    self.assert_qmp(event, 'data/len', self.image_len)
-                    completed = True
-
-        self.assert_no_active_commit()
-        self.vm.shutdown()
-
+        self.run_commit_test(mid_img, backing_img)
         self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
         self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
 
@@ -102,10 +109,9 @@ class TestSingleDrive(ImageCommitTestCase):
         self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
 
     def test_top_is_active(self):
-        self.assert_no_active_commit()
-        result = self.vm.qmp('block-commit', device='drive0', top='%s' % test_img, base='%s' % backing_img)
-        self.assert_qmp(result, 'error/class', 'GenericError')
-        self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported')
+        self.run_commit_test(test_img, backing_img)
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
 
     def test_top_and_base_reversed(self):
         self.assert_no_active_commit()
@@ -166,23 +172,7 @@ class TestRelativePaths(ImageCommitTestCase):
                 raise
 
     def test_commit(self):
-        self.assert_no_active_commit()
-        result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img)
-        self.assert_qmp(result, 'return', {})
-
-        completed = False
-        while not completed:
-            for event in self.vm.get_qmp_events(wait=True):
-                if event['event'] == 'BLOCK_JOB_COMPLETED':
-                    self.assert_qmp(event, 'data/type', 'commit')
-                    self.assert_qmp(event, 'data/device', 'drive0')
-                    self.assert_qmp(event, 'data/offset', self.image_len)
-                    self.assert_qmp(event, 'data/len', self.image_len)
-                    completed = True
-
-        self.assert_no_active_commit()
-        self.vm.shutdown()
-
+        self.run_commit_test(self.mid_img, self.backing_img)
         self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', self.backing_img_abs).find("verification failed"))
         self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', self.backing_img_abs).find("verification failed"))
 
@@ -209,10 +199,9 @@ class TestRelativePaths(ImageCommitTestCase):
         self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
 
     def test_top_is_active(self):
-        self.assert_no_active_commit()
-        result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.test_img, base='%s' % self.backing_img)
-        self.assert_qmp(result, 'error/class', 'GenericError')
-        self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported')
+        self.run_commit_test(self.test_img, self.backing_img)
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', self.backing_img_abs).find("verification failed"))
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', self.backing_img_abs).find("verification failed"))
 
     def test_top_and_base_reversed(self):
         self.assert_no_active_commit()
@@ -229,6 +218,7 @@ class TestSetSpeed(ImageCommitTestCase):
         qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
         qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
         qemu_io('-c', 'write -P 0x1 0 512', test_img)
+        qemu_io('-c', 'write -P 0xef 524288 524288', mid_img)
         self.vm = iotests.VM().add_drive(test_img)
         self.vm.launch()
 
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index 49e95a20cf..c2cadba2fc 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -91,7 +91,6 @@ Testing: -drive if=virtio
 QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) QEMU_PROG: -drive if=virtio: Device needs media, but drive is empty
 QEMU_PROG: -drive if=virtio: Device initialization failed.
-QEMU_PROG: -drive if=virtio: Device initialization failed.
 QEMU_PROG: -drive if=virtio: Device 'virtio-blk-pci' could not be initialized
 
 Testing: -drive if=scsi
diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059
index 73941c3e61..65bea1d6c6 100755
--- a/tests/qemu-iotests/059
+++ b/tests/qemu-iotests/059
@@ -81,6 +81,20 @@ IMGOPTS="subformat=twoGbMaxExtentFlat" _make_test_img 1000G
 $QEMU_IMG info $TEST_IMG | _filter_testdir | sed -e 's/cid: [0-9]*/cid: XXXXXXXX/'
 
 echo
+echo "=== Testing malformed VMFS extent description line ==="
+cat >"$TEST_IMG" <<EOF
+# Disk DescriptorFile
+version=1
+CID=58ab4847
+parentCID=ffffffff
+createType="vmfs"
+
+# Extent description
+RW 12582912 VMFS "dummy.vmdk" 1
+EOF
+_img_info
+
+echo
 echo "=== Testing version 3 ==="
 _use_sample_img iotest-version3.vmdk.bz2
 _img_info
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index 4ff935c6f4..16ab7c6c1f 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2038,6 +2038,11 @@ Format specific information:
             filename: TEST_DIR/t-f500.vmdk
             format: FLAT
 
+=== Testing malformed VMFS extent description line ===
+qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Invalid extent lines:
+RW 12582912 VMFS "dummy.IMGFMT" 1
+
+
 === Testing version 3 ===
 image: TEST_DIR/iotest-version3.IMGFMT
 file format: IMGFMT
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
new file mode 100644
index 0000000000..75cd1a1fd4
--- /dev/null
+++ b/tests/test-vmstate.c
@@ -0,0 +1,357 @@
+/*
+ *  Test code for VMState
+ *
+ *  Copyright (c) 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <glib.h>
+
+#include "qemu-common.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "block/coroutine.h"
+
+/* mkstemp() template; main() lets mkstemp() replace the XXXXXX suffix */
+static char temp_file[] = "/tmp/vmst.test.XXXXXX";
+/* Descriptor of the scratch file opened in main(); dup_temp_fd() copies it */
+static int temp_fd;
+
+/* Stand-in for the real yield_until_fd_readable(), provided here so the test
+ * does not have to pull in the coroutine code: it calls select() with no
+ * timeout on a read set containing only @fd.
+ */
+void yield_until_fd_readable(int fd)
+{
+    fd_set readable;
+
+    FD_ZERO(&readable);
+    FD_SET(fd, &readable);
+    select(fd + 1, &readable, NULL, NULL, NULL);
+}
+
+/* Return a duplicate of temp_fd positioned at offset 0; when @truncate is
+ * true the file is also cut to zero length.  The tests pass the returned
+ * descriptor to qemu_fdopen().
+ */
+static int dup_temp_fd(bool truncate)
+{
+    int fd = dup(temp_fd);
+
+    /* Fail here rather than hand -1 on to lseek() and qemu_fdopen() */
+    g_assert_cmpint(fd, !=, -1);
+    /* A dup()ed descriptor shares its file offset with temp_fd: rewind it */
+    g_assert_cmpint(lseek(fd, 0, SEEK_SET), ==, 0);
+    if (truncate) {
+        g_assert_cmpint(ftruncate(fd, 0), ==, 0);
+    }
+    return fd;
+}
+
+/* Object saved and loaded by the tests in this file.  a..f are the fields
+ * named in the vmstate descriptions; skip_c_e is only read by test_skip().
+ */
+typedef struct TestStruct {
+    uint32_t a, b, c, e;
+    uint64_t d, f;
+    bool skip_c_e;
+} TestStruct;
+
+
+/* Version-1 description of TestStruct listing a, b and c as 32-bit fields
+ * followed by d as a 64-bit field; e, f and skip_c_e are not listed.
+ */
+static const VMStateDescription vmstate_simple = {
+    .name = "test",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_UINT32(a, TestStruct),
+        VMSTATE_UINT32(b, TestStruct),
+        VMSTATE_UINT32(c, TestStruct),
+        VMSTATE_UINT64(d, TestStruct),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Save a TestStruct through vmstate_simple and compare the bytes that landed
+ * in the scratch file with the expected most-significant-byte-first encoding
+ * of a, b, c and d; nothing may follow them.
+ */
+static void test_simple_save(void)
+{
+    TestStruct obj = { .a = 1, .b = 2, .c = 3, .d = 4 };
+    uint8_t expected[] = {
+        0, 0, 0, 1, /* a */
+        0, 0, 0, 2, /* b */
+        0, 0, 0, 3, /* c */
+        0, 0, 0, 0, 0, 0, 0, 4, /* d */
+    };
+    uint8_t result[sizeof(expected)];
+    QEMUFile *writer, *reader;
+
+    writer = qemu_fdopen(dup_temp_fd(true), "wb");
+    vmstate_save_state(writer, &vmstate_simple, &obj);
+    g_assert(!qemu_file_get_error(writer));
+    qemu_fclose(writer);
+
+    reader = qemu_fdopen(dup_temp_fd(false), "rb");
+    g_assert_cmpint(qemu_get_buffer(reader, result, sizeof(result)), ==,
+                    sizeof(result));
+    g_assert(!qemu_file_get_error(reader));
+    g_assert_cmpint(memcmp(result, expected, sizeof(result)), ==, 0);
+
+    /* Reading one more byte has to fail: the stream must end here */
+    qemu_get_byte(reader);
+    g_assert_cmpint(qemu_file_get_error(reader), ==, -EIO);
+
+    qemu_fclose(reader);
+}
+
+/* Write a hand-built byte stream for a, b, c and d to the scratch file, load
+ * it through vmstate_simple at version 1 and check the resulting field values.
+ */
+static void test_simple_load(void)
+{
+    uint8_t buf[] = {
+        0, 0, 0, 10,             /* a */
+        0, 0, 0, 20,             /* b */
+        0, 0, 0, 30,             /* c */
+        0, 0, 0, 0, 0, 0, 0, 40, /* d */
+        QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+    };
+    TestStruct obj;
+    QEMUFile *writer, *reader;
+
+    writer = qemu_fdopen(dup_temp_fd(true), "wb");
+    qemu_put_buffer(writer, buf, sizeof(buf));
+    qemu_fclose(writer);
+
+    reader = qemu_fdopen(dup_temp_fd(false), "rb");
+    vmstate_load_state(reader, &vmstate_simple, &obj, 1);
+    g_assert(!qemu_file_get_error(reader));
+    g_assert_cmpint(obj.a, ==, 10);
+    g_assert_cmpint(obj.b, ==, 20);
+    g_assert_cmpint(obj.c, ==, 30);
+    g_assert_cmpint(obj.d, ==, 40);
+    qemu_fclose(reader);
+}
+
+/* Version-2 description (minimum version 1) of TestStruct in which b, e and f
+ * are tagged as version-2 fields while a, c and d carry no version tag.
+ */
+static const VMStateDescription vmstate_versioned = {
+    .name = "test",
+    .version_id = 2,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT32(a, TestStruct),
+        VMSTATE_UINT32_V(b, TestStruct, 2), /* Versioned field in the middle, so
+                                             * we catch bugs more easily.
+                                             */
+        VMSTATE_UINT32(c, TestStruct),
+        VMSTATE_UINT64(d, TestStruct),
+        VMSTATE_UINT32_V(e, TestStruct, 2),
+        VMSTATE_UINT64_V(f, TestStruct, 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Load a stream that contains only a, c and d through vmstate_versioned at
+ * version 1 and check that the version-2 fields b, e and f keep the values
+ * they were initialised with.
+ */
+static void test_load_v1(void)
+{
+    uint8_t buf[] = {
+        0, 0, 0, 10,             /* a */
+        0, 0, 0, 30,             /* c */
+        0, 0, 0, 0, 0, 0, 0, 40, /* d */
+        QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+    };
+    TestStruct obj = { .b = 200, .e = 500, .f = 600 };
+    QEMUFile *writer, *reader;
+
+    writer = qemu_fdopen(dup_temp_fd(true), "wb");
+    qemu_put_buffer(writer, buf, sizeof(buf));
+    qemu_fclose(writer);
+
+    reader = qemu_fdopen(dup_temp_fd(false), "rb");
+    vmstate_load_state(reader, &vmstate_versioned, &obj, 1);
+    g_assert(!qemu_file_get_error(reader));
+    g_assert_cmpint(obj.a, ==, 10);
+    g_assert_cmpint(obj.b, ==, 200);
+    g_assert_cmpint(obj.c, ==, 30);
+    g_assert_cmpint(obj.d, ==, 40);
+    g_assert_cmpint(obj.e, ==, 500);
+    g_assert_cmpint(obj.f, ==, 600);
+    qemu_fclose(reader);
+}
+
+/* Load a stream that contains all six fields through vmstate_versioned at
+ * version 2 and check every field value.
+ */
+static void test_load_v2(void)
+{
+    QEMUFile *fsave = qemu_fdopen(dup_temp_fd(true), "wb");
+    uint8_t buf[] = {
+        0, 0, 0, 10,             /* a */
+        0, 0, 0, 20,             /* b */
+        0, 0, 0, 30,             /* c */
+        0, 0, 0, 0, 0, 0, 0, 40, /* d */
+        0, 0, 0, 50,             /* e */
+        0, 0, 0, 0, 0, 0, 0, 60, /* f */
+        QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+    };
+    qemu_put_buffer(fsave, buf, sizeof(buf));
+    qemu_fclose(fsave);
+
+    QEMUFile *loading = qemu_fdopen(dup_temp_fd(false), "rb");
+    TestStruct obj;
+    vmstate_load_state(loading, &vmstate_versioned, &obj, 2);
+    /* Same stream-error check as the other load tests in this file */
+    g_assert(!qemu_file_get_error(loading));
+    g_assert_cmpint(obj.a, ==, 10);
+    g_assert_cmpint(obj.b, ==, 20);
+    g_assert_cmpint(obj.c, ==, 30);
+    g_assert_cmpint(obj.d, ==, 40);
+    g_assert_cmpint(obj.e, ==, 50);
+    g_assert_cmpint(obj.f, ==, 60);
+    qemu_fclose(loading);
+}
+
+/* Predicate attached to fields c and e of vmstate_skipping.  @opaque is the
+ * TestStruct being processed; the result is true while skip_c_e is false.
+ * @version_id is not consulted.
+ */
+static bool test_skip(void *opaque, int version_id)
+{
+    const TestStruct *obj = opaque;
+
+    return !obj->skip_c_e;
+}
+
+/* Version-2 description (minimum version 1) of TestStruct in which c and e
+ * are guarded by test_skip() and f is tagged as a version-2 field.  The
+ * expected byte streams in the save tests show c and e being left out when
+ * skip_c_e is true.
+ */
+static const VMStateDescription vmstate_skipping = {
+    .name = "test",
+    .version_id = 2,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT32(a, TestStruct),
+        VMSTATE_UINT32(b, TestStruct),
+        VMSTATE_UINT32_TEST(c, TestStruct, test_skip),
+        VMSTATE_UINT64(d, TestStruct),
+        VMSTATE_UINT32_TEST(e, TestStruct, test_skip),
+        VMSTATE_UINT64_V(f, TestStruct, 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+
+/* Save through vmstate_skipping with skip_c_e cleared and check that all six
+ * fields, c and e included, appear in the written bytes and nothing follows.
+ */
+static void test_save_noskip(void)
+{
+    TestStruct obj = { .a = 1, .b = 2, .c = 3, .d = 4, .e = 5, .f = 6,
+                       .skip_c_e = false };
+    uint8_t expected[] = {
+        0, 0, 0, 1,             /* a */
+        0, 0, 0, 2,             /* b */
+        0, 0, 0, 3,             /* c */
+        0, 0, 0, 0, 0, 0, 0, 4, /* d */
+        0, 0, 0, 5,             /* e */
+        0, 0, 0, 0, 0, 0, 0, 6, /* f */
+    };
+    uint8_t result[sizeof(expected)];
+    QEMUFile *writer, *reader;
+
+    writer = qemu_fdopen(dup_temp_fd(true), "wb");
+    vmstate_save_state(writer, &vmstate_skipping, &obj);
+    g_assert(!qemu_file_get_error(writer));
+    qemu_fclose(writer);
+
+    reader = qemu_fdopen(dup_temp_fd(false), "rb");
+    g_assert_cmpint(qemu_get_buffer(reader, result, sizeof(result)), ==,
+                    sizeof(result));
+    g_assert(!qemu_file_get_error(reader));
+    g_assert_cmpint(memcmp(result, expected, sizeof(result)), ==, 0);
+
+    /* Reading one more byte has to fail: the stream must end here */
+    qemu_get_byte(reader);
+    g_assert_cmpint(qemu_file_get_error(reader), ==, -EIO);
+
+    qemu_fclose(reader);
+}
+
+/* Save through vmstate_skipping with skip_c_e set and check that only a, b, d
+ * and f appear in the written bytes and nothing follows.
+ */
+static void test_save_skip(void)
+{
+    TestStruct obj = { .a = 1, .b = 2, .c = 3, .d = 4, .e = 5, .f = 6,
+                       .skip_c_e = true };
+    uint8_t expected[] = {
+        0, 0, 0, 1,             /* a */
+        0, 0, 0, 2,             /* b */
+        0, 0, 0, 0, 0, 0, 0, 4, /* d */
+        0, 0, 0, 0, 0, 0, 0, 6, /* f */
+    };
+    uint8_t result[sizeof(expected)];
+    QEMUFile *writer, *reader;
+
+    writer = qemu_fdopen(dup_temp_fd(true), "wb");
+    vmstate_save_state(writer, &vmstate_skipping, &obj);
+    g_assert(!qemu_file_get_error(writer));
+    qemu_fclose(writer);
+
+    reader = qemu_fdopen(dup_temp_fd(false), "rb");
+    g_assert_cmpint(qemu_get_buffer(reader, result, sizeof(result)), ==,
+                    sizeof(result));
+    g_assert(!qemu_file_get_error(reader));
+    g_assert_cmpint(memcmp(result, expected, sizeof(result)), ==, 0);
+
+    /* Reading one more byte has to fail: the stream must end here */
+    qemu_get_byte(reader);
+    g_assert_cmpint(qemu_file_get_error(reader), ==, -EIO);
+
+    qemu_fclose(reader);
+}
+
+/* Load a stream holding all six fields through vmstate_skipping at version 2
+ * with skip_c_e cleared, and check that every field, c and e included, took
+ * its value from the stream.
+ */
+static void test_load_noskip(void)
+{
+    uint8_t buf[] = {
+        0, 0, 0, 10,             /* a */
+        0, 0, 0, 20,             /* b */
+        0, 0, 0, 30,             /* c */
+        0, 0, 0, 0, 0, 0, 0, 40, /* d */
+        0, 0, 0, 50,             /* e */
+        0, 0, 0, 0, 0, 0, 0, 60, /* f */
+        QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+    };
+    TestStruct obj = { .skip_c_e = false };
+    QEMUFile *writer, *reader;
+
+    writer = qemu_fdopen(dup_temp_fd(true), "wb");
+    qemu_put_buffer(writer, buf, sizeof(buf));
+    qemu_fclose(writer);
+
+    reader = qemu_fdopen(dup_temp_fd(false), "rb");
+    vmstate_load_state(reader, &vmstate_skipping, &obj, 2);
+    g_assert(!qemu_file_get_error(reader));
+    g_assert_cmpint(obj.a, ==, 10);
+    g_assert_cmpint(obj.b, ==, 20);
+    g_assert_cmpint(obj.c, ==, 30);
+    g_assert_cmpint(obj.d, ==, 40);
+    g_assert_cmpint(obj.e, ==, 50);
+    g_assert_cmpint(obj.f, ==, 60);
+    qemu_fclose(reader);
+}
+
+/* Load a stream holding only a, b, d and f through vmstate_skipping at
+ * version 2 with skip_c_e set, and check that c and e keep the values they
+ * were initialised with.
+ */
+static void test_load_skip(void)
+{
+    uint8_t buf[] = {
+        0, 0, 0, 10,             /* a */
+        0, 0, 0, 20,             /* b */
+        0, 0, 0, 0, 0, 0, 0, 40, /* d */
+        0, 0, 0, 0, 0, 0, 0, 60, /* f */
+        QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+    };
+    TestStruct obj = { .skip_c_e = true, .c = 300, .e = 500 };
+    QEMUFile *writer, *reader;
+
+    writer = qemu_fdopen(dup_temp_fd(true), "wb");
+    qemu_put_buffer(writer, buf, sizeof(buf));
+    qemu_fclose(writer);
+
+    reader = qemu_fdopen(dup_temp_fd(false), "rb");
+    vmstate_load_state(reader, &vmstate_skipping, &obj, 2);
+    g_assert(!qemu_file_get_error(reader));
+    g_assert_cmpint(obj.a, ==, 10);
+    g_assert_cmpint(obj.b, ==, 20);
+    g_assert_cmpint(obj.c, ==, 300);
+    g_assert_cmpint(obj.d, ==, 40);
+    g_assert_cmpint(obj.e, ==, 500);
+    g_assert_cmpint(obj.f, ==, 60);
+    qemu_fclose(reader);
+}
+
+/* Create the scratch file used by every test, register and run the tests,
+ * then close and remove the scratch file.  Returns the g_test_run() result.
+ */
+int main(int argc, char **argv)
+{
+    int ret;
+
+    temp_fd = mkstemp(temp_file);
+    /* Every test dup()s temp_fd, so nothing can run without it */
+    g_assert_cmpint(temp_fd, !=, -1);
+
+    g_test_init(&argc, &argv, NULL);
+    g_test_add_func("/vmstate/simple/save", test_simple_save);
+    g_test_add_func("/vmstate/simple/load", test_simple_load);
+    g_test_add_func("/vmstate/versioned/load/v1", test_load_v1);
+    g_test_add_func("/vmstate/versioned/load/v2", test_load_v2);
+    g_test_add_func("/vmstate/field_exists/load/noskip", test_load_noskip);
+    g_test_add_func("/vmstate/field_exists/load/skip", test_load_skip);
+    g_test_add_func("/vmstate/field_exists/save/noskip", test_save_noskip);
+    g_test_add_func("/vmstate/field_exists/save/skip", test_save_skip);
+    ret = g_test_run();
+
+    close(temp_fd);
+    unlink(temp_file);
+
+    return ret;
+}
diff --git a/translate-all.c b/translate-all.c
index 1c63d78b7d..105c25aff3 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -703,11 +703,10 @@ void tb_flush(CPUArchState *env1)
     CPU_FOREACH(cpu) {
         CPUArchState *env = cpu->env_ptr;
 
-        memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
+        memset(env->tb_jmp_cache, 0, sizeof(env->tb_jmp_cache));
     }
 
-    memset(tcg_ctx.tb_ctx.tb_phys_hash, 0,
-            CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
+    memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
     page_flush_tb();
 
     tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
diff --git a/ui/cocoa.m b/ui/cocoa.m
index be491794dc..866177770a 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -52,7 +52,7 @@
 #define COCOA_MOUSE_EVENT \
         if (isTabletEnabled) { \
             kbd_mouse_event((int)(p.x * 0x7FFF / (screen.width - 1)), (int)((screen.height - p.y) * 0x7FFF / (screen.height - 1)), 0, buttons); \
-        } else if (isMouseGrabed) { \
+        } else if (isMouseGrabbed) { \
             kbd_mouse_event((int)[event deltaX], (int)[event deltaY], 0, buttons); \
         } else { \
             [NSApp sendEvent:event]; \
@@ -129,8 +129,8 @@ int keymap[] =
     14, //  51      0x33    0x0e            BKSP    QZ_BACKSPACE
     0,  //  52      0x34    Undefined
     1,  //  53      0x35    0x01            ESC     QZ_ESCAPE
-    0,  //  54      0x36                            QZ_RMETA
-    0,  //  55      0x37                            QZ_LMETA
+    220, // 54      0x36    0xdc    E0,5C   R GUI   QZ_RMETA
+    219, // 55      0x37    0xdb    E0,5B   L GUI   QZ_LMETA
     42, //  56      0x38    0x2a            L SHFT  QZ_LSHIFT
     58, //  57      0x39    0x3a            CAPS    QZ_CAPSLOCK
     56, //  58      0x3A    0x38            L ALT   QZ_LALT
@@ -204,10 +204,8 @@ int keymap[] =
     200,//  126     0x7E    0xc8    E0,48   U ARROW QZ_UP
 /* completed according to http://www.libsdl.org/cgi/cvsweb.cgi/SDL12/src/video/quartz/SDL_QuartzKeys.h?rev=1.6&content-type=text/x-cvsweb-markup */
 
-/* Aditional 104 Key XP-Keyboard Scancodes from http://www.computer-engineering.org/ps2keyboard/scancodes1.html */
+/* Additional 104 Key XP-Keyboard Scancodes from http://www.computer-engineering.org/ps2keyboard/scancodes1.html */
 /*
-    219 //          0xdb            e0,5b   L GUI
-    220 //          0xdc            e0,5c   R GUI
     221 //          0xdd            e0,5d   APPS
         //              E0,2A,E0,37         PRNT SCRN
         //              E1,1D,45,E1,9D,C5   PAUSE
@@ -240,9 +238,8 @@ int keymap[] =
 
 static int cocoa_keycode_to_qemu(int keycode)
 {
-    if((sizeof(keymap)/sizeof(int)) <= keycode)
-    {
-        printf("(cocoa) warning unknow keycode 0x%x\n", keycode);
+    if (ARRAY_SIZE(keymap) <= keycode) {
+        fprintf(stderr, "(cocoa) warning unknown keycode 0x%x\n", keycode);
         return 0;
     }
     return keymap[keycode];
@@ -262,7 +259,7 @@ static int cocoa_keycode_to_qemu(int keycode)
     float cx,cy,cw,ch,cdx,cdy;
     CGDataProviderRef dataProviderRef;
     int modifiers_state[256];
-    BOOL isMouseGrabed;
+    BOOL isMouseGrabbed;
     BOOL isFullscreen;
     BOOL isAbsoluteEnabled;
     BOOL isTabletEnabled;
@@ -273,7 +270,7 @@ static int cocoa_keycode_to_qemu(int keycode)
 - (void) toggleFullScreen:(id)sender;
 - (void) handleEvent:(NSEvent *)event;
 - (void) setAbsoluteEnabled:(BOOL)tIsAbsoluteEnabled;
-- (BOOL) isMouseGrabed;
+- (BOOL) isMouseGrabbed;
 - (BOOL) isAbsoluteEnabled;
 - (float) cdx;
 - (float) cdy;
@@ -324,7 +321,12 @@ QemuCocoaView *cocoaView;
     CGContextSetShouldAntialias (viewContextRef, NO);
 
     // draw screen bitmap directly to Core Graphics context
-    if (dataProviderRef) {
+    if (!dataProviderRef) {
+        // Draw request before any guest device has set up a framebuffer:
+        // just draw an opaque black rectangle
+        CGContextSetRGBFillColor(viewContextRef, 0, 0, 0, 1.0);
+        CGContextFillRect(viewContextRef, NSRectToCGRect(rect));
+    } else {
         CGImageRef imageRef = CGImageCreate(
             screen.width, //width
             screen.height, //height
@@ -408,31 +410,41 @@ QemuCocoaView *cocoaView;
 
     int w = surface_width(surface);
     int h = surface_height(surface);
+    bool isResize = (w != screen.width || h != screen.height);
+
+    int oldh = screen.height;
+    if (isResize) {
+        // Resize before we trigger the redraw, or we'll redraw at the wrong size
+        COCOA_DEBUG("switchSurface: new size %d x %d\n", w, h);
+        screen.width = w;
+        screen.height = h;
+        [self setContentDimensions];
+        [self setFrame:NSMakeRect(cx, cy, cw, ch)];
+    }
 
     // update screenBuffer
     if (dataProviderRef)
         CGDataProviderRelease(dataProviderRef);
 
     //sync host window color space with guests
-	screen.bitsPerPixel = surface_bits_per_pixel(surface);
-	screen.bitsPerComponent = surface_bytes_per_pixel(surface) * 2;
+    screen.bitsPerPixel = surface_bits_per_pixel(surface);
+    screen.bitsPerComponent = surface_bytes_per_pixel(surface) * 2;
 
     dataProviderRef = CGDataProviderCreateWithData(NULL, surface_data(surface), w * 4 * h, NULL);
 
     // update windows
     if (isFullscreen) {
         [[fullScreenWindow contentView] setFrame:[[NSScreen mainScreen] frame]];
-        [normalWindow setFrame:NSMakeRect([normalWindow frame].origin.x, [normalWindow frame].origin.y - h + screen.height, w, h + [normalWindow frame].size.height - screen.height) display:NO animate:NO];
+        [normalWindow setFrame:NSMakeRect([normalWindow frame].origin.x, [normalWindow frame].origin.y - h + oldh, w, h + [normalWindow frame].size.height - oldh) display:NO animate:NO];
     } else {
         if (qemu_name)
             [normalWindow setTitle:[NSString stringWithFormat:@"QEMU %s", qemu_name]];
-        [normalWindow setFrame:NSMakeRect([normalWindow frame].origin.x, [normalWindow frame].origin.y - h + screen.height, w, h + [normalWindow frame].size.height - screen.height) display:YES animate:NO];
+        [normalWindow setFrame:NSMakeRect([normalWindow frame].origin.x, [normalWindow frame].origin.y - h + oldh, w, h + [normalWindow frame].size.height - oldh) display:YES animate:NO];
+    }
+
+    if (isResize) {
+        [normalWindow center];
     }
-    screen.width = w;
-    screen.height = h;
-	[normalWindow center];
-    [self setContentDimensions];
-    [self setFrame:NSMakeRect(cx, cy, cw, ch)];
 }
 
 - (void) toggleFullScreen:(id)sender
@@ -494,6 +506,12 @@ QemuCocoaView *cocoaView;
     switch ([event type]) {
         case NSFlagsChanged:
             keycode = cocoa_keycode_to_qemu([event keyCode]);
+
+            if ((keycode == 219 || keycode == 220) && !isMouseGrabbed) {
+              /* Don't pass command key changes to guest unless mouse is grabbed */
+              keycode = 0;
+            }
+
             if (keycode) {
                 if (keycode == 58 || keycode == 69) { // emulate caps lock and num lock keydown and keyup
                     kbd_put_keycode(keycode);
@@ -517,15 +535,15 @@ QemuCocoaView *cocoaView;
             }
             break;
         case NSKeyDown:
+            keycode = cocoa_keycode_to_qemu([event keyCode]);
 
-            // forward command Key Combos
-            if ([event modifierFlags] & NSCommandKeyMask) {
+            // forward command key combos to the host UI unless the mouse is grabbed
+            if (!isMouseGrabbed && ([event modifierFlags] & NSCommandKeyMask)) {
                 [NSApp sendEvent:event];
                 return;
             }
 
             // default
-            keycode = cocoa_keycode_to_qemu([event keyCode]);
 
             // handle control + alt Key Combos (ctrl+alt is reserved for QEMU)
             if (([event modifierFlags] & NSControlKeyMask) && ([event modifierFlags] & NSAlternateKeyMask)) {
@@ -581,6 +599,13 @@ QemuCocoaView *cocoaView;
             break;
         case NSKeyUp:
             keycode = cocoa_keycode_to_qemu([event keyCode]);
+
+            // don't pass the guest a spurious key-up if we treated this
+            // command-key combo as a host UI action
+            if (!isMouseGrabbed && ([event modifierFlags] & NSCommandKeyMask)) {
+                return;
+            }
+
             if (qemu_console_is_graphic(NULL)) {
                 if (keycode & 0x80)
                     kbd_put_keycode(0xe0);
@@ -638,7 +663,7 @@ QemuCocoaView *cocoaView;
         case NSLeftMouseUp:
             if (isTabletEnabled) {
                     COCOA_MOUSE_EVENT
-            } else if (!isMouseGrabed) {
+            } else if (!isMouseGrabbed) {
                 if (p.x > -1 && p.x < screen.width && p.y > -1 && p.y < screen.height) {
                     [self grabMouse];
                 } else {
@@ -655,7 +680,7 @@ QemuCocoaView *cocoaView;
             COCOA_MOUSE_EVENT
             break;
         case NSScrollWheel:
-            if (isTabletEnabled || isMouseGrabed) {
+            if (isTabletEnabled || isMouseGrabbed) {
                 kbd_mouse_event(0, 0, -[event deltaY], 0);
             } else {
                 [NSApp sendEvent:event];
@@ -678,7 +703,7 @@ QemuCocoaView *cocoaView;
     }
     [NSCursor hide];
     CGAssociateMouseAndMouseCursorPosition(FALSE);
-    isMouseGrabed = TRUE; // while isMouseGrabed = TRUE, QemuCocoaApp sends all events to [cocoaView handleEvent:]
+    isMouseGrabbed = TRUE; // while isMouseGrabbed = TRUE, QemuCocoaApp sends all events to [cocoaView handleEvent:]
 }
 
 - (void) ungrabMouse
@@ -693,11 +718,11 @@ QemuCocoaView *cocoaView;
     }
     [NSCursor unhide];
     CGAssociateMouseAndMouseCursorPosition(TRUE);
-    isMouseGrabed = FALSE;
+    isMouseGrabbed = FALSE;
 }
 
 - (void) setAbsoluteEnabled:(BOOL)tIsAbsoluteEnabled {isAbsoluteEnabled = tIsAbsoluteEnabled;}
-- (BOOL) isMouseGrabed {return isMouseGrabed;}
+- (BOOL) isMouseGrabbed {return isMouseGrabbed;}
 - (BOOL) isAbsoluteEnabled {return isAbsoluteEnabled;}
 - (float) cdx {return cdx;}
 - (float) cdy {return cdy;}
@@ -749,7 +774,7 @@ QemuCocoaView *cocoaView;
         [normalWindow setContentView:cocoaView];
         [normalWindow useOptimizedDrawing:YES];
         [normalWindow makeKeyAndOrderFront:self];
-		[normalWindow center];
+        [normalWindow center];
 
     }
     return self;
@@ -768,14 +793,14 @@ QemuCocoaView *cocoaView;
 {
     COCOA_DEBUG("QemuCocoaAppController: applicationDidFinishLaunching\n");
 
-    // Display an open dialog box if no argument were passed or
+    // Display an open dialog box if no arguments were passed or
     // if qemu was launched from the finder ( the Finder passes "-psn" )
     if( gArgc <= 1 || strncmp ((char *)gArgv[1], "-psn", 4) == 0) {
         NSOpenPanel *op = [[NSOpenPanel alloc] init];
         [op setPrompt:@"Boot image"];
         [op setMessage:@"Select the disk image you want to boot.\n\nHit the \"Cancel\" button to quit"];
         NSArray *filetypes = [NSArray arrayWithObjects:@"img", @"iso", @"dmg",
-                                 @"qcow", @"cow", @"cloop", @"vmdk", nil];
+                                 @"qcow", @"qcow2", @"cow", @"cloop", @"vmdk", nil];
 #if (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6)
         [op setAllowedFileTypes:filetypes];
         [op beginSheetModalForWindow:normalWindow
@@ -823,18 +848,18 @@ QemuCocoaView *cocoaView;
     if(returnCode == NSCancelButton) {
         exit(0);
     } else if(returnCode == NSOKButton) {
-        const char *bin = "qemu";
         char *img = (char*)[ [ [ sheet URL ] path ] cStringUsingEncoding:NSASCIIStringEncoding];
 
-        char **argv = (char**)malloc( sizeof(char*)*3 );
+        char **argv = g_new(char *, 4);
 
         [sheet close];
 
-        argv[0] = g_strdup_printf("%s", bin);
-        argv[1] = g_strdup_printf("-hda");
-        argv[2] = g_strdup_printf("%s", img);
+        argv[0] = g_strdup(gArgv[0]);
+        argv[1] = g_strdup("-hda");
+        argv[2] = g_strdup(img);
+        argv[3] = NULL;
 
-        printf("Using argc %d argv %s -hda %s\n", 3, bin, img);
+        // printf("Using argc %d argv %s -hda %s\n", 3, gArgv[0], img);
 
         [self startEmulationWithArgc:3 argv:(char**)argv];
     }
@@ -1000,7 +1025,7 @@ static void cocoa_refresh(DisplayChangeListener *dcl)
 
     if (kbd_mouse_is_absolute()) {
         if (![cocoaView isAbsoluteEnabled]) {
-            if ([cocoaView isMouseGrabed]) {
+            if ([cocoaView isMouseGrabbed]) {
                 [cocoaView ungrabMouse];
             }
         }
diff --git a/util/bitmap.c b/util/bitmap.c
index 687841dcec..9c6bb526f6 100644
--- a/util/bitmap.c
+++ b/util/bitmap.c
@@ -36,9 +36,9 @@
  * endian architectures.
  */
 
-int slow_bitmap_empty(const unsigned long *bitmap, int bits)
+int slow_bitmap_empty(const unsigned long *bitmap, long bits)
 {
-    int k, lim = bits/BITS_PER_LONG;
+    long k, lim = bits/BITS_PER_LONG;
 
     for (k = 0; k < lim; ++k) {
         if (bitmap[k]) {
@@ -54,9 +54,9 @@ int slow_bitmap_empty(const unsigned long *bitmap, int bits)
     return 1;
 }
 
-int slow_bitmap_full(const unsigned long *bitmap, int bits)
+int slow_bitmap_full(const unsigned long *bitmap, long bits)
 {
-    int k, lim = bits/BITS_PER_LONG;
+    long k, lim = bits/BITS_PER_LONG;
 
     for (k = 0; k < lim; ++k) {
         if (~bitmap[k]) {
@@ -74,9 +74,9 @@ int slow_bitmap_full(const unsigned long *bitmap, int bits)
 }
 
 int slow_bitmap_equal(const unsigned long *bitmap1,
-                      const unsigned long *bitmap2, int bits)
+                      const unsigned long *bitmap2, long bits)
 {
-    int k, lim = bits/BITS_PER_LONG;
+    long k, lim = bits/BITS_PER_LONG;
 
     for (k = 0; k < lim; ++k) {
         if (bitmap1[k] != bitmap2[k]) {
@@ -94,9 +94,9 @@ int slow_bitmap_equal(const unsigned long *bitmap1,
 }
 
 void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
-                            int bits)
+                            long bits)
 {
-    int k, lim = bits/BITS_PER_LONG;
+    long k, lim = bits/BITS_PER_LONG;
 
     for (k = 0; k < lim; ++k) {
         dst[k] = ~src[k];
@@ -108,10 +108,10 @@ void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
 }
 
 int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-                    const unsigned long *bitmap2, int bits)
+                    const unsigned long *bitmap2, long bits)
 {
-    int k;
-    int nr = BITS_TO_LONGS(bits);
+    long k;
+    long nr = BITS_TO_LONGS(bits);
     unsigned long result = 0;
 
     for (k = 0; k < nr; k++) {
@@ -121,10 +121,10 @@ int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 }
 
 void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-                    const unsigned long *bitmap2, int bits)
+                    const unsigned long *bitmap2, long bits)
 {
-    int k;
-    int nr = BITS_TO_LONGS(bits);
+    long k;
+    long nr = BITS_TO_LONGS(bits);
 
     for (k = 0; k < nr; k++) {
         dst[k] = bitmap1[k] | bitmap2[k];
@@ -132,10 +132,10 @@ void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 }
 
 void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-                     const unsigned long *bitmap2, int bits)
+                     const unsigned long *bitmap2, long bits)
 {
-    int k;
-    int nr = BITS_TO_LONGS(bits);
+    long k;
+    long nr = BITS_TO_LONGS(bits);
 
     for (k = 0; k < nr; k++) {
         dst[k] = bitmap1[k] ^ bitmap2[k];
@@ -143,10 +143,10 @@ void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 }
 
 int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits)
+                       const unsigned long *bitmap2, long bits)
 {
-    int k;
-    int nr = BITS_TO_LONGS(bits);
+    long k;
+    long nr = BITS_TO_LONGS(bits);
     unsigned long result = 0;
 
     for (k = 0; k < nr; k++) {
@@ -157,10 +157,10 @@ int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 
 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
 
-void bitmap_set(unsigned long *map, int start, int nr)
+void bitmap_set(unsigned long *map, long start, long nr)
 {
     unsigned long *p = map + BIT_WORD(start);
-    const int size = start + nr;
+    const long size = start + nr;
     int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
     unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
 
@@ -177,10 +177,10 @@ void bitmap_set(unsigned long *map, int start, int nr)
     }
 }
 
-void bitmap_clear(unsigned long *map, int start, int nr)
+void bitmap_clear(unsigned long *map, long start, long nr)
 {
     unsigned long *p = map + BIT_WORD(start);
-    const int size = start + nr;
+    const long size = start + nr;
     int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
     unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
 
@@ -212,10 +212,10 @@ void bitmap_clear(unsigned long *map, int start, int nr)
  * power of 2. A @align_mask of 0 means no alignment is required.
  */
 unsigned long bitmap_find_next_zero_area(unsigned long *map,
-					 unsigned long size,
-					 unsigned long start,
-					 unsigned int nr,
-					 unsigned long align_mask)
+                                         unsigned long size,
+                                         unsigned long start,
+                                         unsigned long nr,
+                                         unsigned long align_mask)
 {
     unsigned long index, end, i;
 again:
@@ -237,9 +237,9 @@ again:
 }
 
 int slow_bitmap_intersects(const unsigned long *bitmap1,
-                           const unsigned long *bitmap2, int bits)
+                           const unsigned long *bitmap2, long bits)
 {
-    int k, lim = bits/BITS_PER_LONG;
+    long k, lim = bits/BITS_PER_LONG;
 
     for (k = 0; k < lim; ++k) {
         if (bitmap1[k] & bitmap2[k]) {
diff --git a/vl.c b/vl.c
index b14032a142..7f4fe0d5df 100644
--- a/vl.c
+++ b/vl.c
@@ -230,7 +230,7 @@ int ctrl_grab = 0;
 unsigned int nb_prom_envs = 0;
 const char *prom_envs[MAX_PROM_ENVS];
 int boot_menu;
-bool boot_strict;
+static bool boot_strict;
 uint8_t *boot_splash_filedata;
 size_t boot_splash_filedata_size;
 uint8_t qemu_extra_params_fw[2];
@@ -461,7 +461,7 @@ static QemuOptsList qemu_boot_opts = {
             .type = QEMU_OPT_STRING,
         }, {
             .name = "strict",
-            .type = QEMU_OPT_STRING,
+            .type = QEMU_OPT_BOOL,
         },
         { /*End of list */ }
     },
@@ -4086,6 +4086,7 @@ int main(int argc, char **argv, char **envp)
         }
 
         boot_menu = qemu_opt_get_bool(opts, "menu", boot_menu);
+        boot_strict = qemu_opt_get_bool(opts, "strict", false);
     }
 
     if (!kernel_cmdline) {
diff --git a/vmstate.c b/vmstate.c
new file mode 100644
index 0000000000..284b080f46
--- /dev/null
+++ b/vmstate.c
@@ -0,0 +1,650 @@
+#include "qemu-common.h"
+#include "migration/migration.h"
+#include "migration/qemu-file.h"
+#include "migration/vmstate.h"
+#include "qemu/bitops.h"
+
+static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
+                                    void *opaque);
+static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
+                                   void *opaque);
+
+int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
+                       void *opaque, int version_id)
+{
+    VMStateField *field = vmsd->fields;
+    int ret;
+
+    if (version_id > vmsd->version_id) {
+        return -EINVAL;
+    }
+    if (version_id < vmsd->minimum_version_id_old) {
+        return -EINVAL;
+    }
+    if  (version_id < vmsd->minimum_version_id) {
+        return vmsd->load_state_old(f, opaque, version_id);
+    }
+    if (vmsd->pre_load) {
+        int ret = vmsd->pre_load(opaque);
+        if (ret) {
+            return ret;
+        }
+    }
+    while (field->name) {
+        if ((field->field_exists &&
+             field->field_exists(opaque, version_id)) ||
+            (!field->field_exists &&
+             field->version_id <= version_id)) {
+            void *base_addr = opaque + field->offset;
+            int i, n_elems = 1;
+            int size = field->size;
+
+            if (field->flags & VMS_VBUFFER) {
+                size = *(int32_t *)(opaque+field->size_offset);
+                if (field->flags & VMS_MULTIPLY) {
+                    size *= field->size;
+                }
+            }
+            if (field->flags & VMS_ARRAY) {
+                n_elems = field->num;
+            } else if (field->flags & VMS_VARRAY_INT32) {
+                n_elems = *(int32_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT32) {
+                n_elems = *(uint32_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT16) {
+                n_elems = *(uint16_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT8) {
+                n_elems = *(uint8_t *)(opaque+field->num_offset);
+            }
+            if (field->flags & VMS_POINTER) {
+                base_addr = *(void **)base_addr + field->start;
+            }
+            for (i = 0; i < n_elems; i++) {
+                void *addr = base_addr + size * i;
+
+                if (field->flags & VMS_ARRAY_OF_POINTER) {
+                    addr = *(void **)addr;
+                }
+                if (field->flags & VMS_STRUCT) {
+                    ret = vmstate_load_state(f, field->vmsd, addr,
+                                             field->vmsd->version_id);
+                } else {
+                    ret = field->info->get(f, addr, size);
+
+                }
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+        }
+        field++;
+    }
+    ret = vmstate_subsection_load(f, vmsd, opaque);
+    if (ret != 0) {
+        return ret;
+    }
+    if (vmsd->post_load) {
+        return vmsd->post_load(opaque, version_id);
+    }
+    return 0;
+}
+
+void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
+                        void *opaque)
+{
+    VMStateField *field = vmsd->fields;
+
+    if (vmsd->pre_save) {
+        vmsd->pre_save(opaque);
+    }
+    while (field->name) {
+        if (!field->field_exists ||
+            field->field_exists(opaque, vmsd->version_id)) {
+            void *base_addr = opaque + field->offset;
+            int i, n_elems = 1;
+            int size = field->size;
+
+            if (field->flags & VMS_VBUFFER) {
+                size = *(int32_t *)(opaque+field->size_offset);
+                if (field->flags & VMS_MULTIPLY) {
+                    size *= field->size;
+                }
+            }
+            if (field->flags & VMS_ARRAY) {
+                n_elems = field->num;
+            } else if (field->flags & VMS_VARRAY_INT32) {
+                n_elems = *(int32_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT32) {
+                n_elems = *(uint32_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT16) {
+                n_elems = *(uint16_t *)(opaque+field->num_offset);
+            } else if (field->flags & VMS_VARRAY_UINT8) {
+                n_elems = *(uint8_t *)(opaque+field->num_offset);
+            }
+            if (field->flags & VMS_POINTER) {
+                base_addr = *(void **)base_addr + field->start;
+            }
+            for (i = 0; i < n_elems; i++) {
+                void *addr = base_addr + size * i;
+
+                if (field->flags & VMS_ARRAY_OF_POINTER) {
+                    addr = *(void **)addr;
+                }
+                if (field->flags & VMS_STRUCT) {
+                    vmstate_save_state(f, field->vmsd, addr);
+                } else {
+                    field->info->put(f, addr, size);
+                }
+            }
+        }
+        field++;
+    }
+    vmstate_subsection_save(f, vmsd, opaque);
+}
+
+static const VMStateDescription *
+    vmstate_get_subsection(const VMStateSubsection *sub, char *idstr)
+{
+    while (sub && sub->needed) {
+        if (strcmp(idstr, sub->vmsd->name) == 0) {
+            return sub->vmsd;
+        }
+        sub++;
+    }
+    return NULL;
+}
+
+static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
+                                   void *opaque)
+{
+    while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) {
+        char idstr[256];
+        int ret, version_id; /* be32 on the wire: keep all 32 bits */
+        uint8_t len, size;
+        const VMStateDescription *sub_vmsd;
+
+        len = qemu_peek_byte(f, 1);
+        if (len < strlen(vmsd->name) + 1) {
+            /* subsection name has to be "section_name/a" */
+            return 0;
+        }
+        size = qemu_peek_buffer(f, (uint8_t *)idstr, len, 2);
+        if (size != len) {
+            return 0;
+        }
+        idstr[size] = 0;
+
+        if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) {
+            /* it doesn't have a valid subsection name */
+            return 0;
+        }
+        sub_vmsd = vmstate_get_subsection(vmsd->subsections, idstr);
+        if (sub_vmsd == NULL) {
+            return -ENOENT;
+        }
+        qemu_file_skip(f, 1); /* subsection */
+        qemu_file_skip(f, 1); /* len */
+        qemu_file_skip(f, len); /* idstr */
+        version_id = qemu_get_be32(f);
+
+        ret = vmstate_load_state(f, sub_vmsd, opaque, version_id);
+        if (ret) {
+            return ret;
+        }
+    }
+    return 0;
+}
+
+static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
+                                    void *opaque)
+{
+    const VMStateSubsection *sub = vmsd->subsections;
+
+    while (sub && sub->needed) {
+        if (sub->needed(opaque)) {
+            const VMStateDescription *vmsd = sub->vmsd;
+            uint8_t len;
+
+            qemu_put_byte(f, QEMU_VM_SUBSECTION);
+            len = strlen(vmsd->name);
+            qemu_put_byte(f, len);
+            qemu_put_buffer(f, (uint8_t *)vmsd->name, len);
+            qemu_put_be32(f, vmsd->version_id);
+            vmstate_save_state(f, vmsd, opaque);
+        }
+        sub++;
+    }
+}
+
+/* bool */
+
+static int get_bool(QEMUFile *f, void *pv, size_t size)
+{
+    bool *v = pv;
+    *v = qemu_get_byte(f);
+    return 0;
+}
+
+static void put_bool(QEMUFile *f, void *pv, size_t size)
+{
+    bool *v = pv;
+    qemu_put_byte(f, *v);
+}
+
+const VMStateInfo vmstate_info_bool = {
+    .name = "bool",
+    .get  = get_bool,
+    .put  = put_bool,
+};
+
+/* 8 bit int */
+
+static int get_int8(QEMUFile *f, void *pv, size_t size)
+{
+    int8_t *v = pv;
+    qemu_get_s8s(f, v);
+    return 0;
+}
+
+static void put_int8(QEMUFile *f, void *pv, size_t size)
+{
+    int8_t *v = pv;
+    qemu_put_s8s(f, v);
+}
+
+const VMStateInfo vmstate_info_int8 = {
+    .name = "int8",
+    .get  = get_int8,
+    .put  = put_int8,
+};
+
+/* 16 bit int */
+
+static int get_int16(QEMUFile *f, void *pv, size_t size)
+{
+    int16_t *v = pv;
+    qemu_get_sbe16s(f, v);
+    return 0;
+}
+
+static void put_int16(QEMUFile *f, void *pv, size_t size)
+{
+    int16_t *v = pv;
+    qemu_put_sbe16s(f, v);
+}
+
+const VMStateInfo vmstate_info_int16 = {
+    .name = "int16",
+    .get  = get_int16,
+    .put  = put_int16,
+};
+
+/* 32 bit int */
+
+static int get_int32(QEMUFile *f, void *pv, size_t size)
+{
+    int32_t *v = pv;
+    qemu_get_sbe32s(f, v);
+    return 0;
+}
+
+static void put_int32(QEMUFile *f, void *pv, size_t size)
+{
+    int32_t *v = pv;
+    qemu_put_sbe32s(f, v);
+}
+
+const VMStateInfo vmstate_info_int32 = {
+    .name = "int32",
+    .get  = get_int32,
+    .put  = put_int32,
+};
+
+/* 32 bit int. Check that the received value is the same as the one
+   in the field */
+
+static int get_int32_equal(QEMUFile *f, void *pv, size_t size)
+{
+    int32_t *v = pv;
+    int32_t v2;
+    qemu_get_sbe32s(f, &v2);
+
+    if (*v == v2) {
+        return 0;
+    }
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_int32_equal = {
+    .name = "int32 equal",
+    .get  = get_int32_equal,
+    .put  = put_int32,
+};
+
+/* 32 bit int. Check that the received value is less than or equal to
+   the one in the field; the field is left untouched, -EINVAL otherwise */
+
+static int get_int32_le(QEMUFile *f, void *pv, size_t size)
+{
+    int32_t *cur = pv;
+    int32_t loaded;
+    qemu_get_sbe32s(f, &loaded);
+
+    if (loaded <= *cur) { /* incoming value must not exceed the local one */
+        return 0;
+    }
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_int32_le = {
+    .name = "int32 le",
+    .get  = get_int32_le, /* load side only validates the incoming value */
+    .put  = put_int32,    /* saved exactly like a plain int32 */
+};
+
+/* 64 bit int */
+
+static int get_int64(QEMUFile *f, void *pv, size_t size)
+{
+    int64_t *v = pv;
+    qemu_get_sbe64s(f, v);
+    return 0;
+}
+
+static void put_int64(QEMUFile *f, void *pv, size_t size)
+{
+    int64_t *v = pv;
+    qemu_put_sbe64s(f, v);
+}
+
+const VMStateInfo vmstate_info_int64 = {
+    .name = "int64",
+    .get  = get_int64,
+    .put  = put_int64,
+};
+
+/* 8 bit unsigned int */
+
+static int get_uint8(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    qemu_get_8s(f, v);
+    return 0;
+}
+
+static void put_uint8(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    qemu_put_8s(f, v);
+}
+
+const VMStateInfo vmstate_info_uint8 = {
+    .name = "uint8",
+    .get  = get_uint8,
+    .put  = put_uint8,
+};
+
+/* 16 bit unsigned int */
+
+static int get_uint16(QEMUFile *f, void *pv, size_t size)
+{
+    uint16_t *v = pv;
+    qemu_get_be16s(f, v);
+    return 0;
+}
+
+static void put_uint16(QEMUFile *f, void *pv, size_t size)
+{
+    uint16_t *v = pv;
+    qemu_put_be16s(f, v);
+}
+
+const VMStateInfo vmstate_info_uint16 = {
+    .name = "uint16",
+    .get  = get_uint16,
+    .put  = put_uint16,
+};
+
+/* 32 bit unsigned int */
+
+static int get_uint32(QEMUFile *f, void *pv, size_t size)
+{
+    uint32_t *v = pv;
+    qemu_get_be32s(f, v);
+    return 0;
+}
+
+static void put_uint32(QEMUFile *f, void *pv, size_t size)
+{
+    uint32_t *v = pv;
+    qemu_put_be32s(f, v);
+}
+
+const VMStateInfo vmstate_info_uint32 = {
+    .name = "uint32",
+    .get  = get_uint32,
+    .put  = put_uint32,
+};
+
+/* 32 bit uint. Check that the received value is the same as the one
+   in the field */
+
+static int get_uint32_equal(QEMUFile *f, void *pv, size_t size)
+{
+    uint32_t *v = pv;
+    uint32_t v2;
+    qemu_get_be32s(f, &v2);
+
+    if (*v == v2) {
+        return 0;
+    }
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_uint32_equal = {
+    .name = "uint32 equal",
+    .get  = get_uint32_equal,
+    .put  = put_uint32,
+};
+
+/* 64 bit unsigned int */
+
+static int get_uint64(QEMUFile *f, void *pv, size_t size)
+{
+    uint64_t *v = pv;
+    qemu_get_be64s(f, v);
+    return 0;
+}
+
+static void put_uint64(QEMUFile *f, void *pv, size_t size)
+{
+    uint64_t *v = pv;
+    qemu_put_be64s(f, v);
+}
+
+const VMStateInfo vmstate_info_uint64 = {
+    .name = "uint64",
+    .get  = get_uint64,
+    .put  = put_uint64,
+};
+
+/* 64 bit unsigned int. Check that the received value is the same as the one
+   in the field */
+
+static int get_uint64_equal(QEMUFile *f, void *pv, size_t size)
+{
+    uint64_t *v = pv;
+    uint64_t v2;
+    qemu_get_be64s(f, &v2);
+
+    if (*v == v2) {
+        return 0;
+    }
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_uint64_equal = {
+    .name = "uint64 equal",
+    .get  = get_uint64_equal, /* load side compares against the field */
+    .put  = put_uint64,       /* saved exactly like a plain uint64 */
+};
+
+/* 8 bit unsigned int. Check that the received value is the same as the one
+   in the field */
+
+static int get_uint8_equal(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    uint8_t v2;
+    qemu_get_8s(f, &v2);
+
+    if (*v == v2) {
+        return 0;
+    }
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_uint8_equal = {
+    .name = "uint8 equal",
+    .get  = get_uint8_equal,
+    .put  = put_uint8,
+};
+
+/* 16 bit unsigned int. Check that the received value is the same as the one
+   in the field */
+
+static int get_uint16_equal(QEMUFile *f, void *pv, size_t size)
+{
+    uint16_t *v = pv;
+    uint16_t v2;
+    qemu_get_be16s(f, &v2);
+
+    if (*v == v2) {
+        return 0;
+    }
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_uint16_equal = {
+    .name = "uint16 equal",
+    .get  = get_uint16_equal,
+    .put  = put_uint16,
+};
+
+/* floating point */
+
+static int get_float64(QEMUFile *f, void *pv, size_t size)
+{
+    float64 *v = pv;
+
+    *v = make_float64(qemu_get_be64(f));
+    return 0;
+}
+
+static void put_float64(QEMUFile *f, void *pv, size_t size)
+{
+    float64 *v = pv; /* same element type that get_float64() stores */
+
+    qemu_put_be64(f, float64_val(*v));
+}
+
+const VMStateInfo vmstate_info_float64 = {
+    .name = "float64",
+    .get  = get_float64,
+    .put  = put_float64,
+};
+
+/* uint8_t buffers */
+
+static int get_buffer(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    qemu_get_buffer(f, v, size);
+    return 0;
+}
+
+static void put_buffer(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t *v = pv;
+    qemu_put_buffer(f, v, size);
+}
+
+const VMStateInfo vmstate_info_buffer = {
+    .name = "buffer",
+    .get  = get_buffer,
+    .put  = put_buffer,
+};
+
+/* unused buffers: space that was used for some fields that are
+   not useful anymore */
+
+static int get_unused_buffer(QEMUFile *f, void *pv, size_t size)
+{
+    uint8_t buf[1024];
+    int block_len;
+
+    while (size > 0) {
+        block_len = MIN(sizeof(buf), size);
+        size -= block_len;
+        qemu_get_buffer(f, buf, block_len);
+    }
+   return 0;
+}
+
+static void put_unused_buffer(QEMUFile *f, void *pv, size_t size)
+{
+    static const uint8_t buf[1024];
+    int block_len;
+
+    while (size > 0) {
+        block_len = MIN(sizeof(buf), size);
+        size -= block_len;
+        qemu_put_buffer(f, buf, block_len);
+    }
+}
+
+const VMStateInfo vmstate_info_unused_buffer = {
+    .name = "unused_buffer",
+    .get  = get_unused_buffer,
+    .put  = put_unused_buffer,
+};
+
+/* bitmaps (as defined by bitmap.h). Note that size here is the size
+ * of the bitmap in bits. The on-the-wire format of a bitmap is 64
+ * bit words with the bits in big endian order. The in-memory format
+ * is an array of 'unsigned long', which may be either 32 or 64 bits.
+ */
+/* This is the number of 64 bit words sent over the wire */
+#define BITS_TO_U64S(nr) DIV_ROUND_UP(nr, 64)
+static int get_bitmap(QEMUFile *f, void *pv, size_t size)
+{
+    unsigned long *bmp = pv;
+    int i, idx = 0;
+    for (i = 0; i < BITS_TO_U64S(size); i++) {
+        uint64_t w = qemu_get_be64(f);
+        bmp[idx++] = w;
+        if (sizeof(unsigned long) == 4 && idx < BITS_TO_LONGS(size)) {
+            bmp[idx++] = w >> 32;
+        }
+    }
+    return 0;
+}
+
+static void put_bitmap(QEMUFile *f, void *pv, size_t size)
+{
+    unsigned long *bmp = pv;
+    int i, idx = 0;
+    for (i = 0; i < BITS_TO_U64S(size); i++) {
+        uint64_t w = bmp[idx++];
+        if (sizeof(unsigned long) == 4 && idx < BITS_TO_LONGS(size)) {
+            w |= ((uint64_t)bmp[idx++]) << 32;
+        }
+        qemu_put_be64(f, w);
+    }
+}
+
+const VMStateInfo vmstate_info_bitmap = {
+    .name = "bitmap",
+    .get = get_bitmap,
+    .put = put_bitmap,
+};